lib/CPAN/Testers/ParseReport.pm

   1 package CPAN::Testers::ParseReport;
   2
   3 use warnings;
   4 use strict;
   5
   6 use Compress::Zlib ();
   7 use DateTime::Format::Strptime;
   8 use File::Basename qw(basename);
   9 use File::Path qw(mkpath);
  10 use HTML::Entities qw(decode_entities);
  11 use LWP::UserAgent;
  12 use List::Util qw(max min sum);
  13 use List::MoreUtils qw(uniq);
  14 use MIME::QuotedPrint ();
  15 use Time::Local ();
  16 use utf8;
  17
  18 our $default_transport = "http_cpantesters";
  19 our $default_cturl = "http://static.cpantesters.org/distro";
  20 our $Signal = 0;
  21
  22 =encoding utf-8
  23
  24 =head1 NAME
  25
  26 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
  27
  28 =cut
  29
  30 use version; our $VERSION = qv('0.3.0');
  31
  32 =head1 SYNOPSIS
  33
  34 The documentation in here is normally not needed because the code is
  35 meant to be run from the standalone program C<ctgetreports>.
  36
  37   ctgetreports --q mod:Moose Devel-Events
  38
  39 =head1 DESCRIPTION
  40
  41 This is the core module for CPAN::Testers::ParseReport. If you're not
  42 looking to extend or alter the behaviour of this module, you probably
  43 want to look at L<ctgetreports> instead.
  44
  45 =head1 OPTIONS
  46
  47 Options are described in the L<ctgetreports> manpage and are passed
  48 through to the functions unaltered.
  49
  50 =head1 FUNCTIONS
  51
  52 =head2 parse_distro($distro,%options)
  53
  54 reads the cpantesters JSON file or the local database for the distro
  55 and loops through the reports for the specified or most recent version
  56 of that distro found in these data.
  57
  58 parse_distro() intentionally has no meaningful return value, different
  59 options would require different ones.
  60
  61 =head2 $extract = parse_single_report($report,$dumpvars,%options)
  62
  63 mirrors and reads this report. $report is of the form
  64
  65   { id => <integer>, guid => <guid>, }
  66
  67 $dumpvar is a hashreference that gets filled with data.
  68
  69 $extract is the result of parse_report() described below.
  70
  71 =cut
  72
  73 {
  74     my $ua;
  75     sub _ua {
  76         return $ua if $ua;
  77         $ua = LWP::UserAgent->new
  78             (
  79              keep_alive => 1,
  80              env_proxy => 1,
  81             );
  82         $ua->parse_head(0);
  83         $ua;
  84     }
  85 }
  86 {
  87     my $ua;
  88     sub _ua_gzip {
  89         return $ua if $ua;
  90         $ua = LWP::UserAgent->new
  91             (
  92              keep_alive => 1,
  93              env_proxy => 1,
  94             );
  95         $ua->parse_head(0);
  96         $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
  97         $ua;
  98     }
  99 }
 100
 101 {
 102     # we called it yaml because it was yaml; now it is json
 103     use JSON::XS;
 104     my $j = JSON::XS->new->ascii->pretty;
 105     sub _slurp {
 106         my($file) = @_;
 107         local $/;
 108         open my $fh, $file or die "Could not open '$file': $!";
 109         <$fh>;
 110     }
 111     sub _yaml_loadfile {
 112         $j->decode(_slurp shift);
 113     }
 114     sub _yaml_dump {
 115         $j->encode(shift);
 116     }
 117 }
 118
 119 sub _download_overview {
 120     my($cts_dir, $distro, %Opt) = @_;
 121     my $cturl = $Opt{cturl} ||= $default_cturl;
 122     my $ctarget = "$cts_dir/$distro.json";
 123     my $cheaders = "$cts_dir/$distro.headers";
 124     if ($Opt{local}) {
 125         unless (-e $ctarget) {
 126             die "Alert: No local file '$ctarget' found, cannot continue\n";
 127         }
 128     } else {
 129         if (! -e $ctarget or -M $ctarget > .25) {
 130             if (-e $ctarget && $Opt{verbose}) {
 131                 my(@stat) = stat _;
 132                 my $timestamp = gmtime $stat[9];
 133                 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 134             }
 135             print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
 136             my $firstletter = substr($distro,0,1);
 137             my $uri = "$cturl/$firstletter/$distro.json";
 138             my $resp = _ua->mirror($uri,$ctarget);
 139             if ($resp->is_success) {
 140                 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
 141                 open my $fh, ">", $cheaders or die;
 142                 for ($resp->headers->as_string) {
 143                     print $fh $_;
 144                     if ($Opt{verbose} && $Opt{verbose}>1) {
 145                         print STDERR $_ unless $Opt{quiet};
 146                     }
 147                 }
 148             } elsif (304 == $resp->code) {
 149                 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
 150                 my $atime = my $mtime = time;
 151                 utime $atime, $mtime, $cheaders;
 152             } else {
 153                 die sprintf
 154                     (
 155                      "No success downloading %s: %s",
 156                      $uri,
 157                      $resp->status_line,
 158                     );
 159             }
 160         }
 161     }
 162     return $ctarget;
 163 }
 164
 165 sub _parse_yaml {
 166     my($ctarget, %Opt) = @_;
 167     my $arr = _yaml_loadfile($ctarget);
 168     my($selected_release_ul,$selected_release_distrov,$excuse_string);
 169     if ($Opt{vdistro}) {
 170         $excuse_string = "selected distro '$Opt{vdistro}'";
 171         $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
 172         ($selected_release_distrov) = $arr->[0]{distversion};
 173     } else {
 174         $excuse_string = "any distro";
 175         my $last_addition;
 176         my %seen;
 177         for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
 178             unless ($seen{$report->{distversion}}++) {
 179                 $last_addition = $report->{distversion};
 180             }
 181         }
 182         $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
 183         ($selected_release_distrov) = $last_addition;
 184     }
 185     unless ($selected_release_distrov) {
 186         warn "Warning: could not find $excuse_string in '$ctarget'";
 187         return;
 188     }
 189     print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
 190     my @all;
 191     for my $test (@$arr) {
 192         my $id = $test->{id};
 193         push @all, {
 194                     id   => $test->{id},
 195                     guid => $test->{guid},
 196                    };
 197         return if $Signal;
 198     }
 199     @all = sort { $b->{id} <=> $a->{id} } @all;
 200     return \@all;
 201 }
 202
 203 sub parse_single_report {
 204     my($report, $dumpvars, %Opt) = @_;
 205     my($id) = $report->{id};
 206     my($guid) = $report->{guid};
 207     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 208     # the name nntp-testers was picked because originally the reports
 209     # were available from an NNTP server
 210     my $nnt_dir = "$Opt{cachedir}/nntp-testers";
 211     mkpath $nnt_dir;
 212     my $target = "$nnt_dir/$id";
 213     if ($Opt{local}) {
 214         unless (-e $target) {
 215             die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
 216         }
 217     } else {
 218         $Opt{transport} ||= $default_transport;
 219         my $ttarget;
 220         if (-e $target) {
 221             $ttarget = $target;
 222         } elsif (-e "$target.gz") {
 223             $ttarget = "$target.gz";
 224         }
 225         if ($ttarget) {
 226             my $raw_report;
 227             open my $fh, $ttarget or die "Could not open '$ttarget': $!";
 228             if (0) {
 229             } elsif ($Opt{transport} eq "http_cpantesters") {
 230                 local $/;
 231                 $raw_report = <$fh>;
 232             } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
 233                 my $gz = Compress::Zlib::gzopen($fh, "rb");
 234                 $raw_report = "";
 235                 my $buffer;
 236                 while (my $bytesread = $gz->gzread($buffer)) {
 237                     $raw_report .= $buffer;
 238                 }
 239             }
 240             if ($raw_report =~ m{<title>.*(Report not found|Error).*</title>}) {
 241                 unlink $ttarget or die "Could not unlink '$ttarget': $!";
 242             }
 243         }
 244         if (! -e $target) {
 245             print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
 246             if (0) {
 247             } elsif ($Opt{transport} eq "http_cpantesters") {
 248                 my $mustfetch = 0;
 249                 if ($Opt{"prefer-local-reports"}) {
 250                     unless (-e $target) {
 251                         $mustfetch = 1;
 252                     }
 253                 } else {
 254                     $mustfetch = 1;
 255                 }
 256                 if ($mustfetch) {
 257                     my $resp = _ua->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1",$target);
 258                     if ($resp->is_success) {
 259                         if ($Opt{verbose}) {
 260                             my(@stat) = stat $target;
 261                             my $timestamp = gmtime $stat[9];
 262                             print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 263                             if ($Opt{verbose} > 1) {
 264                                 print STDERR $resp->headers->as_string unless $Opt{quiet};
 265                             }
 266                         }
 267                         my $headers = "$target.headers";
 268                         open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
 269                         print $fh $resp->headers->as_string;
 270                     } else {
 271                         die {severity=>0,
 272                                  text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
 273                     }
 274                 }
 275             } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
 276                 my $mustfetch = 0;
 277                 if ($Opt{"prefer-local-reports"}) {
 278                     unless (-e "$target.gz") {
 279                         $mustfetch = 1;
 280                     }
 281                 } else {
 282                     $mustfetch = 1;
 283                 }
 284                 if ($mustfetch) {
 285                     my $resp = _ua_gzip->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1","$target.gz");
 286                     if ($resp->is_success) {
 287                         if ($Opt{verbose}) {
 288                             my(@stat) = stat "$target.gz";
 289                             my $timestamp = gmtime $stat[9];
 290                             print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 291                             if ($Opt{verbose} > 1) {
 292                                 print STDERR $resp->headers->as_string unless $Opt{quiet};
 293                             }
 294                         }
 295                         my $headers = "$target.headers";
 296                         open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
 297                         print $fh $resp->headers->as_string;
 298                     } else {
 299                         die {severity=>0,
 300                                  text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
 301                     }
 302                 }
 303             } else {
 304                 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
 305             }
 306         }
 307     }
 308     parse_report($target, $dumpvars, %Opt);
 309 }
 310
 311 sub parse_distro {
 312     my($distro,%Opt) = @_;
 313     my %dumpvars;
 314     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 315     # the name cpantesters-show was picked because originally
 316     # http://www.cpantesters.org/show/ contained html file that we had
 317     # to parse.
 318     my $cts_dir = "$Opt{cachedir}/cpantesters-show";
 319     mkpath $cts_dir;
 320     if ($Opt{solve}) {
 321         require Statistics::Regression;
 322         $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
 323     }
 324     if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
 325         $Opt{vdistro} = $distro;
 326         $distro = $1;
 327     }
 328     my $reports;
 329     if (my $ctdb = $Opt{ctdb}) {
 330         require CPAN::WWW::Testers::Generator::Database;
 331         require CPAN::DistnameInfo;
 332         my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
 333         unless ($Opt{vdistro}) {
 334             my $sql = "select version from cpanstats where dist=? order by id";
 335             my @rows = $dbi->get_query($sql,$distro);
 336             my($newest,%seen);
 337             for my $row (@rows) {
 338                 $newest = $row->[0] unless $seen{$row->[0]}++;
 339             }
 340             $Opt{vdistro} = "$distro-$newest";
 341         }
 342         my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
 343         my $dist = $d->dist;
 344         my $version = $d->version;
 345         my $sql = "select id, guid from cpanstats where dist=? and version=? order by id desc";
 346         my @rows = $dbi->get_query($sql,$dist,$version);
 347         my @all;
 348         for my $row (@rows) {
 349             push @all, {
 350                         id   => $row->[0],
 351                         guid => $row->[1],
 352                        };
 353         }
 354         $reports = \@all;
 355     } else {
 356         my $ctarget = _download_overview($cts_dir, $distro, %Opt);
 357         $reports = _parse_yaml($ctarget,%Opt);
 358     }
 359     return unless $reports;
 360     my $sampled = 0;
 361     my $samplesize = $Opt{sample} || 0;
 362     $samplesize = 0 if $samplesize && $samplesize >= @$reports;
 363  REPEATER: {
 364         my $i = 0;
 365         my %taken;
 366     REPORT: for my $report (@$reports) {
 367             $i++;
 368             if ($samplesize) {
 369                 my $need = $samplesize - $sampled;
 370                 next REPORT unless $need;
 371                 my $left = @$reports - $i;
 372                 # warn sprintf "tot %d i %d sampled %d need %d left %d\n", scalar @$reports, $i, $sampled, $need, $left;
 373                 my $want_this = (rand(1) <= ($need/$left));
 374                 next REPORT unless $want_this;
 375             }
 376             eval {parse_single_report($report, \%dumpvars, %Opt)};
 377             if ($@) {
 378                 if (ref $@) {
 379                     if ($@->{severity}) {
 380                         die $@->{text};
 381                     } else {
 382                         warn $@->{text};
 383                     }
 384                 } else {
 385                     die $@;
 386                 }
 387             }
 388             $sampled++;
 389             $taken{$i-1}=undef;
 390             last REPEATER if $Signal;
 391         }
 392         if ($samplesize) {
 393         PASSFAIL: for my $pf ("pass","fail") {
 394                 my $minx = $Opt{"min".$pf} or next PASSFAIL;
 395                 my $x = $dumpvars{"meta:ok"}{uc $pf}{uc $pf};
 396                 if ($x < $minx) {
 397                     # bump samplesize, remove already sampled reports from array, redo
 398                     my $bump = int($samplesize * 0.05)+1;
 399                     $samplesize += $bump;
 400                     for my $k (sort {$b <=> $a} keys %taken) {
 401                         splice @$reports, $k, 1;
 402                     }
 403                     redo REPEATER;
 404                 }
 405             }
 406         }
 407     }
 408     if ($Opt{dumpvars}) {
 409         my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
 410         open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
 411         print $fh _yaml_dump(\%dumpvars);
 412         close $fh or die "Could not close '$dumpfile': $!"
 413     }
 414     if ($Opt{solve}) {
 415         solve(\%dumpvars,%Opt);
 416     }
 417 }
 418
 419 =head2 $bool = _looks_like_qp($raw_report)
 420
 421 We had to acknowledge the fact that some MTAs swallow the MIME-Version
 422 header while passing MIME through. So we introduce fallback heuristics
 423 that try to determine if a report is written in quoted printable.
 424
 425 Note that this subroutine is internal, just documented to have the
 426 internals documented.
 427
 428 The current implementation counts the number of QP escaped spaces and
 429 equal signs.
 430
 431 =cut
 432
 433 sub _looks_like_qp {
 434     my($report) = @_;
 435     my $count_space = () = $report =~ /=20/g;
 436     return 1 if $count_space > 12;
 437     my $count_equal = () = $report =~ /=3D/g;
 438     return 1 if $count_equal > 12;
 439     return 1 if $count_space+$count_equal > 24;
 440     return 0; # waiting for a counter example
 441 }
 442
 443 =head2 $extract = parse_report($target,$dumpvars,%Opt)
 444
 445 Reads one report. $target is the local filename to read (but see below
 446 for option 'article'). $dumpvars is a hashref which gets filled with
 447 descriptive stats about PASS/FAIL/etc. %Opt are the options as
 448 described in the C<ctgetreports> manpage. $extract is a hashref
 449 containing the found variables.
 450
 451 Note: this parsing is a bit dirty but as it seems good enough I'm not
 452 inclined to change it. We parse HTML with regexps only, not an HTML
 453 parser. Only the entities are decoded.
 454
 455 In %Opt you can use
 456
 457     article => $some_full_article_as_scalar
 458
 459 to use this function to parse one full article as text. When this is
 460 given, the argument $target is not read, but its basename is taken to
 461 be the id of the article. (OMG, hackers!)
 462
 463 =cut
 464 sub parse_report {
 465     my($target,$dumpvars,%Opt) = @_;
 466     our @q;
 467     my $id = basename($target);
 468     # warn "DEBUG: id[$id]";
 469     my($ok,$about);
 470
 471     my(%extract);
 472
 473     my($report,$isHTML) = _get_cooked_report($target, \%Opt);
 474     my @qr = map /^qr:(.+)/, @{$Opt{q}};
 475     if ($Opt{raw} || @qr) {
 476         for my $qr (@qr) {
 477             my $cqr = eval "qr{$qr}";
 478             die "Could not compile regular expression '$qr': $@" if $@;
 479             my(@matches) = $report =~ $cqr;
 480             my $v;
 481             if (@matches) {
 482                 if (@matches==1) {
 483                     $v = $matches[0];
 484                 } else {
 485                     $v = join "", map {"($_)"} @matches;
 486                 }
 487             } else {
 488                 $v = "";
 489             }
 490             $extract{"qr:$qr"} = $v;
 491         }
 492     }
 493
 494     my $report_writer;
 495     my $moduleunpack = {};
 496     my $expect_prereq = 0;
 497     my $expect_toolchain = 0;
 498     my $expecting_toolchain_soon = 0;
 499     my $expect_module_versions_report = 0;
 500     my $fallback_p5 = "";
 501
 502     my $in_summary = 0;
 503     my $in_summary_seen_platform = 0;
 504     my $in_prg_output = 0;
 505     my $in_env_context = 0;
 506     my $in_test_summary = 0;
 507
 508     my $current_headline;
 509     my @previous_line = ""; # so we can neutralize line breaks
 510     my @rlines = split /\r?\n/, $report;
 511  LINE: for (@rlines) {
 512         next LINE unless ($isHTML ? m/<title>((\S+)\s+(\S+))/ : m/^Subject:\s*((\S+)\s+(\S+))/);
 513         my $s = $1;
 514         $s = $1 if $s =~ m{<strong>(.+)};
 515         if ($s =~ /(\S+)\s+(\S+)/) {
 516             $ok = $1;
 517             $about = $2;
 518         }
 519         $extract{"meta:ok"}    = $ok;
 520         $extract{"meta:about"} = $about;
 521         last;
 522     }
 523     unless ($extract{"meta:about"}) {
 524         $extract{"meta:about"} = $Opt{vdistro};
 525         unless ($extract{"meta:ok"}) {
 526             warn "Warning: could not determine state of report";
 527         }
 528     }
 529  LINE: while (@rlines) {
 530         $_ = shift @rlines;
 531         while (/!$/ and @rlines) {
 532             my $followupline = shift @rlines;
 533             $followupline =~ s/^\s+//; # remo leading space
 534             $_ .= $followupline;
 535         }
 536         if (/^--------/) {
 537             if ($previous_line[-2] && $previous_line[-2] =~ /^--------/) {
 538                 $current_headline = $previous_line[-1];
 539                 if ($current_headline =~ /PROGRAM OUTPUT/) {
 540                     $in_prg_output = 1;
 541                 } else {
 542                     $in_prg_output = 0;
 543                 }
 544                 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
 545                     $in_env_context = 1;
 546                 } else {
 547                     $in_env_context = 0;
 548                 }
 549             } elsif ($previous_line[-1] && $previous_line[-1] =~ /Test Summary Report/) {
 550                 $in_test_summary = 1;
 551                 $in_prg_output = 0;
 552             }
 553         }
 554         if ($extract{"meta:perl"}) {
 555             if (    $in_summary
 556                 and !$extract{"conf:git_commit_id"}
 557                 and /Commit id:\s*([[:xdigit:]]+)/) {
 558                 $extract{"conf:git_commit_id"} = $1;
 559             }
 560         } else {
 561             my $p5;
 562             if (0) {
 563             } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
 564                 $p5 = $1;
 565                 $in_summary = 1;
 566                 $in_env_context = 0;
 567             }
 568             if ($p5) {
 569                 my($r,$v,$s,$p);
 570                 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
 571                     $r =~ s/\.0//; # 5.0 6 2!
 572                     $extract{"meta:perl"} = "$r.$v.$s\@$p";
 573                 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
 574                     $r =~ s/\.0//;
 575                     $extract{"meta:perl"} = "$r.$v.$s";
 576                 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
 577                     $r =~ s/\.0//;
 578                     $extract{"meta:perl"} = "$r.$v.$s";
 579                 } else {
 580                     $extract{"meta:perl"} = $p5;
 581                 }
 582             }
 583         }
 584         unless ($extract{"meta:from"}) {
 585             if (0) {
 586             } elsif ($isHTML ?
 587                      m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
 588                      m|^From:\s*(.+)|
 589                     ) {
 590                 my $f = $1;
 591                 $f = $1 if $f =~ m{<strong>(.+)</strong>};
 592                 $extract{"meta:from"} = $f;
 593             }
 594             $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
 595         }
 596         unless ($extract{"meta:date"}) {
 597             if (0) {
 598             } elsif ($isHTML ?
 599                      m|<div class="h_name">Date:</div> (.+?)<br/>| :
 600                      m|^Date:\s*(.+)|
 601                     ) {
 602                 my $date = $1;
 603                 $date = $1 if $date =~ m{<strong>(.+)</strong>};
 604                 my($dt);
 605             DATEFMT: for my $pat ("%Y-%m-%dT%TZ", # 2010-07-07T14:01:40Z
 606                                   "%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
 607                                   "%b %d, %Y %R", # July 10,...
 608                                   "%b  %d, %Y %R", # July  4,...
 609                                  ) {
 610                     $dt = eval {
 611                         my $p = DateTime::Format::Strptime->new
 612                             (
 613                              locale => "en",
 614                              time_zone => "UTC",
 615                              pattern => $pat,
 616                             );
 617                         $p->parse_datetime($date)
 618                     };
 619                     last DATEFMT if $dt;
 620                 }
 621                 unless ($dt) {
 622                     warn "Could not parse date[$date], setting to epoch 0";
 623                     $dt = DateTime->from_epoch( epoch => 0 );
 624                 }
 625                 $extract{"meta:date"} = $dt->datetime;
 626             }
 627             $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
 628         }
 629         unless ($extract{"meta:writer"}) {
 630             for ("$previous_line[-1] $_") {
 631                 if (0) {
 632                 } elsif (/CPANPLUS, version (\S+)/) {
 633                     $extract{"meta:writer"} = "CPANPLUS $1";
 634                 } elsif (/created by (App::cpanminus::reporter \S+)/) {
 635                     $extract{"meta:writer"} = $1;
 636                 } elsif (/created (?:automatically )?by (\S+)/) {
 637                     $extract{"meta:writer"} = $1;
 638                     if (/\s+on\s+perl\s+([^,]+),/) {
 639                         $fallback_p5 = $1;
 640                     }
 641                 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
 642                     $extract{"meta:writer"} = "$1 $2";
 643                 }
 644                 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
 645             }
 646         }
 647         if ($in_summary) {
 648             # we do that first three lines a bit too often
 649             my $qr = $Opt{dumpvars} || "";
 650             $qr = qr/$qr/ if $qr;
 651             unless (@q) {
 652                 @q = @{$Opt{q}||[]};
 653                 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
 654             }
 655
 656             my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
 657
 658             if (/^\s+Platform:$/) {
 659                 $in_summary_seen_platform=1;
 660             } elsif (/^\s*$/ || m|</pre>|) {
 661                 # if not html, we have reached the end now
 662                 if ($in_summary_seen_platform) {
 663                     # some perls have an empty line after the summary line
 664                     $in_summary = 0;
 665                 }
 666             } else {
 667                 my(%kv) = m!\G,?\s*([^=]+)= # left hand side and equal sign
 668                             (
 669                                 [^',\s]+(?=.+=) # use64bitint=define use64bitall=define uselongdouble=undef
 670                                                 # (lookahead needed for left-over equal sign)
 671                             |
 672                                 [^',]+$     # libpth=/usr/lib /usr/local/lib
 673                             |
 674                                 '[^']+?'    # cccdlflags='-DPIC -fPIC'
 675                             |
 676                                 \S+         # useshrplib=false
 677                             )!xgc;
 678                 while (my($k,$v) = each %kv) {
 679                     my $ck = "conf:$k";
 680                     $ck =~ s/\s+$//;
 681                     $v =~ s/,$//;
 682                     if ($v =~ /^'(.*)'$/) {
 683                         $v = $1;
 684                     }
 685                     $v =~ s/^\s+//;
 686                     $v =~ s/\s+$//;
 687                     if ($qr && $ck =~ $qr) {
 688                         $extract{$ck} = $v;
 689                     } elsif ($conf_vars{$ck}) {
 690                         $extract{$ck} = $v;
 691                     }
 692                 }
 693             }
 694         }
 695         if ($in_prg_output) {
 696             unless ($extract{"meta:output_from"}) {
 697                 if (/Output from (.+):$/) {
 698                     $extract{"meta:output_from"} = $1
 699                 }
 700             }
 701
 702             # Parsing of Module::Versions::Report text in test output
 703             if (/Modules in memory:/) {
 704                 $expect_module_versions_report = 1;
 705                 next LINE;
 706             }
 707             elsif ($expect_module_versions_report) {
 708                 if (/\s+(\S+)(?:\s+(v\d\S+?))?;/) {
 709                     $extract{"mod:$1"} = defined $2 ? $2 : 'undef';
 710                     next LINE;
 711                 }
 712                 elsif (/\[at .+?\]/) {
 713                     # trailing timestamp
 714                     $expect_module_versions_report = 0;
 715                     next LINE;
 716                 }
 717             }
 718         }
 719         if ($in_env_context) {
 720             if ($extract{"meta:writer"} =~ /^CPANPLUS\b/
 721                 ||
 722                 exists $extract{"env:PERL5_CPANPLUS_IS_VERSION"}
 723                ) {
 724                 (
 725                  s/Perl:\s+\$\^X/\$^X/
 726                  ||
 727                  s/EUID:\s+\$>/\$EUID/
 728                  ||
 729                  s/UID:\s+\$</\$UID/
 730                  ||
 731                  s/EGID:\s+\$\)/\$EGID/
 732                  ||
 733                  s/GID:\s+\$\(/\$GID/
 734                 )
 735             }
 736             if (my($left,$right) = /^\s{4}(\S+)\s*=\s*(.*)$/) {
 737                 if ($left eq '$UID/$EUID') {
 738                     my($uid,$euid) = split m{\s*/\s*}, $right;
 739                     $extract{'env:$UID'} = $uid;
 740                     $extract{'env:$EUID'} = $euid;
 741                 } elsif ($left =~ /GID/) {
 742                     for my $xgid (uniq split " ", $right) {
 743                         $extract{"env:$left∋$xgid"} = "true";
 744                     }
 745                 } else {
 746                     $extract{"env:$left"} = $right;
 747                 }
 748             }
 749         }
 750         if ($in_test_summary) {
 751             if (/^(?:Result:|Files=\d)/) {
 752                 $in_test_summary = 0;
 753             } elsif (/^(\S+)\s+\(Wstat:.+?Tests:.+?Failed:\s*(\d+)\)$/) {
 754                 my $in_test_summary_current_test = $1; # t/globtest.t or t\globtest.t
 755                 my $in_test_summary_current_failed = $2;
 756                 $in_test_summary_current_test =~ s|\\|/|g; # only t/globtest.t
 757                 $extract{"fail:$in_test_summary_current_test"} = $in_test_summary_current_failed;
 758             } elsif (/^\s+Failed tests?:/) {
 759                 # ignoring the exact combination of tests for now, seems like overkill
 760             }
 761         }
 762         push @previous_line, $_;
 763         if ($expect_prereq || $expect_toolchain) {
 764             if (/Perl module toolchain versions installed/) {
 765                 # first time discovered in CPANPLUS 0.89_06
 766                 $expecting_toolchain_soon = 1;
 767                 $expect_prereq=0;
 768                 next LINE;
 769             }
 770             if (exists $moduleunpack->{type}) {
 771                 my($module,$v,$needwant);
 772                 # type 1 and 2 are about prereqs, type three about toolchain
 773                 if ($moduleunpack->{type} == 1) {
 774                     (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 775                     next LINE if $@;
 776                     if ($leader =~ /^-/) {
 777                         $moduleunpack = {};
 778                         $expect_prereq = 0;
 779                         next LINE;
 780                     } elsif ($leader =~ /^(
 781                                          buil          # build_requires:
 782                                          |conf         # configure_requires:
 783                                         )/x) {
 784                         next LINE;
 785                     } elsif ($module =~ /^(
 786                                          -             # line drawing
 787                                         )/x) {
 788                         next LINE;
 789                     }
 790                 } elsif ($moduleunpack->{type} == 2) {
 791                     (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
 792                     next LINE if $@;
 793                     for ($module,$v,$needwant) {
 794                         s/^\s+//;
 795                         s/\s+$//;
 796                     }
 797                     if ($leader =~ /^\*/) {
 798                         $moduleunpack = {};
 799                         $expect_prereq = 0;
 800                         next LINE;
 801                     } elsif (!defined $v
 802                              or !defined $needwant
 803                              or $v =~ /\s/
 804                              or $needwant =~ /\s/
 805                             ) {
 806                         ($module,$v,$needwant) = split " ", $_;
 807                     }
 808                 } elsif ($moduleunpack->{type} == 3) {
 809                     (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 810                     next LINE if $@;
 811                     if (!$module) {
 812                         $moduleunpack = {};
 813                         $expect_toolchain = 0;
 814                         next LINE;
 815                     } elsif ($module =~ /^-/) {
 816                         next LINE;
 817                     }
 818                 }
 819                 $module =~ s/\s+$//;
 820                 if ($module) {
 821                     $v =~ s/^\s+//;
 822                     $v =~ s/\s+$//;
 823                     my($modulename,$versionlead) = split " ", $module;
 824                     if (defined $modulename and defined $versionlead) {
 825                         $module = $modulename;
 826                         $v = "$versionlead$v";
 827                     }
 828                     if ($v eq "Have") {
 829                         next LINE;
 830                     }
 831                     $extract{"mod:$module"} = $v;
 832                     if (defined $needwant) {
 833                         $needwant =~ s/^\s+//;
 834                         $needwant =~ s/\s+$//;
 835                         $extract{"prereq:$module"} = $needwant;
 836                     }
 837                 }
 838             }
 839             if (/(\s+)(Module\s+)(Need\s+)Have/) {
 840                 $in_env_context = 0;
 841                 $moduleunpack = {
 842                                  tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
 843                                  type => 1,
 844                                 };
 845             } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
 846                 $in_env_context = 0;
 847                 my $adjust_1 = 0;
 848                 my $adjust_2 = -length($4);
 849                 my $adjust_3 = length($4);
 850                 # I think they do not really try to align, usually we
 851                 # get away with split
 852                 $moduleunpack = {
 853                                  tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
 854                                  type => 2,
 855                                 };
 856             }
 857         }
 858         if (/PREREQUISITES|Prerequisite modules loaded/) {
 859             $in_env_context = 0;
 860             $expect_prereq=1;
 861         }
 862         if ($expecting_toolchain_soon) {
 863             if (/(\s+)(Module(?:\sName)?\s+) Have/) {
 864                 $in_env_context = 0;
 865                 $expect_toolchain=1;
 866                 $expecting_toolchain_soon=0;
 867                 $moduleunpack = {
 868                                  tpl => 'a'.length($1).'a'.length($2).'a*',
 869                                  type => 3,
 870                                 };
 871             }
 872         }
 873         if (/toolchain versions installed/) {
 874             $in_env_context = 0;
 875             $expecting_toolchain_soon=1;
 876         }
 877     }                           # LINE
 878     if (! $extract{"mod:CPANPLUS"} && $extract{"meta:writer"} =~ /^CPANPLUS\s(\d+(\.\d+))$/) {
 879         $extract{"mod:CPANPLUS"} = $1;
 880     }
 881     if (! $extract{"meta:perl"} && $fallback_p5) {
 882         my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
 883         $extract{"meta:perl"} = $p5;
 884         $extract{"conf:git_describe"} = $patch if defined $patch;
 885     }
 886     $extract{id} = $id;
 887     if (my $filtercbbody = $Opt{filtercb}) {
 888         my $filtercb = eval('sub {'.$filtercbbody.'}');
 889         $filtercb->(\%extract);
 890     }
 891     if ($Opt{solve}) {
 892         if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
 893             $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
 894         }
 895         if ($extract{"meta:perl"} && $extract{"conf:osname"}) {
 896             $extract{"meta:osname+perl"} = join " ", @extract{"conf:osname","meta:perl"};
 897         }
 898         my $data = $dumpvars->{"==DATA=="} ||= [];
 899         push @$data, \%extract;
 900     }
 901     # ---- %extract finished ----
 902     my $diag = "";
 903     if (my $qr = $Opt{dumpvars}) {
 904         $qr = qr/$qr/;
 905         while (my($k,$v) = each %extract) {
 906             if ($k =~ $qr) {
 907                 $dumpvars->{$k}{$v}{$extract{"meta:ok"}}++;
 908             }
 909         }
 910     }
 911     for my $want (@q) {
 912         my $have  = $extract{$want} || "";
 913         $diag .= " $want\[$have]";
 914     }
 915     printf STDERR " %-4s %8s%s\n", $extract{"meta:ok"}, $id, $diag unless $Opt{quiet};
 916     if ($Opt{raw}) {
 917         $report =~ s/\s+\z//;
 918         print STDERR $report, "\n================\n" unless $Opt{quiet};
 919     }
 920     if ($Opt{interactive}) {
 921         eval { require IO::Prompt; 1; } or
 922             die "Option '--interactive' requires IO::Prompt installed";
 923         local @ARGV;
 924         local $ARGV;
 925         my $ans = IO::Prompt::prompt
 926             (
 927              -p => "View $id? [onechar: ynq] ",
 928              -d => "y",
 929              -u => qr/[ynq]/,
 930              -onechar,
 931             );
 932         print STDERR "\n" unless $Opt{quiet};
 933         if ($ans eq "y") {
 934             my($report) = _get_cooked_report($target, \%Opt);
 935             $Opt{pager} ||= "less";
 936             open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
 937             local $/;
 938             print {$lfh} $report;
 939             close $lfh or die "Could not close pager: $!"
 940         } elsif ($ans eq "q") {
 941             $Signal++;
 942             return;
 943         }
 944     }
 945     return \%extract;
 946 }
 947
 948 sub _get_cooked_report {
 949     my($target, $Opt_ref) = @_;
 950     my($report, $isHTML);
 951     if ($report = $Opt_ref->{article}) {
 952         $isHTML = $report =~ /^</;
 953         undef $target;
 954     }
 955     if ($target) {
 956         local $/;
 957         my $raw_report;
 958         if (0) {
 959         } elsif (-e $target) {
 960             open my $fh, '<', $target or die "Could not open '$target': $!";
 961             $raw_report = <$fh>;
 962         } elsif (-e "$target.gz") {
 963             open my $fh, "<", "$target.gz" or die "Could not open '$target.gz': $!";
 964
 965 #     Opens a gzip (.gz) file for reading or writing. The mode parameter
 966 #   is as in fopen ("rb" or "wb") but can also include a compression level
 967 #   ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
 968 #   Huffman only compression as in "wb1h", or 'R' for run-length encoding
 969 #   as in "wb1R". (See the description of deflateInit2 for more information
 970 #   about the strategy parameter.)
 971
 972             my $gz = Compress::Zlib::gzopen($fh, "rb");
 973             $raw_report = "";
 974             my $buffer;
 975             while (my $bytesread = $gz->gzread($buffer)) {
 976                 $raw_report .= $buffer;
 977             }
 978         } else {
 979             die "Could not find '$target' or '$target.gz'";
 980         }
 981         $isHTML = $raw_report =~ /^</;
 982         if ($isHTML) {
 983             if ($raw_report =~ m{^<\?.+?<html.+?<head.+?<body.+?<pre[^>]*>(.+)</pre>.*</body>.*</html>}s) {
 984                 $raw_report = decode_entities($1);
 985                 $isHTML = 0;
 986             }
 987         }
 988         if ($isHTML) {
 989             $report = decode_entities($raw_report);
 990         } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
 991                  ||
 992                  _looks_like_qp($raw_report)
 993                 ) {
 994             # note(1): minimizing MIME effort; don't know about reports in other formats
 995             # note(2): Net-Generatus-0.40 had an offending report
 996             $report = eval { MIME::QuotedPrint::decode_qp($raw_report) };
 997             if (!$report || $@) {
 998                 warn "WARNING: report '$target' could not be parsed as qp, giving up";
 999                 if ($raw_report =~ /Subject:.+Dear.+Perl.+Summary/s) {
1000                     $report = $raw_report;
1001                 }
1002             }
1003         } else {
1004             $report = $raw_report;
1005         }
1006     }
1007     if ($report =~ /\r\n/) {
1008         my @rlines = split /\r?\n/, $report;
1009         $report = join "\n", @rlines;
1010     }
1011     ($report, $isHTML);
1012 }
1013
1014 =head2 solve
1015
1016 Feeds a couple of potentially interesting data to
1017 Statistics::Regression and sorts the result by R^2 descending. Do not
1018 confuse this with a prove, rather take it as a useful hint. It can
1019 save you minutes of staring at data and provide a quick overview where
1020 one should look closer. Displays the N top candidates, where N
1021 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
1022 Regressions results with an R^2 of 1.00 are displayed in any case.
1023
1024 The function is called when the option C<-solve> is given on the
1025 commandline. Several extra config variables are calculated, see source
1026 code for details.
1027
1028 =cut
1029 {
1030     my %never_solve_on = map {($_ => 1)}
1031         (
1032          "conf:ccflags",
1033          "conf:config_args",
1034          "conf:cppflags",
1035          "conf:lddlflags",
1036          "conf:uname",
1037          "env:PATH",
1038          "env:PERL5LIB",
1039          "env:PERL5OPT",
1040          'env:$^X',
1041          'env:PERL5_CPANPLUS_IS_RUNNING',
1042          'env:PERL5_CPAN_IS_RUNNING',
1043          'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
1044          'meta:ok',
1045         );
1046     my %normalize_numeric =
1047         (
1048          id => sub { return shift },
1049
1050          # here we were treating date as numeric; current
1051          # implementation requires to decide for one normalization, so
1052          # we decided 2012-02 for a sampling focussing on recentness;
1053
1054          #'meta:date' => sub {
1055          #    my $v = shift;
1056          #    my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1057          #    unless (defined $M) {
1058          #        die "illegal value[$v] for a date";
1059          #    }
1060          #    Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1061          #},
1062         );
1063     my %normalize_value =
1064         (
1065          'meta:perl' => sub {
1066              my($perlatpatchlevel) = shift;
1067              my $perl = $perlatpatchlevel;
1068              $perl =~ s/\@.*//;
1069              $perl;
1070          },
1071          'meta:date' => sub {
1072              my $v = shift;
1073              my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1074              unless (defined $M) {
1075                  die "illegal value[$v] for a date";
1076              }
1077              my $epoch = Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1078              my $Y_epoch = time - 2*365.25*86400;
1079              my $ret;
1080              if ($epoch < $Y_epoch) {
1081                  $ret = $Y;
1082              } else {
1083                  my @gmtime = gmtime $Y_epoch; $gmtime[5] += 1900;
1084                  if ($Y == $gmtime[5]) {
1085                      $ret = $Y;
1086                  } else {
1087                      my $M_epoch = time - 9*7*86400;
1088                      if ($epoch < $M_epoch) {
1089                          $ret = "$Y-$M";
1090                      } else {
1091                          my @gmtime = gmtime $M_epoch; $gmtime[5] += 1900; $gmtime[4]++;
1092                          if ($Y == $gmtime[5] && $M == $gmtime[4]) {
1093                              $ret = "$Y-$M";
1094                          } else {
1095                              $ret = "$Y-$M-$D";
1096                          }
1097                      }
1098                  }
1099              }
1100              return $ret;
1101          },
1102         );
1103 sub solve {
1104     my($V,%Opt) = @_;
1105     require Statistics::Regression;
1106     my @regression;
1107     my $ycb;
1108     if (my $ycbbody = $Opt{ycb}) {
1109         $ycb = eval('sub {'.$ycbbody.'}');
1110         die if $@;
1111     } else {
1112         $ycb = sub {
1113             my $rec = shift;
1114             my $y;
1115             if ($rec->{"meta:ok"} eq "PASS") {
1116                 $y = 1;
1117             } elsif ($rec->{"meta:ok"} eq "FAIL") {
1118                 $y = 0;
1119             }
1120             return $y
1121         };
1122     }
1123   VAR: for my $variable (sort keys %$V) {
1124         next if $variable eq "==DATA==";
1125         if ($never_solve_on{$variable}){
1126             warn "Skipping '$variable'\n" unless $Opt{quiet};
1127             next VAR;
1128         }
1129         my $value_distribution = $V->{$variable};
1130         my $keys = keys %$value_distribution;
1131         my @X = qw(const);
1132         if ($normalize_numeric{$variable}) {
1133             push @X, "n_$variable";
1134         } else {
1135             my %seen = ();
1136             for my $value (sort keys %$value_distribution) {
1137                 my $pf = $value_distribution->{$value};
1138                 $pf->{PASS} ||= 0;
1139                 $pf->{FAIL} ||= 0;
1140                 if ($pf->{PASS} || $pf->{FAIL}) {
1141                     my $Xele = sprintf "eq_%s",
1142                         (
1143                          $normalize_value{$variable} ?
1144                          $normalize_value{$variable}->($value) :
1145                          $value
1146                         );
1147                     push @X, $Xele unless $seen{$Xele}++;
1148
1149                 }
1150                 if (
1151                     $pf->{PASS} xor $pf->{FAIL}
1152                    ) {
1153                     my $vl = 40;
1154                     substr($value,$vl) = "..." if length $value > 3+$vl;
1155                     my $poor_mans_freehand_estimation = 0;
1156                     if ($poor_mans_freehand_estimation) {
1157                         warn sprintf
1158                             (
1159                              "%4d %4d %-23s | %s\n",
1160                              $pf->{PASS},
1161                              $pf->{FAIL},
1162                              $variable,
1163                              $value,
1164                             );
1165                     }
1166                 }
1167             }
1168         }
1169         warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
1170         next VAR unless @X > 1;
1171         my %regdata =
1172             (
1173              X => \@X,
1174              data => [],
1175             );
1176       RECORD: for my $rec (@{$V->{"==DATA=="}}) {
1177             my $y = $ycb->($rec);
1178             next RECORD unless defined $y;
1179             my %obs;
1180             $obs{Y} = $y;
1181             @obs{@X} = (0) x @X;
1182             $obs{const} = 1;
1183             for my $x (@X) {
1184                 if ($x =~ /^eq_(.+)/) {
1185                     my $read_v = $1;
1186                     if (exists $rec->{$variable}
1187                         && defined $rec->{$variable}
1188                        ) {
1189                         my $use_v = (
1190                                      $normalize_value{$variable} ?
1191                                      $normalize_value{$variable}->($rec->{$variable}) :
1192                                      $rec->{$variable}
1193                                     );
1194                         if ($use_v eq $read_v) {
1195                             $obs{$x} = 1;
1196                         }
1197                     }
1198                     # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1199                 } elsif ($x =~ /^n_(.+)/) {
1200                     my $v = $1;
1201                     $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1202                     if ($@) {
1203                         warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1204                     }
1205                 }
1206             }
1207             push @{$regdata{data}}, \%obs;
1208         }
1209         _run_regression ($variable, \%regdata, \@regression, \%Opt);
1210     }
1211     my $top = min ($Opt{solvetop} || 3, scalar @regression);
1212     my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1213     $top = $max_rsq if $max_rsq && $max_rsq > $top;
1214     my $score = 0;
1215     printf
1216         (
1217          "State after regression testing: %d results, showing top %d\n\n",
1218          scalar @regression,
1219          $top,
1220         );
1221     for my $reg (sort {
1222                      $b->rsq <=> $a->rsq
1223                      ||
1224                      $a->k <=> $b->k
1225                  } @regression) {
1226         printf "(%d)\n", ++$score;
1227         eval { $reg->print; };
1228         if ($@) {
1229             printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1230         }
1231         last if --$top <= 0;
1232     }
1233 }
1234 }
1235
1236 # $variable is the name we pass through to S:R constructor
1237 # $regdata is hash and has the arrays "X" and "data" (observations)
1238 # X goes to S:R constructor
1239 # each observation has a Y which we pass to S:R in an include() call
1240 # $regression is the collector array of results
1241 # $opt are the options from outside, used to see if we are "verbose"
1242 sub _run_regression {
1243     my($variable,$regdata,$regression,$opt) = @_;
1244     my @X = @{$regdata->{X}};
1245     # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1246                                                # hold the reference
1247                                                # group
1248     while (@X > 1) {
1249         my $reg = Statistics::Regression->new($variable,\@X);
1250         for my $obs (@{$regdata->{data}}) {
1251             my $y = delete $obs->{Y};
1252             $reg->include($y, $obs);
1253             $obs->{Y} = $y;
1254         }
1255         eval {$reg->theta;
1256               my @e = $reg->standarderrors;
1257               die "found standarderrors == 0" if grep { 0 == $_ } @e;
1258               $reg->rsq;};
1259         if ($@) {
1260             if ($opt->{verbose} && $opt->{verbose}>=2) {
1261                 warn _yaml_dump
1262                     ({error=>"could not determine some regression parameters",
1263                       variable=>$variable,
1264                       k=>$reg->k,
1265                       n=>$reg->n,
1266                       X=>$regdata->{"X"},
1267                       errorstr => $@,
1268                      });
1269             }
1270             # reduce k in case that linear dependencies disturbed us;
1271             # often called reference group; I'm tempted to collect and
1272             # make visible
1273             splice @X, 1, 1;
1274         } else {
1275             # $reg->print;
1276             push @$regression, $reg;
1277             return;
1278         }
1279     }
1280 }
1281
1282 =head1 AUTHOR
1283
1284 Andreas König
1285
1286 =head1 BUGS
1287
1288 Please report any bugs or feature requests through the web
1289 interface at
1290 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1291 I will be notified, and then you'll automatically be notified of
1292 progress on your bug as I make changes.
1293
1294 =head1 SUPPORT
1295
1296 You can find documentation for this module with the perldoc command.
1297
1298     perldoc CPAN::Testers::ParseReport
1299
1300
1301 You can also look for information at:
1302
1303 =over 4
1304
1305 =item * RT: CPAN's request tracker
1306
1307 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1308
1309 =item * AnnoCPAN: Annotated CPAN documentation
1310
1311 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1312
1313 =item * CPAN Ratings
1314
1315 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1316
1317 =item * Search CPAN
1318
1319 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1320
1321 =back
1322
1323
1324 =head1 ACKNOWLEDGEMENTS
1325
1326 Thanks to RJBS for module-starter.
1327
1328 =head1 COPYRIGHT & LICENSE
1329
1330 Copyright 2008,2009,2010,2011,2012,2013,2014,2015,2016 Andreas König.
1331
1332 This program is free software; you can redistribute it and/or modify it
1333 under the same terms as Perl itself.
1334
1335
1336 =cut
1337
1338 1; # End of CPAN::Testers::ParseReport