do not invoke a new regexp before the first is done
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blob2d87ca6422d54cf9dc3cde5341e9e2160ca8f015
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use Compress::Zlib ();
7 use DateTime::Format::Strptime;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use List::MoreUtils qw(uniq);
14 use MIME::QuotedPrint ();
15 use Time::Local ();
16 use utf8;
18 our $default_transport = "http_cpantesters";
19 our $default_cturl = "http://static.cpantesters.org/distro";
20 our $Signal = 0;
22 =encoding utf-8
24 =head1 NAME
26 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
28 =cut
30 use version; our $VERSION = qv('0.2.9');
32 =head1 SYNOPSIS
34 The documentation in here is normally not needed because the code is
35 meant to be run from the standalone program C<ctgetreports>.
37 ctgetreports --q mod:Moose Devel-Events
39 =head1 DESCRIPTION
41 This is the core module for CPAN::Testers::ParseReport. If you're not
42 looking to extend or alter the behaviour of this module, you probably
43 want to look at L<ctgetreports> instead.
45 =head1 OPTIONS
47 Options are described in the L<ctgetreports> manpage and are passed
48 through to the functions unaltered.
50 =head1 FUNCTIONS
52 =head2 parse_distro($distro,%options)
54 reads the cpantesters JSON file or the local database for the distro
55 and loops through the reports for the specified or most recent version
56 of that distro found in these data.
58 parse_distro() intentionally has no meaningful return value, different
59 options would require different ones.
61 =head2 $extract = parse_single_report($report,$dumpvars,%options)
63 mirrors and reads this report. $report is of the form
65 { id => <integer>, guid => <guid>, }
67 $dumpvar is a hashreference that gets filled with data.
69 $extract is the result of parse_report() described below.
71 =cut
74 my $ua;
75 sub _ua {
76 return $ua if $ua;
77 $ua = LWP::UserAgent->new
79 keep_alive => 1,
80 env_proxy => 1,
82 $ua->parse_head(0);
83 $ua;
87 my $ua;
88 sub _ua_gzip {
89 return $ua if $ua;
90 $ua = LWP::UserAgent->new
92 keep_alive => 1,
93 env_proxy => 1,
95 $ua->parse_head(0);
96 $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
97 $ua;
102 # we called it yaml because it was yaml; now it is json
103 use JSON::XS;
104 my $j = JSON::XS->new->ascii->pretty;
105 sub _slurp {
106 my($file) = @_;
107 local $/;
108 open my $fh, $file or die "Could not open '$file': $!";
109 <$fh>;
111 sub _yaml_loadfile {
112 $j->decode(_slurp shift);
114 sub _yaml_dump {
115 $j->encode(shift);
119 sub _download_overview {
120 my($cts_dir, $distro, %Opt) = @_;
121 my $cturl = $Opt{cturl} ||= $default_cturl;
122 my $ctarget = "$cts_dir/$distro.json";
123 my $cheaders = "$cts_dir/$distro.headers";
124 if ($Opt{local}) {
125 unless (-e $ctarget) {
126 die "Alert: No local file '$ctarget' found, cannot continue\n";
128 } else {
129 if (! -e $ctarget or -M $ctarget > .25) {
130 if (-e $ctarget && $Opt{verbose}) {
131 my(@stat) = stat _;
132 my $timestamp = gmtime $stat[9];
133 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
135 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
136 my $firstletter = substr($distro,0,1);
137 my $uri = "$cturl/$firstletter/$distro.json";
138 my $resp = _ua->mirror($uri,$ctarget);
139 if ($resp->is_success) {
140 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
141 open my $fh, ">", $cheaders or die;
142 for ($resp->headers->as_string) {
143 print $fh $_;
144 if ($Opt{verbose} && $Opt{verbose}>1) {
145 print STDERR $_ unless $Opt{quiet};
148 } elsif (304 == $resp->code) {
149 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
150 my $atime = my $mtime = time;
151 utime $atime, $mtime, $cheaders;
152 } else {
153 die sprintf
155 "No success downloading %s: %s",
156 $uri,
157 $resp->status_line,
162 return $ctarget;
165 sub _parse_yaml {
166 my($ctarget, %Opt) = @_;
167 my $arr = _yaml_loadfile($ctarget);
168 my($selected_release_ul,$selected_release_distrov,$excuse_string);
169 if ($Opt{vdistro}) {
170 $excuse_string = "selected distro '$Opt{vdistro}'";
171 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
172 ($selected_release_distrov) = $arr->[0]{distversion};
173 } else {
174 $excuse_string = "any distro";
175 my $last_addition;
176 my %seen;
177 for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
178 unless ($seen{$report->{distversion}}++) {
179 $last_addition = $report->{distversion};
182 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
183 ($selected_release_distrov) = $last_addition;
185 unless ($selected_release_distrov) {
186 warn "Warning: could not find $excuse_string in '$ctarget'";
187 return;
189 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
190 my @all;
191 for my $test (@$arr) {
192 my $id = $test->{id};
193 push @all, {
194 id => $test->{id},
195 guid => $test->{guid},
197 return if $Signal;
199 @all = sort { $b->{id} <=> $a->{id} } @all;
200 return \@all;
203 sub parse_single_report {
204 my($report, $dumpvars, %Opt) = @_;
205 my($id) = $report->{id};
206 my($guid) = $report->{guid};
207 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
208 # the name nntp-testers was picked because originally the reports
209 # were available from an NNTP server
210 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
211 mkpath $nnt_dir;
212 my $target = "$nnt_dir/$id";
213 if ($Opt{local}) {
214 unless (-e $target) {
215 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
217 } else {
218 $Opt{transport} ||= $default_transport;
219 my $ttarget;
220 if (-e $target) {
221 $ttarget = $target;
222 } elsif (-e "$target.gz") {
223 $ttarget = "$target.gz";
225 if ($ttarget) {
226 my $raw_report;
227 open my $fh, $ttarget or die "Could not open '$ttarget': $!";
228 if (0) {
229 } elsif ($Opt{transport} eq "http_cpantesters") {
230 local $/;
231 $raw_report = <$fh>;
232 } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
233 my $gz = Compress::Zlib::gzopen($fh, "rb");
234 $raw_report = "";
235 my $buffer;
236 while (my $bytesread = $gz->gzread($buffer)) {
237 $raw_report .= $buffer;
240 if ($raw_report =~ m{<title>.*(Report not found|Error).*</title>}) {
241 unlink $ttarget or die "Could not unlink '$ttarget': $!";
244 if (! -e $target) {
245 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
246 if (0) {
247 } elsif ($Opt{transport} eq "http_cpantesters") {
248 my $mustfetch = 0;
249 if ($Opt{"prefer-local-reports"}) {
250 unless (-e $target) {
251 $mustfetch = 1;
253 } else {
254 $mustfetch = 1;
256 if ($mustfetch) {
257 my $resp = _ua->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1",$target);
258 if ($resp->is_success) {
259 if ($Opt{verbose}) {
260 my(@stat) = stat $target;
261 my $timestamp = gmtime $stat[9];
262 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
263 if ($Opt{verbose} > 1) {
264 print STDERR $resp->headers->as_string unless $Opt{quiet};
267 my $headers = "$target.headers";
268 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
269 print $fh $resp->headers->as_string;
270 } else {
271 die {severity=>0,
272 text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
275 } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
276 my $mustfetch = 0;
277 if ($Opt{"prefer-local-reports"}) {
278 unless (-e "$target.gz") {
279 $mustfetch = 1;
281 } else {
282 $mustfetch = 1;
284 if ($mustfetch) {
285 my $resp = _ua_gzip->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1","$target.gz");
286 if ($resp->is_success) {
287 if ($Opt{verbose}) {
288 my(@stat) = stat "$target.gz";
289 my $timestamp = gmtime $stat[9];
290 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
291 if ($Opt{verbose} > 1) {
292 print STDERR $resp->headers->as_string unless $Opt{quiet};
295 my $headers = "$target.headers";
296 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
297 print $fh $resp->headers->as_string;
298 } else {
299 die {severity=>0,
300 text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
303 } else {
304 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
308 parse_report($target, $dumpvars, %Opt);
311 sub parse_distro {
312 my($distro,%Opt) = @_;
313 my %dumpvars;
314 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
315 # the name cpantesters-show was picked because originally
316 # http://www.cpantesters.org/show/ contained html file that we had
317 # to parse.
318 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
319 mkpath $cts_dir;
320 if ($Opt{solve}) {
321 require Statistics::Regression;
322 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
324 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
325 $Opt{vdistro} = $distro;
326 $distro = $1;
328 my $reports;
329 if (my $ctdb = $Opt{ctdb}) {
330 require CPAN::WWW::Testers::Generator::Database;
331 require CPAN::DistnameInfo;
332 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
333 unless ($Opt{vdistro}) {
334 my $sql = "select version from cpanstats where dist=? order by id";
335 my @rows = $dbi->get_query($sql,$distro);
336 my($newest,%seen);
337 for my $row (@rows) {
338 $newest = $row->[0] unless $seen{$row->[0]}++;
340 $Opt{vdistro} = "$distro-$newest";
342 my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
343 my $dist = $d->dist;
344 my $version = $d->version;
345 my $sql = "select id, guid from cpanstats where dist=? and version=? order by id desc";
346 my @rows = $dbi->get_query($sql,$dist,$version);
347 my @all;
348 for my $row (@rows) {
349 push @all, {
350 id => $row->[0],
351 guid => $row->[1],
354 $reports = \@all;
355 } else {
356 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
357 $reports = _parse_yaml($ctarget,%Opt);
359 return unless $reports;
360 my $sampled = 0;
361 my $samplesize = $Opt{sample} || 0;
362 $samplesize = 0 if $samplesize && $samplesize >= @$reports;
363 REPEATER: {
364 my $i = 0;
365 my %taken;
366 REPORT: for my $report (@$reports) {
367 $i++;
368 if ($samplesize) {
369 my $need = $samplesize - $sampled;
370 next REPORT unless $need;
371 my $left = @$reports - $i;
372 # warn sprintf "tot %d i %d sampled %d need %d left %d\n", scalar @$reports, $i, $sampled, $need, $left;
373 my $want_this = (rand(1) <= ($need/$left));
374 next REPORT unless $want_this;
376 eval {parse_single_report($report, \%dumpvars, %Opt)};
377 if ($@) {
378 if (ref $@) {
379 if ($@->{severity}) {
380 die $@->{text};
381 } else {
382 warn $@->{text};
384 } else {
385 die $@;
388 $sampled++;
389 $taken{$i-1}=undef;
390 last REPEATER if $Signal;
392 if ($samplesize) {
393 PASSFAIL: for my $pf ("pass","fail") {
394 my $minx = $Opt{"min".$pf} or next PASSFAIL;
395 my $x = $dumpvars{"meta:ok"}{uc $pf}{uc $pf};
396 if ($x < $minx) {
397 # bump samplesize, remove already sampled reports from array, redo
398 my $bump = int($samplesize * 0.05)+1;
399 $samplesize += $bump;
400 for my $k (sort {$b <=> $a} keys %taken) {
401 splice @$reports, $k, 1;
403 redo REPEATER;
408 if ($Opt{dumpvars}) {
409 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
410 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
411 print $fh _yaml_dump(\%dumpvars);
412 close $fh or die "Could not close '$dumpfile': $!"
414 if ($Opt{solve}) {
415 solve(\%dumpvars,%Opt);
419 =head2 $bool = _looks_like_qp($raw_report)
421 We had to acknowledge the fact that some MTAs swallow the MIME-Version
422 header while passing MIME through. So we introduce fallback heuristics
423 that try to determine if a report is written in quoted printable.
425 Note that this subroutine is internal, just documented to have the
426 internals documented.
428 The current implementation counts the number of QP escaped spaces and
429 equal signs.
431 =cut
433 sub _looks_like_qp {
434 my($report) = @_;
435 my $count_space = () = $report =~ /=20/g;
436 return 1 if $count_space > 12;
437 my $count_equal = () = $report =~ /=3D/g;
438 return 1 if $count_equal > 12;
439 return 1 if $count_space+$count_equal > 24;
440 return 0; # waiting for a counter example
443 =head2 $extract = parse_report($target,$dumpvars,%Opt)
445 Reads one report. $target is the local filename to read. $dumpvars is
446 a hashref which gets filled with descriptive stats about
447 PASS/FAIL/etc. %Opt are the options as described in the
448 C<ctgetreports> manpage. $extract is a hashref containing the found
449 variables.
451 Note: this parsing is a bit dirty but as it seems good enough I'm not
452 inclined to change it. We parse HTML with regexps only, not an HTML
453 parser. Only the entities are decoded.
455 In %Opt you can use
457 article => $some_full_article_as_scalar
459 to use this function to parse one full article as text. When this is
460 given, the argument $target is not read, but its basename is taken to
461 be the id of the article. (OMG, hackers!)
463 =cut
464 sub parse_report {
465 my($target,$dumpvars,%Opt) = @_;
466 our @q;
467 my $id = basename($target);
468 # warn "DEBUG: id[$id]";
469 my($ok,$about);
471 my(%extract);
473 my($report,$isHTML) = _get_cooked_report($target, \%Opt);
474 my @qr = map /^qr:(.+)/, @{$Opt{q}};
475 if ($Opt{raw} || @qr) {
476 for my $qr (@qr) {
477 my $cqr = eval "qr{$qr}";
478 die "Could not compile regular expression '$qr': $@" if $@;
479 my(@matches) = $report =~ $cqr;
480 my $v;
481 if (@matches) {
482 if (@matches==1) {
483 $v = $matches[0];
484 } else {
485 $v = join "", map {"($_)"} @matches;
487 } else {
488 $v = "";
490 $extract{"qr:$qr"} = $v;
494 my $report_writer;
495 my $moduleunpack = {};
496 my $expect_prereq = 0;
497 my $expect_toolchain = 0;
498 my $expecting_toolchain_soon = 0;
499 my $fallback_p5 = "";
501 my $in_summary = 0;
502 my $in_summary_seen_platform = 0;
503 my $in_prg_output = 0;
504 my $in_env_context = 0;
505 my $in_test_summary = 0;
507 my $current_headline;
508 my @previous_line = ""; # so we can neutralize line breaks
509 my @rlines = split /\r?\n/, $report;
510 LINE: for (@rlines) {
511 next LINE unless ($isHTML ? m/<title>((\S+)\s+(\S+))/ : m/^Subject:\s*((\S+)\s+(\S+))/);
512 my $s = $1;
513 $s = $1 if $s =~ m{<strong>(.+)};
514 if ($s =~ /(\S+)\s+(\S+)/) {
515 $ok = $1;
516 $about = $2;
518 $extract{"meta:ok"} = $ok;
519 $extract{"meta:about"} = $about;
520 last;
522 unless ($extract{"meta:about"}) {
523 $extract{"meta:about"} = $Opt{vdistro};
524 unless ($extract{"meta:ok"}) {
525 warn "Warning: could not determine state of report";
528 LINE: while (@rlines) {
529 $_ = shift @rlines;
530 while (/!$/ and @rlines) {
531 my $followupline = shift @rlines;
532 $followupline =~ s/^\s+//; # remo leading space
533 $_ .= $followupline;
535 if (/^--------/) {
536 if ($previous_line[-2] && $previous_line[-2] =~ /^--------/) {
537 $current_headline = $previous_line[-1];
538 if ($current_headline =~ /PROGRAM OUTPUT/) {
539 $in_prg_output = 1;
540 } else {
541 $in_prg_output = 0;
543 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
544 $in_env_context = 1;
545 } else {
546 $in_env_context = 0;
548 } elsif ($previous_line[-1] && $previous_line[-1] =~ /Test Summary Report/) {
549 $in_test_summary = 1;
550 $in_prg_output = 0;
553 if ($extract{"meta:perl"}) {
554 if ( $in_summary
555 and !$extract{"conf:git_commit_id"}
556 and /Commit id:\s*([[:xdigit:]]+)/) {
557 $extract{"conf:git_commit_id"} = $1;
559 } else {
560 my $p5;
561 if (0) {
562 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
563 $p5 = $1;
564 $in_summary = 1;
565 $in_env_context = 0;
567 if ($p5) {
568 my($r,$v,$s,$p);
569 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
570 $r =~ s/\.0//; # 5.0 6 2!
571 $extract{"meta:perl"} = "$r.$v.$s\@$p";
572 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
573 $r =~ s/\.0//;
574 $extract{"meta:perl"} = "$r.$v.$s";
575 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
576 $r =~ s/\.0//;
577 $extract{"meta:perl"} = "$r.$v.$s";
578 } else {
579 $extract{"meta:perl"} = $p5;
583 unless ($extract{"meta:from"}) {
584 if (0) {
585 } elsif ($isHTML ?
586 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
587 m|^From:\s*(.+)|
589 my $f = $1;
590 $f = $1 if $f =~ m{<strong>(.+)</strong>};
591 $extract{"meta:from"} = $f;
593 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
595 unless ($extract{"meta:date"}) {
596 if (0) {
597 } elsif ($isHTML ?
598 m|<div class="h_name">Date:</div> (.+?)<br/>| :
599 m|^Date:\s*(.+)|
601 my $date = $1;
602 $date = $1 if $date =~ m{<strong>(.+)</strong>};
603 my($dt);
604 DATEFMT: for my $pat ("%Y-%m-%dT%TZ", # 2010-07-07T14:01:40Z
605 "%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
606 "%b %d, %Y %R", # July 10,...
607 "%b %d, %Y %R", # July 4,...
609 $dt = eval {
610 my $p = DateTime::Format::Strptime->new
612 locale => "en",
613 time_zone => "UTC",
614 pattern => $pat,
616 $p->parse_datetime($date)
618 last DATEFMT if $dt;
620 unless ($dt) {
621 warn "Could not parse date[$date], setting to epoch 0";
622 $dt = DateTime->from_epoch( epoch => 0 );
624 $extract{"meta:date"} = $dt->datetime;
626 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
628 unless ($extract{"meta:writer"}) {
629 for ("$previous_line[-1] $_") {
630 if (0) {
631 } elsif (/CPANPLUS, version (\S+)/) {
632 $extract{"meta:writer"} = "CPANPLUS $1";
633 } elsif (/created (?:automatically )?by (\S+)/) {
634 $extract{"meta:writer"} = $1;
635 if (/\s+on\s+perl\s+([^,]+),/) {
636 $fallback_p5 = $1;
638 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
639 $extract{"meta:writer"} = "$1 $2";
641 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
644 if ($in_summary) {
645 # we do that first three lines a bit too often
646 my $qr = $Opt{dumpvars} || "";
647 $qr = qr/$qr/ if $qr;
648 unless (@q) {
649 @q = @{$Opt{q}||[]};
650 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
653 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
655 if (/^\s+Platform:$/) {
656 $in_summary_seen_platform=1;
657 } elsif (/^\s*$/ || m|</pre>|) {
658 # if not html, we have reached the end now
659 if ($in_summary_seen_platform) {
660 # some perls have an empty line after the summary line
661 $in_summary = 0;
663 } else {
664 my(%kv) = m!\G,?\s*([^=]+)= # left hand side and equal sign
666 [^',\s]+(?=.+=) # use64bitint=define use64bitall=define uselongdouble=undef
667 # (lookahead needed for left-over equal sign)
669 [^',]+$ # libpth=/usr/lib /usr/local/lib
671 '[^']+?' # cccdlflags='-DPIC -fPIC'
673 \S+ # useshrplib=false
674 )!xgc;
675 while (my($k,$v) = each %kv) {
676 my $ck = "conf:$k";
677 $ck =~ s/\s+$//;
678 $v =~ s/,$//;
679 if ($v =~ /^'(.*)'$/) {
680 $v = $1;
682 $v =~ s/^\s+//;
683 $v =~ s/\s+$//;
684 if ($qr && $ck =~ $qr) {
685 $extract{$ck} = $v;
686 } elsif ($conf_vars{$ck}) {
687 $extract{$ck} = $v;
692 if ($in_prg_output) {
693 unless ($extract{"meta:output_from"}) {
694 if (/Output from (.+):$/) {
695 $extract{"meta:output_from"} = $1
699 if ($in_env_context) {
700 if ($extract{"meta:writer"} =~ /^CPANPLUS\b/
702 exists $extract{"env:PERL5_CPANPLUS_IS_VERSION"}
705 s/Perl:\s+\$\^X/\$^X/
707 s/EUID:\s+\$>/\$EUID/
709 s/UID:\s+\$</\$UID/
711 s/EGID:\s+\$\)/\$EGID/
713 s/GID:\s+\$\(/\$GID/
716 if (my($left,$right) = /^\s{4}(\S+)\s*=\s*(.*)$/) {
717 if ($left eq '$UID/$EUID') {
718 my($uid,$euid) = split m{\s*/\s*}, $right;
719 $extract{'env:$UID'} = $uid;
720 $extract{'env:$EUID'} = $euid;
721 } elsif ($left =~ /GID/) {
722 for my $xgid (uniq split " ", $right) {
723 $extract{"env:$left$xgid"} = "true";
725 } else {
726 $extract{"env:$left"} = $right;
730 if ($in_test_summary) {
731 if (/^(?:Result:|Files=\d)/) {
732 $in_test_summary = 0;
733 } elsif (/^(\S+)\s+\(Wstat:.+?Tests:.+?Failed:\s*(\d+)\)$/) {
734 my $in_test_summary_current_test = $1; # t/globtest.t or t\globtest.t
735 my $in_test_summary_current_failed = $2;
736 $in_test_summary_current_test =~ s|\\|/|g; # only t/globtest.t
737 $extract{"fail:$in_test_summary_current_test"} = $in_test_summary_current_failed;
738 } elsif (/^\s+Failed tests?:/) {
739 # ignoring the exact combination of tests for now, seems like overkill
742 push @previous_line, $_;
743 if ($expect_prereq || $expect_toolchain) {
744 if (/Perl module toolchain versions installed/) {
745 # first time discovered in CPANPLUS 0.89_06
746 $expecting_toolchain_soon = 1;
747 $expect_prereq=0;
748 next LINE;
750 if (exists $moduleunpack->{type}) {
751 my($module,$v,$needwant);
752 # type 1 and 2 are about prereqs, type three about toolchain
753 if ($moduleunpack->{type} == 1) {
754 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
755 next LINE if $@;
756 if ($leader =~ /^-/) {
757 $moduleunpack = {};
758 $expect_prereq = 0;
759 next LINE;
760 } elsif ($leader =~ /^(
761 buil # build_requires:
762 |conf # configure_requires:
763 )/x) {
764 next LINE;
765 } elsif ($module =~ /^(
766 - # line drawing
767 )/x) {
768 next LINE;
770 } elsif ($moduleunpack->{type} == 2) {
771 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
772 next LINE if $@;
773 for ($module,$v,$needwant) {
774 s/^\s+//;
775 s/\s+$//;
777 if ($leader =~ /^\*/) {
778 $moduleunpack = {};
779 $expect_prereq = 0;
780 next LINE;
781 } elsif (!defined $v
782 or !defined $needwant
783 or $v =~ /\s/
784 or $needwant =~ /\s/
786 ($module,$v,$needwant) = split " ", $_;
788 } elsif ($moduleunpack->{type} == 3) {
789 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
790 next LINE if $@;
791 if (!$module) {
792 $moduleunpack = {};
793 $expect_toolchain = 0;
794 next LINE;
795 } elsif ($module =~ /^-/) {
796 next LINE;
799 $module =~ s/\s+$//;
800 if ($module) {
801 $v =~ s/^\s+//;
802 $v =~ s/\s+$//;
803 my($modulename,$versionlead) = split " ", $module;
804 if (defined $modulename and defined $versionlead) {
805 $module = $modulename;
806 $v = "$versionlead$v";
808 if ($v eq "Have") {
809 next LINE;
811 $extract{"mod:$module"} = $v;
812 if (defined $needwant) {
813 $needwant =~ s/^\s+//;
814 $needwant =~ s/\s+$//;
815 $extract{"prereq:$module"} = $needwant;
819 if (/(\s+)(Module\s+)(Need\s+)Have/) {
820 $in_env_context = 0;
821 $moduleunpack = {
822 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
823 type => 1,
825 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
826 $in_env_context = 0;
827 my $adjust_1 = 0;
828 my $adjust_2 = -length($4);
829 my $adjust_3 = length($4);
830 # I think they do not really try to align, usually we
831 # get away with split
832 $moduleunpack = {
833 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
834 type => 2,
838 if (/PREREQUISITES|Prerequisite modules loaded/) {
839 $in_env_context = 0;
840 $expect_prereq=1;
842 if ($expecting_toolchain_soon) {
843 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
844 $in_env_context = 0;
845 $expect_toolchain=1;
846 $expecting_toolchain_soon=0;
847 $moduleunpack = {
848 tpl => 'a'.length($1).'a'.length($2).'a*',
849 type => 3,
853 if (/toolchain versions installed/) {
854 $in_env_context = 0;
855 $expecting_toolchain_soon=1;
857 } # LINE
858 if (! $extract{"mod:CPANPLUS"} && $extract{"meta:writer"} =~ /^CPANPLUS\s(\d+(\.\d+))$/) {
859 $extract{"mod:CPANPLUS"} = $1;
861 if (! $extract{"meta:perl"} && $fallback_p5) {
862 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
863 $extract{"meta:perl"} = $p5;
864 $extract{"conf:git_describe"} = $patch if defined $patch;
866 $extract{id} = $id;
867 if (my $filtercbbody = $Opt{filtercb}) {
868 my $filtercb = eval('sub {'.$filtercbbody.'}');
869 $filtercb->(\%extract);
871 if ($Opt{solve}) {
872 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
873 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
875 if ($extract{"meta:perl"} && $extract{"conf:osname"}) {
876 $extract{"meta:osname+perl"} = join " ", @extract{"conf:osname","meta:perl"};
878 my $data = $dumpvars->{"==DATA=="} ||= [];
879 push @$data, \%extract;
881 # ---- %extract finished ----
882 my $diag = "";
883 if (my $qr = $Opt{dumpvars}) {
884 $qr = qr/$qr/;
885 while (my($k,$v) = each %extract) {
886 if ($k =~ $qr) {
887 $dumpvars->{$k}{$v}{$extract{"meta:ok"}}++;
891 for my $want (@q) {
892 my $have = $extract{$want} || "";
893 $diag .= " $want\[$have]";
895 printf STDERR " %-4s %8d%s\n", $extract{"meta:ok"}, $id, $diag unless $Opt{quiet};
896 if ($Opt{raw}) {
897 $report =~ s/\s+\z//;
898 print STDERR $report, "\n================\n" unless $Opt{quiet};
900 if ($Opt{interactive}) {
901 eval { require IO::Prompt; 1; } or
902 die "Option '--interactive' requires IO::Prompt installed";
903 local @ARGV;
904 local $ARGV;
905 my $ans = IO::Prompt::prompt
907 -p => "View $id? [onechar: ynq] ",
908 -d => "y",
909 -u => qr/[ynq]/,
910 -onechar,
912 print STDERR "\n" unless $Opt{quiet};
913 if ($ans eq "y") {
914 my($report) = _get_cooked_report($target, \%Opt);
915 $Opt{pager} ||= "less";
916 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
917 local $/;
918 print {$lfh} $report;
919 close $lfh or die "Could not close pager: $!"
920 } elsif ($ans eq "q") {
921 $Signal++;
922 return;
925 return \%extract;
928 sub _get_cooked_report {
929 my($target, $Opt_ref) = @_;
930 my($report, $isHTML);
931 if ($report = $Opt_ref->{article}) {
932 $isHTML = $report =~ /^</;
933 undef $target;
935 if ($target) {
936 local $/;
937 my $raw_report;
938 if (0) {
939 } elsif (-e $target) {
940 open my $fh, '<', $target or die "Could not open '$target': $!";
941 $raw_report = <$fh>;
942 } elsif (-e "$target.gz") {
943 open my $fh, "<", "$target.gz" or die "Could not open '$target.gz': $!";
945 # Opens a gzip (.gz) file for reading or writing. The mode parameter
946 # is as in fopen ("rb" or "wb") but can also include a compression level
947 # ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
948 # Huffman only compression as in "wb1h", or 'R' for run-length encoding
949 # as in "wb1R". (See the description of deflateInit2 for more information
950 # about the strategy parameter.)
952 my $gz = Compress::Zlib::gzopen($fh, "rb");
953 $raw_report = "";
954 my $buffer;
955 while (my $bytesread = $gz->gzread($buffer)) {
956 $raw_report .= $buffer;
958 } else {
959 die "Could not find '$target' or '$target.gz'";
961 $isHTML = $raw_report =~ /^</;
962 if ($isHTML) {
963 if ($raw_report =~ m{^<\?.+?<html.+?<head.+?<body.+?<pre[^>]*>(.+)</pre>.*</body>.*</html>}s) {
964 $raw_report = decode_entities($1);
965 $isHTML = 0;
968 if ($isHTML) {
969 $report = decode_entities($raw_report);
970 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
972 _looks_like_qp($raw_report)
974 # minimizing MIME effort; don't know about reports in other formats
975 $report = MIME::QuotedPrint::decode_qp($raw_report);
976 } else {
977 $report = $raw_report;
980 if ($report =~ /\r\n/) {
981 my @rlines = split /\r?\n/, $report;
982 $report = join "\n", @rlines;
984 ($report, $isHTML);
987 =head2 solve
989 Feeds a couple of potentially interesting data to
990 Statistics::Regression and sorts the result by R^2 descending. Do not
991 confuse this with a prove, rather take it as a useful hint. It can
992 save you minutes of staring at data and provide a quick overview where
993 one should look closer. Displays the N top candidates, where N
994 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
995 Regressions results with an R^2 of 1.00 are displayed in any case.
997 The function is called when the option C<-solve> is give on the
998 commandline. Several extra config variables are calculated, see source
999 code for details.
1001 =cut
1003 my %never_solve_on = map {($_ => 1)}
1005 "conf:ccflags",
1006 "conf:config_args",
1007 "conf:cppflags",
1008 "conf:lddlflags",
1009 "conf:uname",
1010 "env:PATH",
1011 "env:PERL5LIB",
1012 "env:PERL5OPT",
1013 'env:$^X',
1014 'env:PERL5_CPANPLUS_IS_RUNNING',
1015 'env:PERL5_CPAN_IS_RUNNING',
1016 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
1017 'meta:ok',
1019 my %normalize_numeric =
1021 id => sub { return shift },
1023 # here we were treating date as numeric; current
1024 # implementation requires to decide for one normalization, so
1025 # we decided 2012-02 for a sampling focussing on recentness;
1027 #'meta:date' => sub {
1028 # my $v = shift;
1029 # my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1030 # unless (defined $M) {
1031 # die "illegal value[$v] for a date";
1033 # Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1036 my %normalize_value =
1038 'meta:perl' => sub {
1039 my($perlatpatchlevel) = shift;
1040 my $perl = $perlatpatchlevel;
1041 $perl =~ s/\@.*//;
1042 $perl;
1044 'meta:date' => sub {
1045 my $v = shift;
1046 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1047 unless (defined $M) {
1048 die "illegal value[$v] for a date";
1050 my $epoch = Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1051 my $Y_epoch = time - 2*365.25*86400;
1052 my $ret;
1053 if ($epoch < $Y_epoch) {
1054 $ret = $Y;
1055 } else {
1056 my @gmtime = gmtime $Y_epoch; $gmtime[5] += 1900;
1057 if ($Y == $gmtime[5]) {
1058 $ret = $Y;
1059 } else {
1060 my $M_epoch = time - 9*7*86400;
1061 if ($epoch < $M_epoch) {
1062 $ret = "$Y-$M";
1063 } else {
1064 my @gmtime = gmtime $M_epoch; $gmtime[5] += 1900; $gmtime[4]++;
1065 if ($Y == $gmtime[5] && $M == $gmtime[4]) {
1066 $ret = "$Y-$M";
1067 } else {
1068 $ret = "$Y-$M-$D";
1073 return $ret;
1076 sub solve {
1077 my($V,%Opt) = @_;
1078 require Statistics::Regression;
1079 my @regression;
1080 my $ycb;
1081 if (my $ycbbody = $Opt{ycb}) {
1082 $ycb = eval('sub {'.$ycbbody.'}');
1083 die if $@;
1084 } else {
1085 $ycb = sub {
1086 my $rec = shift;
1087 my $y;
1088 if ($rec->{"meta:ok"} eq "PASS") {
1089 $y = 1;
1090 } elsif ($rec->{"meta:ok"} eq "FAIL") {
1091 $y = 0;
1093 return $y
1096 VAR: for my $variable (sort keys %$V) {
1097 next if $variable eq "==DATA==";
1098 if ($never_solve_on{$variable}){
1099 warn "Skipping '$variable'\n" unless $Opt{quiet};
1100 next VAR;
1102 my $value_distribution = $V->{$variable};
1103 my $keys = keys %$value_distribution;
1104 my @X = qw(const);
1105 if ($normalize_numeric{$variable}) {
1106 push @X, "n_$variable";
1107 } else {
1108 my %seen = ();
1109 for my $value (sort keys %$value_distribution) {
1110 my $pf = $value_distribution->{$value};
1111 $pf->{PASS} ||= 0;
1112 $pf->{FAIL} ||= 0;
1113 if ($pf->{PASS} || $pf->{FAIL}) {
1114 my $Xele = sprintf "eq_%s",
1116 $normalize_value{$variable} ?
1117 $normalize_value{$variable}->($value) :
1118 $value
1120 push @X, $Xele unless $seen{$Xele}++;
1123 if (
1124 $pf->{PASS} xor $pf->{FAIL}
1126 my $vl = 40;
1127 substr($value,$vl) = "..." if length $value > 3+$vl;
1128 my $poor_mans_freehand_estimation = 0;
1129 if ($poor_mans_freehand_estimation) {
1130 warn sprintf
1132 "%4d %4d %-23s | %s\n",
1133 $pf->{PASS},
1134 $pf->{FAIL},
1135 $variable,
1136 $value,
1142 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
1143 next VAR unless @X > 1;
1144 my %regdata =
1146 X => \@X,
1147 data => [],
1149 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
1150 my $y = $ycb->($rec);
1151 next RECORD unless defined $y;
1152 my %obs;
1153 $obs{Y} = $y;
1154 @obs{@X} = (0) x @X;
1155 $obs{const} = 1;
1156 for my $x (@X) {
1157 if ($x =~ /^eq_(.+)/) {
1158 my $read_v = $1;
1159 if (exists $rec->{$variable}
1160 && defined $rec->{$variable}
1162 my $use_v = (
1163 $normalize_value{$variable} ?
1164 $normalize_value{$variable}->($rec->{$variable}) :
1165 $rec->{$variable}
1167 if ($use_v eq $read_v) {
1168 $obs{$x} = 1;
1171 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1172 } elsif ($x =~ /^n_(.+)/) {
1173 my $v = $1;
1174 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1175 if ($@) {
1176 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1180 push @{$regdata{data}}, \%obs;
1182 _run_regression ($variable, \%regdata, \@regression, \%Opt);
1184 my $top = min ($Opt{solvetop} || 3, scalar @regression);
1185 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1186 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1187 my $score = 0;
1188 printf
1190 "State after regression testing: %d results, showing top %d\n\n",
1191 scalar @regression,
1192 $top,
1194 for my $reg (sort {
1195 $b->rsq <=> $a->rsq
1197 $a->k <=> $b->k
1198 } @regression) {
1199 printf "(%d)\n", ++$score;
1200 eval { $reg->print; };
1201 if ($@) {
1202 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1204 last if --$top <= 0;
1209 # $variable is the name we pass through to S:R constructor
1210 # $regdata is hash and has the arrays "X" and "data" (observations)
1211 # X goes to S:R constructor
1212 # each observation has a Y which we pass to S:R in an include() call
1213 # $regression is the collector array of results
1214 # $opt are the options from outside, used to see if we are "verbose"
1215 sub _run_regression {
1216 my($variable,$regdata,$regression,$opt) = @_;
1217 my @X = @{$regdata->{X}};
1218 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1219 # hold the reference
1220 # group
1221 while (@X > 1) {
1222 my $reg = Statistics::Regression->new($variable,\@X);
1223 for my $obs (@{$regdata->{data}}) {
1224 my $y = delete $obs->{Y};
1225 $reg->include($y, $obs);
1226 $obs->{Y} = $y;
1228 eval {$reg->theta;
1229 my @e = $reg->standarderrors;
1230 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1231 $reg->rsq;};
1232 if ($@) {
1233 if ($opt->{verbose} && $opt->{verbose}>=2) {
1234 warn _yaml_dump
1235 ({error=>"could not determine some regression parameters",
1236 variable=>$variable,
1237 k=>$reg->k,
1238 n=>$reg->n,
1239 X=>$regdata->{"X"},
1240 errorstr => $@,
1243 # reduce k in case that linear dependencies disturbed us;
1244 # often called reference group; I'm tempted to collect and
1245 # make visible
1246 splice @X, 1, 1;
1247 } else {
1248 # $reg->print;
1249 push @$regression, $reg;
1250 return;
1255 =head1 AUTHOR
1257 Andreas König
1259 =head1 BUGS
1261 Please report any bugs or feature requests through the web
1262 interface at
1263 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1264 I will be notified, and then you'll automatically be notified of
1265 progress on your bug as I make changes.
1267 =head1 SUPPORT
1269 You can find documentation for this module with the perldoc command.
1271 perldoc CPAN::Testers::ParseReport
1274 You can also look for information at:
1276 =over 4
1278 =item * RT: CPAN's request tracker
1280 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1282 =item * AnnoCPAN: Annotated CPAN documentation
1284 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1286 =item * CPAN Ratings
1288 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1290 =item * Search CPAN
1292 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1294 =back
1297 =head1 ACKNOWLEDGEMENTS
1299 Thanks to RJBS for module-starter.
1301 =head1 COPYRIGHT & LICENSE
1303 Copyright 2008,2009,2010,2011,2012,2013 Andreas König.
1305 This program is free software; you can redistribute it and/or modify it
1306 under the same terms as Perl itself.
1309 =cut
1311 1; # End of CPAN::Testers::ParseReport