add the last three years to copyright
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
bloba96e12bdd5d6bb6d24c8fa693f34130085283dd1
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use Compress::Zlib ();
7 use DateTime::Format::Strptime;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use List::MoreUtils qw(uniq);
14 use MIME::QuotedPrint ();
15 use Time::Local ();
16 use utf8;
18 our $default_transport = "http_cpantesters";
19 our $default_cturl = "http://static.cpantesters.org/distro";
20 our $Signal = 0;
22 =encoding utf-8
24 =head1 NAME
26 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
28 =cut
30 use version; our $VERSION = qv('0.3.0');
32 =head1 SYNOPSIS
34 The documentation in here is normally not needed because the code is
35 meant to be run from the standalone program C<ctgetreports>.
37 ctgetreports --q mod:Moose Devel-Events
39 =head1 DESCRIPTION
41 This is the core module for CPAN::Testers::ParseReport. If you're not
42 looking to extend or alter the behaviour of this module, you probably
43 want to look at L<ctgetreports> instead.
45 =head1 OPTIONS
47 Options are described in the L<ctgetreports> manpage and are passed
48 through to the functions unaltered.
50 =head1 FUNCTIONS
52 =head2 parse_distro($distro,%options)
54 reads the cpantesters JSON file or the local database for the distro
55 and loops through the reports for the specified or most recent version
56 of that distro found in these data.
58 parse_distro() intentionally has no meaningful return value, different
59 options would require different ones.
61 =head2 $extract = parse_single_report($report,$dumpvars,%options)
63 mirrors and reads this report. $report is of the form
65 { id => <integer>, guid => <guid>, }
67 $dumpvar is a hashreference that gets filled with data.
69 $extract is the result of parse_report() described below.
71 =cut
74 my $ua;
75 sub _ua {
76 return $ua if $ua;
77 $ua = LWP::UserAgent->new
79 keep_alive => 1,
80 env_proxy => 1,
82 $ua->parse_head(0);
83 $ua;
87 my $ua;
88 sub _ua_gzip {
89 return $ua if $ua;
90 $ua = LWP::UserAgent->new
92 keep_alive => 1,
93 env_proxy => 1,
95 $ua->parse_head(0);
96 $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
97 $ua;
102 # we called it yaml because it was yaml; now it is json
103 use JSON::XS;
104 my $j = JSON::XS->new->ascii->pretty;
105 sub _slurp {
106 my($file) = @_;
107 local $/;
108 open my $fh, $file or die "Could not open '$file': $!";
109 <$fh>;
111 sub _yaml_loadfile {
112 $j->decode(_slurp shift);
114 sub _yaml_dump {
115 $j->encode(shift);
119 sub _download_overview {
120 my($cts_dir, $distro, %Opt) = @_;
121 my $cturl = $Opt{cturl} ||= $default_cturl;
122 my $ctarget = "$cts_dir/$distro.json";
123 my $cheaders = "$cts_dir/$distro.headers";
124 if ($Opt{local}) {
125 unless (-e $ctarget) {
126 die "Alert: No local file '$ctarget' found, cannot continue\n";
128 } else {
129 if (! -e $ctarget or -M $ctarget > .25) {
130 if (-e $ctarget && $Opt{verbose}) {
131 my(@stat) = stat _;
132 my $timestamp = gmtime $stat[9];
133 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
135 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
136 my $firstletter = substr($distro,0,1);
137 my $uri = "$cturl/$firstletter/$distro.json";
138 my $resp = _ua->mirror($uri,$ctarget);
139 if ($resp->is_success) {
140 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
141 open my $fh, ">", $cheaders or die;
142 for ($resp->headers->as_string) {
143 print $fh $_;
144 if ($Opt{verbose} && $Opt{verbose}>1) {
145 print STDERR $_ unless $Opt{quiet};
148 } elsif (304 == $resp->code) {
149 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
150 my $atime = my $mtime = time;
151 utime $atime, $mtime, $cheaders;
152 } else {
153 die sprintf
155 "No success downloading %s: %s",
156 $uri,
157 $resp->status_line,
162 return $ctarget;
165 sub _parse_yaml {
166 my($ctarget, %Opt) = @_;
167 my $arr = _yaml_loadfile($ctarget);
168 my($selected_release_ul,$selected_release_distrov,$excuse_string);
169 if ($Opt{vdistro}) {
170 $excuse_string = "selected distro '$Opt{vdistro}'";
171 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
172 ($selected_release_distrov) = $arr->[0]{distversion};
173 } else {
174 $excuse_string = "any distro";
175 my $last_addition;
176 my %seen;
177 for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
178 unless ($seen{$report->{distversion}}++) {
179 $last_addition = $report->{distversion};
182 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
183 ($selected_release_distrov) = $last_addition;
185 unless ($selected_release_distrov) {
186 warn "Warning: could not find $excuse_string in '$ctarget'";
187 return;
189 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
190 my @all;
191 for my $test (@$arr) {
192 my $id = $test->{id};
193 push @all, {
194 id => $test->{id},
195 guid => $test->{guid},
197 return if $Signal;
199 @all = sort { $b->{id} <=> $a->{id} } @all;
200 return \@all;
203 sub parse_single_report {
204 my($report, $dumpvars, %Opt) = @_;
205 my($id) = $report->{id};
206 my($guid) = $report->{guid};
207 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
208 # the name nntp-testers was picked because originally the reports
209 # were available from an NNTP server
210 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
211 mkpath $nnt_dir;
212 my $target = "$nnt_dir/$id";
213 if ($Opt{local}) {
214 unless (-e $target) {
215 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
217 } else {
218 $Opt{transport} ||= $default_transport;
219 my $ttarget;
220 if (-e $target) {
221 $ttarget = $target;
222 } elsif (-e "$target.gz") {
223 $ttarget = "$target.gz";
225 if ($ttarget) {
226 my $raw_report;
227 open my $fh, $ttarget or die "Could not open '$ttarget': $!";
228 if (0) {
229 } elsif ($Opt{transport} eq "http_cpantesters") {
230 local $/;
231 $raw_report = <$fh>;
232 } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
233 my $gz = Compress::Zlib::gzopen($fh, "rb");
234 $raw_report = "";
235 my $buffer;
236 while (my $bytesread = $gz->gzread($buffer)) {
237 $raw_report .= $buffer;
240 if ($raw_report =~ m{<title>.*(Report not found|Error).*</title>}) {
241 unlink $ttarget or die "Could not unlink '$ttarget': $!";
244 if (! -e $target) {
245 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
246 if (0) {
247 } elsif ($Opt{transport} eq "http_cpantesters") {
248 my $mustfetch = 0;
249 if ($Opt{"prefer-local-reports"}) {
250 unless (-e $target) {
251 $mustfetch = 1;
253 } else {
254 $mustfetch = 1;
256 if ($mustfetch) {
257 my $resp = _ua->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1",$target);
258 if ($resp->is_success) {
259 if ($Opt{verbose}) {
260 my(@stat) = stat $target;
261 my $timestamp = gmtime $stat[9];
262 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
263 if ($Opt{verbose} > 1) {
264 print STDERR $resp->headers->as_string unless $Opt{quiet};
267 my $headers = "$target.headers";
268 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
269 print $fh $resp->headers->as_string;
270 } else {
271 die {severity=>0,
272 text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
275 } elsif ($Opt{transport} eq "http_cpantesters_gzip") {
276 my $mustfetch = 0;
277 if ($Opt{"prefer-local-reports"}) {
278 unless (-e "$target.gz") {
279 $mustfetch = 1;
281 } else {
282 $mustfetch = 1;
284 if ($mustfetch) {
285 my $resp = _ua_gzip->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1","$target.gz");
286 if ($resp->is_success) {
287 if ($Opt{verbose}) {
288 my(@stat) = stat "$target.gz";
289 my $timestamp = gmtime $stat[9];
290 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
291 if ($Opt{verbose} > 1) {
292 print STDERR $resp->headers->as_string unless $Opt{quiet};
295 my $headers = "$target.headers";
296 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
297 print $fh $resp->headers->as_string;
298 } else {
299 die {severity=>0,
300 text=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
303 } else {
304 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
308 parse_report($target, $dumpvars, %Opt);
311 sub parse_distro {
312 my($distro,%Opt) = @_;
313 my %dumpvars;
314 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
315 # the name cpantesters-show was picked because originally
316 # http://www.cpantesters.org/show/ contained html file that we had
317 # to parse.
318 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
319 mkpath $cts_dir;
320 if ($Opt{solve}) {
321 require Statistics::Regression;
322 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
324 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
325 $Opt{vdistro} = $distro;
326 $distro = $1;
328 my $reports;
329 if (my $ctdb = $Opt{ctdb}) {
330 require CPAN::WWW::Testers::Generator::Database;
331 require CPAN::DistnameInfo;
332 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
333 unless ($Opt{vdistro}) {
334 my $sql = "select version from cpanstats where dist=? order by id";
335 my @rows = $dbi->get_query($sql,$distro);
336 my($newest,%seen);
337 for my $row (@rows) {
338 $newest = $row->[0] unless $seen{$row->[0]}++;
340 $Opt{vdistro} = "$distro-$newest";
342 my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
343 my $dist = $d->dist;
344 my $version = $d->version;
345 my $sql = "select id, guid from cpanstats where dist=? and version=? order by id desc";
346 my @rows = $dbi->get_query($sql,$dist,$version);
347 my @all;
348 for my $row (@rows) {
349 push @all, {
350 id => $row->[0],
351 guid => $row->[1],
354 $reports = \@all;
355 } else {
356 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
357 $reports = _parse_yaml($ctarget,%Opt);
359 return unless $reports;
360 my $sampled = 0;
361 my $samplesize = $Opt{sample} || 0;
362 $samplesize = 0 if $samplesize && $samplesize >= @$reports;
363 REPEATER: {
364 my $i = 0;
365 my %taken;
366 REPORT: for my $report (@$reports) {
367 $i++;
368 if ($samplesize) {
369 my $need = $samplesize - $sampled;
370 next REPORT unless $need;
371 my $left = @$reports - $i;
372 # warn sprintf "tot %d i %d sampled %d need %d left %d\n", scalar @$reports, $i, $sampled, $need, $left;
373 my $want_this = (rand(1) <= ($need/$left));
374 next REPORT unless $want_this;
376 eval {parse_single_report($report, \%dumpvars, %Opt)};
377 if ($@) {
378 if (ref $@) {
379 if ($@->{severity}) {
380 die $@->{text};
381 } else {
382 warn $@->{text};
384 } else {
385 die $@;
388 $sampled++;
389 $taken{$i-1}=undef;
390 last REPEATER if $Signal;
392 if ($samplesize) {
393 PASSFAIL: for my $pf ("pass","fail") {
394 my $minx = $Opt{"min".$pf} or next PASSFAIL;
395 my $x = $dumpvars{"meta:ok"}{uc $pf}{uc $pf};
396 if ($x < $minx) {
397 # bump samplesize, remove already sampled reports from array, redo
398 my $bump = int($samplesize * 0.05)+1;
399 $samplesize += $bump;
400 for my $k (sort {$b <=> $a} keys %taken) {
401 splice @$reports, $k, 1;
403 redo REPEATER;
408 if ($Opt{dumpvars}) {
409 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
410 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
411 print $fh _yaml_dump(\%dumpvars);
412 close $fh or die "Could not close '$dumpfile': $!"
414 if ($Opt{solve}) {
415 solve(\%dumpvars,%Opt);
419 =head2 $bool = _looks_like_qp($raw_report)
421 We had to acknowledge the fact that some MTAs swallow the MIME-Version
422 header while passing MIME through. So we introduce fallback heuristics
423 that try to determine if a report is written in quoted printable.
425 Note that this subroutine is internal, just documented to have the
426 internals documented.
428 The current implementation counts the number of QP escaped spaces and
429 equal signs.
431 =cut
433 sub _looks_like_qp {
434 my($report) = @_;
435 my $count_space = () = $report =~ /=20/g;
436 return 1 if $count_space > 12;
437 my $count_equal = () = $report =~ /=3D/g;
438 return 1 if $count_equal > 12;
439 return 1 if $count_space+$count_equal > 24;
440 return 0; # waiting for a counter example
443 =head2 $extract = parse_report($target,$dumpvars,%Opt)
445 Reads one report. $target is the local filename to read (but see below
446 for option 'article'). $dumpvars is a hashref which gets filled with
447 descriptive stats about PASS/FAIL/etc. %Opt are the options as
448 described in the C<ctgetreports> manpage. $extract is a hashref
449 containing the found variables.
451 Note: this parsing is a bit dirty but as it seems good enough I'm not
452 inclined to change it. We parse HTML with regexps only, not an HTML
453 parser. Only the entities are decoded.
455 In %Opt you can use
457 article => $some_full_article_as_scalar
459 to use this function to parse one full article as text. When this is
460 given, the argument $target is not read, but its basename is taken to
461 be the id of the article. (OMG, hackers!)
463 =cut
464 sub parse_report {
465 my($target,$dumpvars,%Opt) = @_;
466 our @q;
467 my $id = basename($target);
468 # warn "DEBUG: id[$id]";
469 my($ok,$about);
471 my(%extract);
473 my($report,$isHTML) = _get_cooked_report($target, \%Opt);
474 my @qr = map /^qr:(.+)/, @{$Opt{q}};
475 if ($Opt{raw} || @qr) {
476 for my $qr (@qr) {
477 my $cqr = eval "qr{$qr}";
478 die "Could not compile regular expression '$qr': $@" if $@;
479 my(@matches) = $report =~ $cqr;
480 my $v;
481 if (@matches) {
482 if (@matches==1) {
483 $v = $matches[0];
484 } else {
485 $v = join "", map {"($_)"} @matches;
487 } else {
488 $v = "";
490 $extract{"qr:$qr"} = $v;
494 my $report_writer;
495 my $moduleunpack = {};
496 my $expect_prereq = 0;
497 my $expect_toolchain = 0;
498 my $expecting_toolchain_soon = 0;
499 my $expect_module_versions_report = 0;
500 my $fallback_p5 = "";
502 my $in_summary = 0;
503 my $in_summary_seen_platform = 0;
504 my $in_prg_output = 0;
505 my $in_env_context = 0;
506 my $in_test_summary = 0;
508 my $current_headline;
509 my @previous_line = ""; # so we can neutralize line breaks
510 my @rlines = split /\r?\n/, $report;
511 LINE: for (@rlines) {
512 next LINE unless ($isHTML ? m/<title>((\S+)\s+(\S+))/ : m/^Subject:\s*((\S+)\s+(\S+))/);
513 my $s = $1;
514 $s = $1 if $s =~ m{<strong>(.+)};
515 if ($s =~ /(\S+)\s+(\S+)/) {
516 $ok = $1;
517 $about = $2;
519 $extract{"meta:ok"} = $ok;
520 $extract{"meta:about"} = $about;
521 last;
523 unless ($extract{"meta:about"}) {
524 $extract{"meta:about"} = $Opt{vdistro};
525 unless ($extract{"meta:ok"}) {
526 warn "Warning: could not determine state of report";
529 LINE: while (@rlines) {
530 $_ = shift @rlines;
531 while (/!$/ and @rlines) {
532 my $followupline = shift @rlines;
533 $followupline =~ s/^\s+//; # remo leading space
534 $_ .= $followupline;
536 if (/^--------/) {
537 if ($previous_line[-2] && $previous_line[-2] =~ /^--------/) {
538 $current_headline = $previous_line[-1];
539 if ($current_headline =~ /PROGRAM OUTPUT/) {
540 $in_prg_output = 1;
541 } else {
542 $in_prg_output = 0;
544 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
545 $in_env_context = 1;
546 } else {
547 $in_env_context = 0;
549 } elsif ($previous_line[-1] && $previous_line[-1] =~ /Test Summary Report/) {
550 $in_test_summary = 1;
551 $in_prg_output = 0;
554 if ($extract{"meta:perl"}) {
555 if ( $in_summary
556 and !$extract{"conf:git_commit_id"}
557 and /Commit id:\s*([[:xdigit:]]+)/) {
558 $extract{"conf:git_commit_id"} = $1;
560 } else {
561 my $p5;
562 if (0) {
563 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
564 $p5 = $1;
565 $in_summary = 1;
566 $in_env_context = 0;
568 if ($p5) {
569 my($r,$v,$s,$p);
570 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
571 $r =~ s/\.0//; # 5.0 6 2!
572 $extract{"meta:perl"} = "$r.$v.$s\@$p";
573 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
574 $r =~ s/\.0//;
575 $extract{"meta:perl"} = "$r.$v.$s";
576 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
577 $r =~ s/\.0//;
578 $extract{"meta:perl"} = "$r.$v.$s";
579 } else {
580 $extract{"meta:perl"} = $p5;
584 unless ($extract{"meta:from"}) {
585 if (0) {
586 } elsif ($isHTML ?
587 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
588 m|^From:\s*(.+)|
590 my $f = $1;
591 $f = $1 if $f =~ m{<strong>(.+)</strong>};
592 $extract{"meta:from"} = $f;
594 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
596 unless ($extract{"meta:date"}) {
597 if (0) {
598 } elsif ($isHTML ?
599 m|<div class="h_name">Date:</div> (.+?)<br/>| :
600 m|^Date:\s*(.+)|
602 my $date = $1;
603 $date = $1 if $date =~ m{<strong>(.+)</strong>};
604 my($dt);
605 DATEFMT: for my $pat ("%Y-%m-%dT%TZ", # 2010-07-07T14:01:40Z
606 "%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
607 "%b %d, %Y %R", # July 10,...
608 "%b %d, %Y %R", # July 4,...
610 $dt = eval {
611 my $p = DateTime::Format::Strptime->new
613 locale => "en",
614 time_zone => "UTC",
615 pattern => $pat,
617 $p->parse_datetime($date)
619 last DATEFMT if $dt;
621 unless ($dt) {
622 warn "Could not parse date[$date], setting to epoch 0";
623 $dt = DateTime->from_epoch( epoch => 0 );
625 $extract{"meta:date"} = $dt->datetime;
627 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
629 unless ($extract{"meta:writer"}) {
630 for ("$previous_line[-1] $_") {
631 if (0) {
632 } elsif (/CPANPLUS, version (\S+)/) {
633 $extract{"meta:writer"} = "CPANPLUS $1";
634 } elsif (/created by (App::cpanminus::reporter \S+)/) {
635 $extract{"meta:writer"} = $1;
636 } elsif (/created (?:automatically )?by (\S+)/) {
637 $extract{"meta:writer"} = $1;
638 if (/\s+on\s+perl\s+([^,]+),/) {
639 $fallback_p5 = $1;
641 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
642 $extract{"meta:writer"} = "$1 $2";
644 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
647 if ($in_summary) {
648 # we do that first three lines a bit too often
649 my $qr = $Opt{dumpvars} || "";
650 $qr = qr/$qr/ if $qr;
651 unless (@q) {
652 @q = @{$Opt{q}||[]};
653 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
656 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
658 if (/^\s+Platform:$/) {
659 $in_summary_seen_platform=1;
660 } elsif (/^\s*$/ || m|</pre>|) {
661 # if not html, we have reached the end now
662 if ($in_summary_seen_platform) {
663 # some perls have an empty line after the summary line
664 $in_summary = 0;
666 } else {
667 my(%kv) = m!\G,?\s*([^=]+)= # left hand side and equal sign
669 [^',\s]+(?=.+=) # use64bitint=define use64bitall=define uselongdouble=undef
670 # (lookahead needed for left-over equal sign)
672 [^',]+$ # libpth=/usr/lib /usr/local/lib
674 '[^']+?' # cccdlflags='-DPIC -fPIC'
676 \S+ # useshrplib=false
677 )!xgc;
678 while (my($k,$v) = each %kv) {
679 my $ck = "conf:$k";
680 $ck =~ s/\s+$//;
681 $v =~ s/,$//;
682 if ($v =~ /^'(.*)'$/) {
683 $v = $1;
685 $v =~ s/^\s+//;
686 $v =~ s/\s+$//;
687 if ($qr && $ck =~ $qr) {
688 $extract{$ck} = $v;
689 } elsif ($conf_vars{$ck}) {
690 $extract{$ck} = $v;
695 if ($in_prg_output) {
696 unless ($extract{"meta:output_from"}) {
697 if (/Output from (.+):$/) {
698 $extract{"meta:output_from"} = $1
702 # Parsing of Module::Versions::Report text in test output
703 if (/Modules in memory:/) {
704 $expect_module_versions_report = 1;
705 next LINE;
707 elsif ($expect_module_versions_report) {
708 if (/\s+(\S+)(?:\s+(v\d\S+?))?;/) {
709 $extract{"mod:$1"} = defined $2 ? $2 : 'undef';
710 next LINE;
712 elsif (/\[at .+?\]/) {
713 # trailing timestamp
714 $expect_module_versions_report = 0;
715 next LINE;
719 if ($in_env_context) {
720 if ($extract{"meta:writer"} =~ /^CPANPLUS\b/
722 exists $extract{"env:PERL5_CPANPLUS_IS_VERSION"}
725 s/Perl:\s+\$\^X/\$^X/
727 s/EUID:\s+\$>/\$EUID/
729 s/UID:\s+\$</\$UID/
731 s/EGID:\s+\$\)/\$EGID/
733 s/GID:\s+\$\(/\$GID/
736 if (my($left,$right) = /^\s{4}(\S+)\s*=\s*(.*)$/) {
737 if ($left eq '$UID/$EUID') {
738 my($uid,$euid) = split m{\s*/\s*}, $right;
739 $extract{'env:$UID'} = $uid;
740 $extract{'env:$EUID'} = $euid;
741 } elsif ($left =~ /GID/) {
742 for my $xgid (uniq split " ", $right) {
743 $extract{"env:$left$xgid"} = "true";
745 } else {
746 $extract{"env:$left"} = $right;
750 if ($in_test_summary) {
751 if (/^(?:Result:|Files=\d)/) {
752 $in_test_summary = 0;
753 } elsif (/^(\S+)\s+\(Wstat:.+?Tests:.+?Failed:\s*(\d+)\)$/) {
754 my $in_test_summary_current_test = $1; # t/globtest.t or t\globtest.t
755 my $in_test_summary_current_failed = $2;
756 $in_test_summary_current_test =~ s|\\|/|g; # only t/globtest.t
757 $extract{"fail:$in_test_summary_current_test"} = $in_test_summary_current_failed;
758 } elsif (/^\s+Failed tests?:/) {
759 # ignoring the exact combination of tests for now, seems like overkill
762 push @previous_line, $_;
763 if ($expect_prereq || $expect_toolchain) {
764 if (/Perl module toolchain versions installed/) {
765 # first time discovered in CPANPLUS 0.89_06
766 $expecting_toolchain_soon = 1;
767 $expect_prereq=0;
768 next LINE;
770 if (exists $moduleunpack->{type}) {
771 my($module,$v,$needwant);
772 # type 1 and 2 are about prereqs, type three about toolchain
773 if ($moduleunpack->{type} == 1) {
774 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
775 next LINE if $@;
776 if ($leader =~ /^-/) {
777 $moduleunpack = {};
778 $expect_prereq = 0;
779 next LINE;
780 } elsif ($leader =~ /^(
781 buil # build_requires:
782 |conf # configure_requires:
783 )/x) {
784 next LINE;
785 } elsif ($module =~ /^(
786 - # line drawing
787 )/x) {
788 next LINE;
790 } elsif ($moduleunpack->{type} == 2) {
791 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
792 next LINE if $@;
793 for ($module,$v,$needwant) {
794 s/^\s+//;
795 s/\s+$//;
797 if ($leader =~ /^\*/) {
798 $moduleunpack = {};
799 $expect_prereq = 0;
800 next LINE;
801 } elsif (!defined $v
802 or !defined $needwant
803 or $v =~ /\s/
804 or $needwant =~ /\s/
806 ($module,$v,$needwant) = split " ", $_;
808 } elsif ($moduleunpack->{type} == 3) {
809 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
810 next LINE if $@;
811 if (!$module) {
812 $moduleunpack = {};
813 $expect_toolchain = 0;
814 next LINE;
815 } elsif ($module =~ /^-/) {
816 next LINE;
819 $module =~ s/\s+$//;
820 if ($module) {
821 $v =~ s/^\s+//;
822 $v =~ s/\s+$//;
823 my($modulename,$versionlead) = split " ", $module;
824 if (defined $modulename and defined $versionlead) {
825 $module = $modulename;
826 $v = "$versionlead$v";
828 if ($v eq "Have") {
829 next LINE;
831 $extract{"mod:$module"} = $v;
832 if (defined $needwant) {
833 $needwant =~ s/^\s+//;
834 $needwant =~ s/\s+$//;
835 $extract{"prereq:$module"} = $needwant;
839 if (/(\s+)(Module\s+)(Need\s+)Have/) {
840 $in_env_context = 0;
841 $moduleunpack = {
842 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
843 type => 1,
845 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
846 $in_env_context = 0;
847 my $adjust_1 = 0;
848 my $adjust_2 = -length($4);
849 my $adjust_3 = length($4);
850 # I think they do not really try to align, usually we
851 # get away with split
852 $moduleunpack = {
853 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
854 type => 2,
858 if (/PREREQUISITES|Prerequisite modules loaded/) {
859 $in_env_context = 0;
860 $expect_prereq=1;
862 if ($expecting_toolchain_soon) {
863 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
864 $in_env_context = 0;
865 $expect_toolchain=1;
866 $expecting_toolchain_soon=0;
867 $moduleunpack = {
868 tpl => 'a'.length($1).'a'.length($2).'a*',
869 type => 3,
873 if (/toolchain versions installed/) {
874 $in_env_context = 0;
875 $expecting_toolchain_soon=1;
877 } # LINE
878 if (! $extract{"mod:CPANPLUS"} && $extract{"meta:writer"} =~ /^CPANPLUS\s(\d+(\.\d+))$/) {
879 $extract{"mod:CPANPLUS"} = $1;
881 if (! $extract{"meta:perl"} && $fallback_p5) {
882 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
883 $extract{"meta:perl"} = $p5;
884 $extract{"conf:git_describe"} = $patch if defined $patch;
886 $extract{id} = $id;
887 if (my $filtercbbody = $Opt{filtercb}) {
888 my $filtercb = eval('sub {'.$filtercbbody.'}');
889 $filtercb->(\%extract);
891 if ($Opt{solve}) {
892 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
893 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
895 if ($extract{"meta:perl"} && $extract{"conf:osname"}) {
896 $extract{"meta:osname+perl"} = join " ", @extract{"conf:osname","meta:perl"};
898 my $data = $dumpvars->{"==DATA=="} ||= [];
899 push @$data, \%extract;
901 # ---- %extract finished ----
902 my $diag = "";
903 if (my $qr = $Opt{dumpvars}) {
904 $qr = qr/$qr/;
905 while (my($k,$v) = each %extract) {
906 if ($k =~ $qr) {
907 $dumpvars->{$k}{$v}{$extract{"meta:ok"}}++;
911 for my $want (@q) {
912 my $have = $extract{$want} || "";
913 $diag .= " $want\[$have]";
915 printf STDERR " %-4s %8s%s\n", $extract{"meta:ok"}, $id, $diag unless $Opt{quiet};
916 if ($Opt{raw}) {
917 $report =~ s/\s+\z//;
918 print STDERR $report, "\n================\n" unless $Opt{quiet};
920 if ($Opt{interactive}) {
921 eval { require IO::Prompt; 1; } or
922 die "Option '--interactive' requires IO::Prompt installed";
923 local @ARGV;
924 local $ARGV;
925 my $ans = IO::Prompt::prompt
927 -p => "View $id? [onechar: ynq] ",
928 -d => "y",
929 -u => qr/[ynq]/,
930 -onechar,
932 print STDERR "\n" unless $Opt{quiet};
933 if ($ans eq "y") {
934 my($report) = _get_cooked_report($target, \%Opt);
935 $Opt{pager} ||= "less";
936 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
937 local $/;
938 print {$lfh} $report;
939 close $lfh or die "Could not close pager: $!"
940 } elsif ($ans eq "q") {
941 $Signal++;
942 return;
945 return \%extract;
948 sub _get_cooked_report {
949 my($target, $Opt_ref) = @_;
950 my($report, $isHTML);
951 if ($report = $Opt_ref->{article}) {
952 $isHTML = $report =~ /^</;
953 undef $target;
955 if ($target) {
956 local $/;
957 my $raw_report;
958 if (0) {
959 } elsif (-e $target) {
960 open my $fh, '<', $target or die "Could not open '$target': $!";
961 $raw_report = <$fh>;
962 } elsif (-e "$target.gz") {
963 open my $fh, "<", "$target.gz" or die "Could not open '$target.gz': $!";
965 # Opens a gzip (.gz) file for reading or writing. The mode parameter
966 # is as in fopen ("rb" or "wb") but can also include a compression level
967 # ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
968 # Huffman only compression as in "wb1h", or 'R' for run-length encoding
969 # as in "wb1R". (See the description of deflateInit2 for more information
970 # about the strategy parameter.)
972 my $gz = Compress::Zlib::gzopen($fh, "rb");
973 $raw_report = "";
974 my $buffer;
975 while (my $bytesread = $gz->gzread($buffer)) {
976 $raw_report .= $buffer;
978 } else {
979 die "Could not find '$target' or '$target.gz'";
981 $isHTML = $raw_report =~ /^</;
982 if ($isHTML) {
983 if ($raw_report =~ m{^<\?.+?<html.+?<head.+?<body.+?<pre[^>]*>(.+)</pre>.*</body>.*</html>}s) {
984 $raw_report = decode_entities($1);
985 $isHTML = 0;
988 if ($isHTML) {
989 $report = decode_entities($raw_report);
990 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
992 _looks_like_qp($raw_report)
994 # note(1): minimizing MIME effort; don't know about reports in other formats
995 # note(2): Net-Generatus-0.40 had an offending report
996 $report = eval { MIME::QuotedPrint::decode_qp($raw_report) };
997 if (!$report || $@) {
998 warn "WARNING: report '$target' could not be parsed as qp, giving up";
999 if ($raw_report =~ /Subject:.+Dear.+Perl.+Summary/s) {
1000 $report = $raw_report;
1003 } else {
1004 $report = $raw_report;
1007 if ($report =~ /\r\n/) {
1008 my @rlines = split /\r?\n/, $report;
1009 $report = join "\n", @rlines;
1011 ($report, $isHTML);
1014 =head2 solve
1016 Feeds a couple of potentially interesting data to
1017 Statistics::Regression and sorts the result by R^2 descending. Do not
1018 confuse this with a prove, rather take it as a useful hint. It can
1019 save you minutes of staring at data and provide a quick overview where
1020 one should look closer. Displays the N top candidates, where N
1021 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
1022 Regressions results with an R^2 of 1.00 are displayed in any case.
1024 The function is called when the option C<-solve> is given on the
1025 commandline. Several extra config variables are calculated, see source
1026 code for details.
1028 =cut
1030 my %never_solve_on = map {($_ => 1)}
1032 "conf:ccflags",
1033 "conf:config_args",
1034 "conf:cppflags",
1035 "conf:lddlflags",
1036 "conf:uname",
1037 "env:PATH",
1038 "env:PERL5LIB",
1039 "env:PERL5OPT",
1040 'env:$^X',
1041 'env:PERL5_CPANPLUS_IS_RUNNING',
1042 'env:PERL5_CPAN_IS_RUNNING',
1043 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
1044 'meta:ok',
1046 my %normalize_numeric =
1048 id => sub { return shift },
1050 # here we were treating date as numeric; current
1051 # implementation requires to decide for one normalization, so
1052 # we decided 2012-02 for a sampling focussing on recentness;
1054 #'meta:date' => sub {
1055 # my $v = shift;
1056 # my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1057 # unless (defined $M) {
1058 # die "illegal value[$v] for a date";
1060 # Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1063 my %normalize_value =
1065 'meta:perl' => sub {
1066 my($perlatpatchlevel) = shift;
1067 my $perl = $perlatpatchlevel;
1068 $perl =~ s/\@.*//;
1069 $perl;
1071 'meta:date' => sub {
1072 my $v = shift;
1073 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1074 unless (defined $M) {
1075 die "illegal value[$v] for a date";
1077 my $epoch = Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1078 my $Y_epoch = time - 2*365.25*86400;
1079 my $ret;
1080 if ($epoch < $Y_epoch) {
1081 $ret = $Y;
1082 } else {
1083 my @gmtime = gmtime $Y_epoch; $gmtime[5] += 1900;
1084 if ($Y == $gmtime[5]) {
1085 $ret = $Y;
1086 } else {
1087 my $M_epoch = time - 9*7*86400;
1088 if ($epoch < $M_epoch) {
1089 $ret = "$Y-$M";
1090 } else {
1091 my @gmtime = gmtime $M_epoch; $gmtime[5] += 1900; $gmtime[4]++;
1092 if ($Y == $gmtime[5] && $M == $gmtime[4]) {
1093 $ret = "$Y-$M";
1094 } else {
1095 $ret = "$Y-$M-$D";
1100 return $ret;
1103 sub solve {
1104 my($V,%Opt) = @_;
1105 require Statistics::Regression;
1106 my @regression;
1107 my $ycb;
1108 if (my $ycbbody = $Opt{ycb}) {
1109 $ycb = eval('sub {'.$ycbbody.'}');
1110 die if $@;
1111 } else {
1112 $ycb = sub {
1113 my $rec = shift;
1114 my $y;
1115 if ($rec->{"meta:ok"} eq "PASS") {
1116 $y = 1;
1117 } elsif ($rec->{"meta:ok"} eq "FAIL") {
1118 $y = 0;
1120 return $y
1123 VAR: for my $variable (sort keys %$V) {
1124 next if $variable eq "==DATA==";
1125 if ($never_solve_on{$variable}){
1126 warn "Skipping '$variable'\n" unless $Opt{quiet};
1127 next VAR;
1129 my $value_distribution = $V->{$variable};
1130 my $keys = keys %$value_distribution;
1131 my @X = qw(const);
1132 if ($normalize_numeric{$variable}) {
1133 push @X, "n_$variable";
1134 } else {
1135 my %seen = ();
1136 for my $value (sort keys %$value_distribution) {
1137 my $pf = $value_distribution->{$value};
1138 $pf->{PASS} ||= 0;
1139 $pf->{FAIL} ||= 0;
1140 if ($pf->{PASS} || $pf->{FAIL}) {
1141 my $Xele = sprintf "eq_%s",
1143 $normalize_value{$variable} ?
1144 $normalize_value{$variable}->($value) :
1145 $value
1147 push @X, $Xele unless $seen{$Xele}++;
1150 if (
1151 $pf->{PASS} xor $pf->{FAIL}
1153 my $vl = 40;
1154 substr($value,$vl) = "..." if length $value > 3+$vl;
1155 my $poor_mans_freehand_estimation = 0;
1156 if ($poor_mans_freehand_estimation) {
1157 warn sprintf
1159 "%4d %4d %-23s | %s\n",
1160 $pf->{PASS},
1161 $pf->{FAIL},
1162 $variable,
1163 $value,
1169 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
1170 next VAR unless @X > 1;
1171 my %regdata =
1173 X => \@X,
1174 data => [],
1176 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
1177 my $y = $ycb->($rec);
1178 next RECORD unless defined $y;
1179 my %obs;
1180 $obs{Y} = $y;
1181 @obs{@X} = (0) x @X;
1182 $obs{const} = 1;
1183 for my $x (@X) {
1184 if ($x =~ /^eq_(.+)/) {
1185 my $read_v = $1;
1186 if (exists $rec->{$variable}
1187 && defined $rec->{$variable}
1189 my $use_v = (
1190 $normalize_value{$variable} ?
1191 $normalize_value{$variable}->($rec->{$variable}) :
1192 $rec->{$variable}
1194 if ($use_v eq $read_v) {
1195 $obs{$x} = 1;
1198 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1199 } elsif ($x =~ /^n_(.+)/) {
1200 my $v = $1;
1201 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1202 if ($@) {
1203 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1207 push @{$regdata{data}}, \%obs;
1209 _run_regression ($variable, \%regdata, \@regression, \%Opt);
1211 my $top = min ($Opt{solvetop} || 3, scalar @regression);
1212 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1213 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1214 my $score = 0;
1215 printf
1217 "State after regression testing: %d results, showing top %d\n\n",
1218 scalar @regression,
1219 $top,
1221 for my $reg (sort {
1222 $b->rsq <=> $a->rsq
1224 $a->k <=> $b->k
1225 } @regression) {
1226 printf "(%d)\n", ++$score;
1227 eval { $reg->print; };
1228 if ($@) {
1229 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1231 last if --$top <= 0;
1236 # $variable is the name we pass through to S:R constructor
1237 # $regdata is hash and has the arrays "X" and "data" (observations)
1238 # X goes to S:R constructor
1239 # each observation has a Y which we pass to S:R in an include() call
1240 # $regression is the collector array of results
1241 # $opt are the options from outside, used to see if we are "verbose"
1242 sub _run_regression {
1243 my($variable,$regdata,$regression,$opt) = @_;
1244 my @X = @{$regdata->{X}};
1245 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1246 # hold the reference
1247 # group
1248 while (@X > 1) {
1249 my $reg = Statistics::Regression->new($variable,\@X);
1250 for my $obs (@{$regdata->{data}}) {
1251 my $y = delete $obs->{Y};
1252 $reg->include($y, $obs);
1253 $obs->{Y} = $y;
1255 eval {$reg->theta;
1256 my @e = $reg->standarderrors;
1257 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1258 $reg->rsq;};
1259 if ($@) {
1260 if ($opt->{verbose} && $opt->{verbose}>=2) {
1261 warn _yaml_dump
1262 ({error=>"could not determine some regression parameters",
1263 variable=>$variable,
1264 k=>$reg->k,
1265 n=>$reg->n,
1266 X=>$regdata->{"X"},
1267 errorstr => $@,
1270 # reduce k in case that linear dependencies disturbed us;
1271 # often called reference group; I'm tempted to collect and
1272 # make visible
1273 splice @X, 1, 1;
1274 } else {
1275 # $reg->print;
1276 push @$regression, $reg;
1277 return;
1282 =head1 AUTHOR
1284 Andreas König
1286 =head1 BUGS
1288 Please report any bugs or feature requests through the web
1289 interface at
1290 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1291 I will be notified, and then you'll automatically be notified of
1292 progress on your bug as I make changes.
1294 =head1 SUPPORT
1296 You can find documentation for this module with the perldoc command.
1298 perldoc CPAN::Testers::ParseReport
1301 You can also look for information at:
1303 =over 4
1305 =item * RT: CPAN's request tracker
1307 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1309 =item * AnnoCPAN: Annotated CPAN documentation
1311 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1313 =item * CPAN Ratings
1315 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1317 =item * Search CPAN
1319 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1321 =back
1324 =head1 ACKNOWLEDGEMENTS
1326 Thanks to RJBS for module-starter.
1328 =head1 COPYRIGHT & LICENSE
1330 Copyright 2008,2009,2010,2011,2012,2013,2014,2015,2016 Andreas König.
1332 This program is free software; you can redistribute it and/or modify it
1333 under the same terms as Perl itself.
1336 =cut
1338 1; # End of CPAN::Testers::ParseReport