1 package CPAN
::Testers
::ParseReport
;
6 use DateTime
::Format
::Strptime
;
7 use File
::Basename
qw(basename);
8 use File
::Path
qw(mkpath);
10 use HTML
::TreeBuilder
();
12 use XML
::LibXML
::XPathContext
;
18 CPAN::Testers::ParseReport - parse reports to cpantesters.perl.org from various sources
26 use version
; our $VERSION = qv
('0.0.4');
31 Nothing in here is meant for public consumption. Use C<ctgetreports>
34 ctgetreports --q mod:Moose Devel-Events
38 This is the core module for CPAN::Testers::ParseReport. If you're not
39 looking to extend or alter the behaviour of this module, you probably
40 want to look at C<ctgetreports> instead.
44 Are described in the <ctgetreports> manpage and are passed through to
45 the functions unaltered.
49 =head2 parse_distro($distro,$options)
51 reads the cpantesters HTML page for the distro and loops through the
52 reports for the first (usually most recent) version of that distro
61 $ua = LWP
::UserAgent
->new;
67 sub _download_overview
{
68 my($cts_dir, $distro, %Opt) = @_;
69 my $format = $Opt{ctformat
} || "html";
70 my $ctarget = "$cts_dir/$distro.$format";
71 my $cheaders = "$cts_dir/$distro.headers";
72 if (! -e
$ctarget or (!$Opt{local} && -M
$ctarget > .25)) {
73 if (-e
$ctarget && $Opt{verbose
}) {
75 my $timestamp = gmtime $stat[9];
76 print "(timestamp $timestamp GMT)\n";
78 print "Fetching $ctarget..." if $Opt{verbose
};
79 my $resp = _ua
->mirror("http://cpantesters.perl.org/show/$distro.$format",$ctarget);
80 if ($resp->is_success) {
81 print "DONE\n" if $Opt{verbose
};
82 open my $fh, ">", $cheaders or die;
83 for ($resp->headers->as_string) {
85 if ($Opt{verbose
} && $Opt{verbose
}>1) {
89 } elsif (304 == $resp->code) {
90 print "DONE (not modified)\n";
91 my $atime = my $mtime = time;
92 utime $atime, $mtime, $cheaders;
94 die $resp->status_line;
101 my($ctarget, %Opt) = @_;
102 my $tree = HTML
::TreeBuilder
->new;
103 $tree->implicit_tags(1);
105 $tree->ignore_ignorable_whitespace(0);
106 my $ccontent = do { open my $fh, $ctarget or die; local $/; <$fh> };
107 $tree->parse_content($ccontent);
109 my $content = $tree->as_XML;
110 my $parser = XML
::LibXML
->new;;
111 $parser->keep_blanks(0);
112 $parser->clean_namespaces(1);
113 my $doc = eval { $parser->parse_string($content) };
116 my $distro = basename
$ctarget;
117 die sprintf "Error while parsing %s\: %s", $distro, $err;
119 $parser->clean_namespaces(1);
120 my $xc = XML
::LibXML
::XPathContext
->new($doc);
121 my $nsu = $doc->documentElement->namespaceURI;
122 $xc->registerNs('x', $nsu) if $nsu;
124 my($selected_release_ul,$selected_release_distrov,$excuse_string);
126 $excuse_string = "selected distro '$Opt{vdistro}'";
127 ($selected_release_distrov) = $nsu ?
$xc->findvalue("/x:html/x:body/x:div[\@id = 'doc']/x:div//x:h2[x:a/\@id = '$Opt{vdistro}']/x:a/\@id") :
128 $doc->findvalue("/html/body/div[\@id = 'doc']/div//h2[a/\@id = '$Opt{vdistro}']/a/\@id");
129 ($selected_release_ul) = $nsu ?
$xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']/x:div//x:h2[x:a/\@id = '$Opt{vdistro}']/following-sibling::ul[1]") :
130 $doc->findnodes("/html/body/div[\@id = 'doc']/div//h2[a/\@id = '$Opt{vdistro}']/following-sibling::ul[1]");
132 $excuse_string = "any distro";
133 ($selected_release_distrov) = $nsu ?
$xc->findvalue("/x:html/x:body/x:div[\@id = 'doc']/x:div//x:h2[1]/x:a/\@id") :
134 $doc->findvalue("/html/body/div[\@id = 'doc']/div//h2[1]/a/\@id");
135 ($selected_release_ul) = $nsu ?
$xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']/x:div//x:ul[1]") :
136 $doc->findnodes("/html/body/div[\@id = 'doc']/div//ul[1]");
138 unless ($selected_release_distrov) {
139 warn "Warning: could not find $excuse_string in '$ctarget'";
142 print "SELECTED: $selected_release_distrov\n";
145 for my $test ($nsu ?
$xc->findnodes("x:li",$selected_release_ul) : $selected_release_ul->findnodes("li")) {
146 $ok = $nsu ?
$xc->findvalue("x:span[1]/\@class",$test) : $test->findvalue("span[1]/\@class");
147 $id = $nsu ?
$xc->findvalue("x:a[1]/text()",$test) : $test->findvalue("a[1]/text()");
148 push @all, {id
=>$id,ok
=>$ok};
155 my($ctarget, %Opt) = @_;
157 my $arr = YAML
::Syck
::LoadFile
($ctarget);
158 my($selected_release_ul,$selected_release_distrov,$excuse_string);
160 $excuse_string = "selected distro '$Opt{vdistro}'";
161 $arr = [grep {$_->{distversion
} eq $Opt{vdistro
}} @
$arr];
162 ($selected_release_distrov) = $arr->[0]{distversion
};
164 $excuse_string = "any distro";
167 for my $report (@
$arr) {
168 unless ($seen{$report->{distversion
}}++) {
169 $last_addition = $report->{distversion
};
172 $arr = [grep {$_->{distversion
} eq $last_addition} @
$arr];
173 ($selected_release_distrov) = $last_addition;
175 unless ($selected_release_distrov) {
176 warn "Warning: could not find $excuse_string in '$ctarget'";
179 print "SELECTED: $selected_release_distrov\n";
182 for my $test (@
$arr) {
183 $ok = $test->{action
};
185 push @all, {id
=>$id,ok
=>$ok};
191 sub _parse_single_report
{
192 my($report, $dumpvars, %Opt) = @_;
193 my($id) = $report->{id
};
194 my($ok) = $report->{ok
};
195 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
197 my $target = "$nnt_dir/$id";
198 unless (-e
$target) {
199 print "Fetching $target..." if $Opt{verbose
};
200 my $resp = _ua
->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
201 if ($resp->is_success) {
203 my(@stat) = stat $target;
204 my $timestamp = gmtime $stat[9];
205 print "(timestamp $timestamp GMT)\n";
206 if ($Opt{verbose
} > 1) {
207 print $resp->headers->as_string;
210 my $headers = "$target.headers";
211 open my $fh, ">", $headers or die;
212 print $fh $resp->headers->as_string;
214 die $resp->status_line;
217 parse_report
($target, $dumpvars, %Opt);
221 my($distro,%Opt) = @_;
223 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
224 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
226 my $ctarget = _download_overview
($cts_dir, $distro, %Opt);
228 $Opt{ctformat
} ||= "html";
229 if ($Opt{ctformat
} eq "html") {
230 $reports = _parse_html
($ctarget);
232 $reports = _parse_yaml
($ctarget);
234 return unless $reports;
235 for my $report (@
$reports) {
236 _parse_single_report
($report, \
%dumpvars, %Opt);
238 if ($Opt{dumpvars
}) {
239 print YAML
::Syck
::Dump
(\
%dumpvars);
243 =head2 parse_report($target,$dumpvars,%Opt)
245 Reads one report. $target is the local filename to read. $dumpvars is
246 a hashref which gets filled. %Opt are the options as described in the
247 C<ctgetreports> manpage.
251 my($target,$dumpvars,%Opt) = @_;
253 my $id = basename
($target);
255 open my $fh, $target or die;
258 my $moduleunpack = {};
259 my $expect_prereq = 0;
260 my $expect_toolchain = 0;
261 my $expecting_toolchain_soon = 0;
264 my $in_prg_output = 0;
266 my $current_headline;
267 my @previous_line = ""; # so we can neutralize line breaks
268 LINE
: while (<$fh>) {
269 next unless /<title>(\S+)/;
274 LINE
: while (<$fh>) {
275 chomp; # reliable line endings?
276 s/"//; # HTML !!!
277 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
278 $current_headline = $previous_line[-1];
279 if ($current_headline =~ /PROGRAM OUTPUT/) {
285 unless ($extract{"meta:perl"}) {
288 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
294 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
295 $r =~ s/\.0//; # 5.0 6 2!
296 $extract{"meta:perl"} = "$r.$v.$s\@$p";
297 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
299 $extract{"meta:perl"} = "$r.$v.$s";
300 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
302 $extract{"meta:perl"} = "$r.$v.$s";
304 $extract{"meta:perl"} = $p5;
308 unless ($extract{"meta:from"}) {
310 } elsif (m
|<div
class="h_name">From
:</div> <b>(.+?)</b
><br
/>|) {
311 $extract{"meta:from"} = $1;
313 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
315 unless ($extract{"meta:date"}) {
317 } elsif (m
|<div
class="h_name">Date
:</div> (.+?)<br/>|) {
319 my $p = DateTime
::Format
::Strptime
->new(
322 # April 13, 2005 23:50
323 pattern
=> "%b %d, %Y %R",
325 my $dt = $p->parse_datetime($date);
326 $extract{"meta:date"} = $dt->datetime;
328 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
330 unless ($extract{"meta:writer"}) {
331 for ("$previous_line[-1] $_") {
333 } elsif (/created (?:automatically )?by (\S+)/) {
334 $extract{"meta:writer"} = $1;
335 } elsif (/CPANPLUS, version (\S+)/) {
336 $extract{"meta:writer"} = "CPANPLUS $1";
337 } elsif (/This report was machine-generated by CPAN::YACSmoke (\S+)/) {
338 $extract{"meta:writer"} = "CPAN::YACSmoke $1";
340 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
344 # we do that first three lines a bit too often
345 my $qr = $Opt{dumpvars
} || "";
346 $qr = qr/$qr/ if $qr;
349 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
352 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
354 if (/^\s*$/ || m
|</pre
>|) {
358 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
359 while (my($k,$v) = each %kv) {
362 if ($v =~ /^'(.*)'$/) {
367 # $DB::single = $ck eq "conf:archname";
368 if ($qr && $ck =~ $qr) {
369 $dumpvars->{$ck}{$v}{$ok}++;
371 if ($conf_vars{$ck}) {
377 if ($in_prg_output) {
378 unless ($extract{"meta:output_from"}) {
379 if (/Output from (.+):$/) {
380 $extract{"meta:output_from"} = $1
384 if ($expect_prereq || $expect_toolchain) {
385 if (exists $moduleunpack->{type
}) {
387 if ($moduleunpack->{type
} == 1) {
388 (my $leader,$module,undef,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
390 if ($leader =~ /^-/) {
394 } elsif ($leader =~ /^(
395 buil
# build_requires:
398 } elsif ($module =~ /^(
403 } elsif ($moduleunpack->{type
} == 2) {
404 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
406 if ($leader =~ /^\*/) {
411 } elsif ($moduleunpack->{type
} == 3) {
412 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
416 $expect_toolchain = 0;
418 } elsif ($module =~ /^-/) {
426 $extract{"mod:$module"} = $v;
429 if (/(\s+)(Module\s+)(Need\s+)Have/) {
431 tpl
=> 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
434 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
436 my $adjust_2 = -length($4);
437 my $adjust_3 = length($4);
438 # two pass would be required to see where the
439 # columns really are. Or could we get away with split?
441 tpl
=> 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3),
446 if (/PREREQUISITES|Prerequisite modules loaded/) {
449 if ($expecting_toolchain_soon) {
450 if (/(\s+)(Module\s+) Have/) {
452 $expecting_toolchain_soon=0;
454 tpl
=> 'a'.length($1).'a'.length($2).'a*',
459 if (/toolchain versions installed/) {
460 $expecting_toolchain_soon=1;
462 push @previous_line, $_;
465 if (my $qr = $Opt{dumpvars
}) {
467 while (my($k,$v) = each %extract) {
469 $dumpvars->{$k}{$v}{$ok}++;
474 my $have = $extract{$want} || "";
475 $diag .= " $want\[$have]";
477 printf " %-4s %8d%s\n", $ok, $id, $diag;
478 if ($Opt{interactive
}) {
482 my $ans = IO
::Prompt
::prompt
491 open my $ifh, "<", $target or die "Could not open $target: $!";
492 $Opt{pager
} ||= "less";
493 open my $lfh, "|-", $Opt{pager
} or die "Could not fork '$Opt{pager}': $!";
496 close $ifh or die "Could not close $target: $!";
497 close $lfh or die "Could not close pager: $!"
498 } elsif ($ans eq "q") {
511 Please report any bugs or feature requests through the web
513 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
514 I will be notified, and then you'll automatically be notified of
515 progress on your bug as I make changes.
519 You can find documentation for this module with the perldoc command.
521 perldoc CPAN::Testers::ParseReport
524 You can also look for information at:
528 =item * RT: CPAN's request tracker
530 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
532 =item * AnnoCPAN: Annotated CPAN documentation
534 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
538 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
542 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
547 =head1 ACKNOWLEDGEMENTS
549 Thanks to RJBS for module-starter.
551 =head1 COPYRIGHT & LICENSE
553 Copyright 2008 Andreas Koenig, all rights reserved.
555 This program is free software; you can redistribute it and/or modify it
556 under the same terms as Perl itself.
561 1; # End of CPAN::Testers::ParseReport