Change the repository URL to https, bump version, ready to release
[cpan-testers-parsereport.git] / bin / ctgetreports
blobbccdb7203e37d1e652d66fb63b4e65a87a454260
1 #!/usr/bin/perl
3 =head1 NAME
5 ctgetreports - Quickly fetch cpantesters results with all reports
7 =head1 SYNOPSIS
9 ctgetreports [options] distroname
10 ctgetreports [options] --report number ...
11 ctgetreports [options] --reportfiles path ...
12 ctgetreports -h
14 =head1 OPTIONS
16 A distroname is unversioned, e.g. C<IPC-Run>. For versioned names as
17 in C<IPC-Run-0.80> see --vdistro.
19 =over 2
21 =cut
23 my $optpod = <<'=back';
25 =item B<--cachedir=s>
27 Directory to keep mirrored data in. Defaults to C<$HOME/var/cpantesters>.
29 =item B<--ctdb=s>
31 If you have your own copy of the cpan testers I<cpanstats> database
32 you can use this to set the path to the database. Must not be used
33 together with C<--cturl>. Requires
34 C<CPAN::WWW::Testers::Generator::Database> and C<CPAN::DistnameInfo>
35 installed.
37 =item B<--cturl=s>
39 Base URL of the cpantesters website. Defaults to
40 C<http://static.cpantesters.org/show>.
42 =item B<--dumpfile=s>
44 If dumpvars are specified, dump them into this file. Defaults to "ctgetreports.out".
46 =item B<--dumpvars=s>
48 Dump all queryable variables matching the regular expression given as
49 argument at the end of the loop for a distro.
51 =item B<--filtercb=s>
53 A callback that is called at the end of parse_report(). It allows you to
54 manipulate the result, e.g. change the resulting values or add
55 calculated values. The callback function gets a record (hashref) as
56 the only argument. The return value is ignored. The callback is pure
57 perl code without any surrounding sub declaration.
59 Compared to the C<--ycb> callback described below C<--filtercb> is
60 considered easier to use.
62 The following example excludes reports by the user C<jack.blacksmoke>
63 from regression testing:
65 ctgetreports --solve --filtercb '
66 my $rec = shift;
67 $rec->{"meta:ok"}="FILTERED" if $rec->{"meta:from"} =~ /jack.blacksmoke/;
68 ' String-RewritePrefix-0.005
70 The following example excludes reports that contain the string
71 C<Perl_sv_2pv_flags>:
73 ctgetreports --q "qr:Perl_sv_2pv_flags" \
74 --filtercb 'my $rec = shift;
75 $rec->{"meta:ok"}="FILTERED" if $rec->{"qr:Perl_sv_2pv_flags"}
76 ' --solve Text-MiniTmpl-v2.0.0
78 =item B<--help|h>
80 Prints a brief message and exits.
82 =item B<--interactive|i>
84 After every parsed report asks if you want to see it in a pager.
86 =item B<--local>
88 Do not mirror, use a local *.yaml file. Dies if the YAML file
89 is missing, skips missing report files.
91 =item B<--minfail=i>
93 Same thing as --minpass but for fail reports.
95 Default value is the value of --minpass; if this is missing, no
96 default applies.
98 =item B<--minpass=i>
100 If --sample is set, then it could happen that randomness strikes
101 unluckily and the sample ends without a pass report. For the --solve
102 option this would then have the consequence that it cannot succeed. By
103 setting a minpass, the sample size is iteratively increased by small
104 steps until the number of passes is equal to this option or higher.
106 No default.
108 If --sample is not set, --minpass and --minfail are ignored.
110 =item B<--pager=s>
112 Pager (needed when -i is given). Defaults to C<less>.
114 =item B<--parse-common-errors|pce>
116 While the C<--q qr:...> syntax ultimately offers free parsing it is
117 cumbersome to use. The C<--parse-common-errors> option is a
118 placeholder for a variety of frequent errors to watch. Currently it
119 stands for the following additional options:
121 -q qr:(Failed test\s+\S+.*)
122 -q qr:(Failed to load .*)
123 -q qr:(Can't load .*)
124 -q qr:((?i:.*could.?n.t find.*))
125 -q qr:(Can't locate object method .+)
126 -q qr:(Can't locate \S+pm)
127 -q qr:(Please\s+install\s+\S+)
128 -q qr:(You tried to run a test without a plan.*)
129 -q qr:(.*Server didn't start.*)
130 -q qr:(You planned.*)
132 This list is subject to change in future releases.
134 =item B<--prefer-local-reports|plr>
136 Boolean. If true, we skip downloading of reports from cpantesters when
137 the file that is designated to be the local target of the mirror
138 command already exists. This is highly recommended since it has been
139 observed (2011-11) that cpantesters is not sending Last-Modified
140 headers for reports and does not send a 304 on requests with an
141 If-Modified-Since header. But even when the HTTP handling becomes more
142 efficient at cpantesters main site, this parameter should lower the
143 burden on them and reduce the latency on the mirror side considerably.
145 =item B<--q=s@>
147 Query, may be repeated.
149 Example: C<--q mod:Clone --q meta:writer>
151 =item B<--quiet!>
153 Do not output the usual query lines per parsed report. Quiet
154 overrules verbose.
156 =item B<--raw!>
158 Boolean which, if set, causes the full (HTML) report to be
159 concatenated to STDOUT after every status line.
161 =item B<--report=s@>
163 Avert going through a cpan testers index, go straight to the report
164 with this number.
166 Example: C<--report 1238673>
168 If report is set and dumpvars is not set, dumpvars will be set to a
169 dot (meaning that all variables shall be dumped into dumpfile).
171 =item B<--reportfiles=s@{1,}>
173 Specify file names for local report files. One to many files may be given.
175 Example: C<--reportfiles pass.Foo-Bar-0.01.i386-linux.123456789.12345.rpt fail.Foo-Bar-0.01.i386-linux.123456788.12344.rpt>
177 =item B<--sample=i>
179 Limit the number of reports to be analyzed. If the total number of
180 reports is lower than or equal to the value specified here then the
181 option is ignored and all available reports will be used. Only if the
182 total number of reports is larger than specified then the number of
183 reports will be sampled randomly to the demanded sample size. Useful
184 to limit the computing power needed for a result.
186 See also --minpass and --minfail.
188 =item B<--solve!>
190 Calls the solve function which tries to identify the best contenders
191 for a blame using Statistics::Regression. Currently only limited to
192 single variables and with simple heuristics. Implies C<--dumpvars=.>
193 unless the caller sets dumpvars himself.
195 The function prints at the moment to STDOUT the top 3 (set with
196 C<--solvetop>) candidates according to R^2 with their regression
197 analysis.
199 A few words of advice: do not ever take the results as proof. Take
200 them just as a hint where you can most probably prove a causal
201 relationship. And keep in mind that causal relationships can run in
202 the other direction as well.
204 If you want to extend on that approach, I recommend you study the
205 ctgetreports.out file where you find all the data you'd need and feed
206 your assumptions to Statistics::Regression.
208 =item B<--solvetop=i>
210 The number of top candidates from the C<--solve> regression analysis
211 to display.
213 =item B<--transport=s>
215 Specifies transport to get the reports. Defaults to C<http_cpantesters>.
217 C<http_cpantesters> uses LWP::UserAgent at static.cpantesters.org.
219 C<http_cpantesters_gzip> also uses LWP::UserAgent at
220 static.cpantesters.org but compresses the fetched result after fetching
221 and decompresses cached results before mirroring. This option
222 requires that C<Compress::Zlib> is installed.
224 =item B<--vdistro=s>
226 Versioned distro, e.g.
228 IPC-Run-0.80
232 Moose-2.1103-TRIAL
234 This is the way to target a version different from the most recent
235 one.
237 In the case that the command line argument already contains an easy to
238 recognize version as in C<IPC-Run-0.80>, that argument is split and
240 ctgetreports Foo-Bar-3.14
242 is equivalent to
244 ctgetreports --vdistro=Foo-Bar-3.14 Foo-Bar
246 Note, that there may be distributions on CPAN where the trivial
247 splitting implemented in ctgetreports does not work.
249 =item B<--verbose|v+>
251 Feedback during download.
253 =item B<--ycb=s>
255 Only used during --solve. Provides perl code to be used as a callback
256 from the regression to determine the B<Y> of the regression equation.
257 The callback function gets a record (hashref) as the only argument and
258 must return a value or undefined. If it returns undefined, the record
259 is skipped, otherwise this record is processed with the returned
260 value. The callback is pure perl code without any surrounding sub
261 declaration.
263 The following example analyses diagnostic output from Acme-Study-Perl:
265 ctgetreports --q qr:"#(.*native big math float/int.*)" --solve \
266 --ycb 'my $rec = shift;
267 my $nbfi = $rec->{"qr:#(.*native big math float/int.*)"};
268 return undef unless defined $nbfi;
269 my $VAR1 = eval($nbfi);
270 return $VAR1->{">"}' Acme-Study-Perl
272 =back
274 =head1 DESCRIPTION
276 The intent is to get at both the summary at cpantesters and the
277 individual reports and parse the reports and collect the data for
278 further inspection.
280 We always only fetch the reports for the most recent (optionally
281 picked) release. Target root directory is C<$HOME/var/cpantesters>
282 (can be overridden with the --cachedir option).
284 The C<--q> parameter can be repeated. It takes one argument which
285 stands for a query. This query must consist of two parts, a qualifier
286 and the query itself. Qualifiers are one of the following
288 conf parameters from the output of 'perl -V'
289 e.g.: conf:usethreads, conf:cc
290 mod for installed modules, either from prerequisites or from the toolchain
291 e.g.: mod:Test::Simple, mod:Imager
292 env environment variables
293 e.g.: env:TERM
294 meta all other parameters
295 e.g.: meta:perl, meta:from, meta:date, meta:writer
296 qr boolean set if the appended regexp matches the report
297 e.g.: qr:'division by zero'
299 The conf parameters specify a word used by the C<Config> module.
301 The mod parameters consist of a package name.
303 The meta parameters are the following: C<perl> for the perl version,
304 C<from> for the sender of the report, C<date> for the date in the mail
305 header, C<writer> for the module that produced the report,
306 C<output_from> for the line that is reported to have produced the output.
309 =head2 Examples
311 This gets all recent reports for Object-Relation and outputs the
312 version number of the prerequisite Clone:
314 $0 --q mod:Clone Object-Relation
316 Collects reports about Clone and reports the default set of metadata:
318 $0 Clone
320 Collect reports for Devel-Events and report the version number of
321 Moose in these reports and sort by success/failure. If Moose broke
322 Devel-Events it becomes pretty obvious:
324 $0 --q mod:Moose Devel-Events |sort
326 Which tool was used to write how many reports, sorted by frequency:
328 $0 --q meta:writer Template-Timer | sed -e 's/.*meta:writer//' | sort | uniq -c | sort -n
330 Who was in the From field of the mails whose report writer was not determined:
332 $0 --q meta:writer --q meta:from Template-Timer | grep 'UNDEF'
334 At the time of this writing this collected the results of
335 IPC-Run-0.80_91 which was not really the latest release. In this case
336 manual investigations were necessary to find out that 0.80 was the
337 most recent:
339 $0 IPC-Run
341 Pick the specific release IPC-Run-0.80:
343 $0 IPC-Run-0.80
345 The following displays in its own column if the report contains the
346 regexp C<division by zero>:
348 $0 --q qr:"division by zero" CPAN-Testers-ParseReport-0.0.7
350 The following is a simple job to refresh all HTML pages we already
351 have and fetch new reports referenced there too:
353 perl -le '
354 for my $dirent (glob "$ENV{HOME}/var/cpantesters/cpantesters-show/*.html"){
355 my($distro) = $dirent =~ m|/([^/]+)\.html$| or next;
356 print $distro;
357 my $system = "ctgetreports --verbose --verbose $distro";
358 0 == system $system or die;
361 =cut
# Main script body: parse the command line, verify that optional modules
# needed by the chosen options can be loaded, then hand over to
# CPAN::Testers::ParseReport either per report or per distro.
363 use strict;
364 use warnings;
366 use CPAN::Testers::ParseReport;
367 use Getopt::Long;
368 use Hash::Util qw(lock_keys);
369 use Pod::Usage qw(pod2usage);
371 our %Opt;
# The option specs are not listed twice: they are scraped from the
# "=item B<--...>" lines of the option POD held in $optpod.
372 my @opt = $optpod =~ /B<--(\S+)>/g;
# A spec without any of "+", "!" or "=" gets "!" appended before it is
# passed to GetOptions.
373 for (@opt) {
374 $_ .= "!" unless /[+!=]/;
# Lock %Opt to the primary option names (the part of each spec before the
# first "=", "!" or "|") so that a mistyped key is an error instead of a
# silently created entry.
376 lock_keys %Opt, map { /([^=!\|]+)/ } @opt;
# Unknown or malformed options end the program with a usage message
# (exit status 2).
378 GetOptions(\%Opt,
379 @opt,
380 ) or pod2usage(2);
382 if ($Opt{help}) {
383 pod2usage(0);
# Positional arguments: none are accepted together with --report or
# --reportfiles; otherwise exactly one (the distro name) is required.
386 if ($Opt{report} || $Opt{reportfiles}) {
387 if (@ARGV) {
388 pod2usage(2);
390 } else {
391 if (1 != @ARGV) {
392 pod2usage(2);
# Optional dependencies are loaded lazily, only when the option that
# needs them was given; a failed load is fatal.
396 if ($Opt{interactive}) {
397 eval { require IO::Prompt; 1; } or
398 die "Option '--interactive' requires IO::Prompt installed";
401 if ($Opt{solve}) {
402 eval { require Statistics::Regression };
403 if ($@) {
404 die "Statistics::Regression required for solved option: $@";
# In single-report mode dumpvars defaults to "." (a regexp matching every
# variable), as described in the --report documentation.
407 if ($Opt{report} || $Opt{reportfiles}) {
408 $Opt{dumpvars} ||= ".";
410 if ($Opt{dumpvars}) {
411 eval { require YAML::Syck };
412 if ($@) {
413 die "YAML::Syck required for dumpvars option: $@";
# --parse-common-errors expands to the "-q qr:..." list that is written in
# the option POD itself: pick the first paragraph of $optpod that starts
# with whitespace followed by "-q qr:" and append the "qr:..." part of
# every line in it to the list of queries.
417 if ($Opt{"parse-common-errors"}) {
418 $Opt{q} ||= [];
419 my($para) = grep {/^\s+-q qr:/} split /\n\n/, $optpod;
420 for my $line (split /\n/, $para) {
421 my($qr) = $line =~ /-q (qr:.*)/;
422 push @{$Opt{q}}, $qr;
# --minfail falls back to the value of --minpass when only the latter is set.
425 if (defined $Opt{minpass}) {
426 if (! defined $Opt{minfail}) {
427 $Opt{minfail} = $Opt{minpass};
# Turn on autoflush for the currently selected output handle.
431 $|=1;
# Report mode: each --report id goes through parse_single_report() and
# each --reportfiles path through parse_report(). The same $dumpvars
# hashref is handed to every call and afterwards to solve() or to the
# YAML dump below.
432 if ($Opt{report} || $Opt{reportfiles}) {
433 my $dumpvars = {};
434 if ($Opt{solve}) {
# NOTE(review): dumpvars was already defaulted to "." above for this mode,
# so this assignment looks redundant -- confirm before removing.
435 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
437 REPORT: for my $key (qw(report reportfiles)) {
438 my $reports = $Opt{$key};
439 if ($reports && @$reports) {
440 for my $report (@$reports) {
# $extract receives the parse result but is not read again in the code
# visible here.
441 my $extract;
442 if ($key eq 'report') {
443 $extract = eval { CPAN::Testers::ParseReport::parse_single_report({id => $report},$dumpvars,%Opt) };
444 } else {
445 $extract = eval { CPAN::Testers::ParseReport::parse_report($report,$dumpvars,%Opt) };
# A parse failure is only warned about, so the remaining reports are
# still processed. If $@ is a reference, its {text} entry is used as the
# message.
447 if ($@) {
448 if (ref $@) {
449 warn "Warning: error while parsing '$report': $@->{text}";
450 } else {
451 warn "Alert: error while parsing '$report': $@";
# Leave both loops as soon as the library has set its Signal flag.
454 last REPORT if $CPAN::Testers::ParseReport::Signal;
# With --solve, solve() is called on the collected variables; otherwise
# they are written as YAML to --dumpfile (default "ctgetreports.out").
458 if ($Opt{solve}) {
459 CPAN::Testers::ParseReport::solve($dumpvars,%Opt);
460 } else {
461 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
462 YAML::Syck::DumpFile($dumpfile,$dumpvars);
# Distro mode: strip everything up to and including the last "/" from the
# argument, then let parse_distro() do the work.
464 } else {
465 $ARGV[0] =~ s|.+/||;
466 CPAN::Testers::ParseReport::parse_distro($ARGV[0],%Opt);
469 __END__
471 # Local Variables:
472 # mode: cperl
473 # End: