Merge branch 'broken-by-amend'
[cpan-testers-parsereport.git] / bin / ctgetreports
blob72055d637f819c6d4719d77ecd8f5932a34132a5
1 #!/usr/bin/perl
3 =head1 NAME
5 ctgetreports - Quickly fetch cpantesters results with all reports
7 =head1 SYNOPSIS
9 ctgetreports [options] distroname
10 ctgetreports [options] --report number ...
11 ctgetreports -h
13 =head1 OPTIONS
15 A distroname is unversioned, e.g. C<IPC-Run>. For versioned names as
16 in C<IPC-Run-0.80> see --vdistro.
18 =over 2
20 =cut
22 my $optpod = <<'=back';
24 =item B<--cachedir=s>
26 Directory to keep mirrored data in. Defaults to C<$HOME/var/cpantesters>.
28 =item B<--ctdb=s>
30 If you have your own copy of the cpan testers I<cpanstats> database
31 you can use this to set the path to the database. Must not be used
32 together with C<--cturl>. Requires
33 C<CPAN::WWW::Testers::Generator::Database> and C<CPAN::DistnameInfo>
34 installed.
36 =item B<--cturl=s>
38 Base URL of the cpantesters website. Defaults to
39 C<http://static.cpantesters.org/show>.
41 =item B<--dumpfile=s>
43 If dumpvars are specified, dump them into this file. Defaults to "ctgetreports.out".
45 =item B<--dumpvars=s>
47 Dump all queryable variables matching the regular expression given as
48 argument at the end of the loop for a distro.
50 =item B<--filtercb=s>
52 A callback that is called at the end of parse_report(). It allows you to
53 manipulate the result, e.g. change the resulting values or add
54 calculated values. The callback function gets a record (hashref) as
55 the only argument. The return value is ignored. The callback is pure
56 perl code without any surrounding sub declaration.
58 Compared to the C<--ycb> callback described below C<--filtercb> is
59 considered easier to use.
61 The following example excludes reports by the user C<jack.blacksmoke>
62 from regression testing:
64 ctgetreports --q --solve --filtercb '
65 my $rec = shift;
66 $rec->{"meta:ok"}="FILTERED" if $rec->{"meta:from"} =~ /jack.blacksmoke/;
67 ' String-RewritePrefix-0.005
69 =item B<--help|h>
71 Prints a brief message and exits.
73 =item B<--interactive|i>
75 After every parsed report asks if you want to see it in a pager.
77 =item B<--local>
79 Do not mirror, use a local *.yaml file. Dies if the YAML file
80 is missing, skips missing report files.
82 =item B<--pager=s>
84 Pager (needed when -i is given). Defaults to C<less>.
86 =item B<--parse-common-errors|pce>
88 While the C<--q qr:...> syntax ultimately offers free parsing it is
89 cumbersome to use. The C<--parse-common-errors> option is a
90 placeholder for a variety of frequent errors to watch. Currently it
91 stands for the following additional options:
93 -q qr:(Failed test\s+\S+.*)
94 -q qr:(Failed to load .*)
95 -q qr:(Can't load .*)
96 -q qr:((?i:.*could.?n.t find.*))
97 -q qr:(Can't locate object method .+)
98 -q qr:(Can't locate \S+pm)
99 -q qr:(Please\s+install\s+\S+)
100 -q qr:(You tried to run a test without a plan.*)
101 -q qr:(.*Server didn't start.*)
102 -q qr:(You planned.*)
104 This list is subject to change in future releases.
106 =item B<--prefer-local-reports|plr>
108 Boolean. If true, we skip downloading of reports from cpantesters when
109 the file that is designated to be the local target of the mirror
110 command already exists. This is highly recommended since it has been
111 observed (2011-11) that cpantesters is not sending Last-Modified
112 headers for reports and does not send a 304 on requests with an
113 If-Modified-Since header. But even when the HTTP handling becomes more
114 efficient at cpantesters main site, this parameter should lower the
115 burden on them and reduce the latency on the mirror side considerably.
117 =item B<--q=s@>
119 Query, may be repeated.
121 Example: C<--q mod:Clone --q meta:writer>
123 =item B<--quiet!>
125 Do not output the usual query lines per parsed report. Quiet
126 overrules verbose.
128 =item B<--raw!>
130 Boolean which, if set, causes the full (HTML) report to be
131 concatenated to STDOUT after every status line.
133 =item B<--report=s@>
135 Avert going through a cpan testers index, go straight to the report
136 with this number.
138 Example: C<--report 1238673>
140 If report is set and dumpvars is not set, dumpvars will be set to a
141 dot (meaning that all variables shall be dumped into dumpfile).
143 =item B<--sample=i>
145 Limit the number of reports to be analyzed. If the total number of
146 reports is lower than or equal to the value specified here then the
147 option is ignored and all available reports will be used. Only if the
148 total number of reports is larger than specified then the number of
149 reports will be sampled randomly to the demanded sample size. Useful
150 to limit the computing power needed for a result.
152 =item B<--solve!>
154 Calls the solve function which tries to identify the best contenders
155 for a blame using Statistics::Regression. Currently only limited to
156 single variables and with simple heuristics. Implies C<--dumpvars=.>
157 unless the caller sets dumpvars himself.
159 The function prints at the moment to STDOUT the top 3 (set with
160 C<--solvetop>) candidates according to R^2 with their regression
161 analysis.
163 A few words of advice: do not take the results as proof, ever. Take
164 them just as a hint where you can most probably prove a causal
165 relationship. And keep in mind that causal relationships can be the
166 other direction as well.
168 If you want to extend on that approach, I recommend you study the
169 ctgetreports.out file where you find all the data you'd need and feed
170 your assumptions to Statistics::Regression.
172 =item B<--solvetop=i>
174 The number of top candidates from the C<--solve> regression analysis
175 to display.
177 =item B<--transport=s>
179 Specifies transport to get the reports. Defaults to C<http_cpantesters>.
181 C<http_cpantesters> uses LWP::UserAgent at static.cpantesters.org.
183 C<http_cpantesters_gzip> also uses LWP::UserAgent at
184 static.cpantesters.org but compresses the fetched result after fetching
185 and decompresses cached results before mirroring. This option
186 requires that C<Compress::Zlib> is installed.
188 =item B<--vdistro=s>
190 Versioned distro, e.g.
192 IPC-Run-0.80
196 Moose-2.1103-TRIAL
198 This is the way to target a version different from the most recent
199 one.
201 In the case that the command line argument already contains an easy to
202 recognize version as in C<IPC-Run-0.80>, that argument is split and
204 ctgetreports Foo-Bar-3.14
206 is equivalent to
208 ctgetreports --vdistro=Foo-Bar-3.14 Foo-Bar
210 Note that there may be distributions on CPAN where the trivial
211 splitting implemented in ctgetreports does not work.
213 =item B<--verbose|v+>
215 Feedback during download.
217 =item B<--ycb=s>
219 Only used during --solve. Provides perl code to be used as a callback
220 from the regression to determine the B<Y> of the regression equation.
221 The callback function gets a record (hashref) as the only argument and
222 must return a value or undefined. If it returns undefined, the record
223 is skipped, otherwise this record is processed with the returned
224 value. The callback is pure perl code without any surrounding sub
225 declaration.
227 The following example analyses diagnostic output from Acme-Study-Perl:
229 ctgetreports --q qr:"#(.*native big math float/int.*)" --solve \
230 --ycb 'my $rec = shift;
231 my $nbfi = $rec->{"qr:#(.*native big math float/int.*)"};
232 return undef unless defined $nbfi;
233 my $VAR1 = eval($nbfi);
234 return $VAR1->{">"}' Acme-Study-Perl
236 =back
238 =head1 DESCRIPTION
240 The intent is to get at both the summary at cpantesters and the
241 individual reports and parse the reports and collect the data for
242 further inspection.
244 We always only fetch the reports for the most recent (optionally
245 picked) release. Target root directory is C<$HOME/var/cpantesters>
246 (can be overridden with the --cachedir option).
248 The C<--q> parameter can be repeated. It takes one argument which
249 stands for a query. This query must consist of two parts, a qualifier
250 and the query itself. Qualifiers are one of the following
252 conf parameters from the output of 'perl -V'
253 e.g.: conf:usethreads, conf:cc
254 mod for installed modules, either from prerequisites or from the toolchain
255 e.g.: mod:Test::Simple, mod:Imager
256 env environment variables
257 e.g.: env:TERM
258 meta all other parameters
259 e.g.: meta:perl, meta:from, meta:date, meta:writer
260 qr boolean set if the appended regexp matches the report
261 e.g.: qr:'division by zero'
263 The conf parameters specify a word used by the C<Config> module.
265 The mod parameters consist of a package name.
267 The meta parameters are the following: C<perl> for the perl version,
268 C<from> for the sender of the report, C<date> for the date in the mail
269 header, C<writer> for the module that produced the report,
270 C<output_from> for the line that is reported to have produced the output.
273 =head2 Examples
275 This gets all recent reports for Object-Relation and outputs the
276 version number of the prerequisite Clone:
278 $0 --q mod:Clone Object-Relation
280 Collects reports about Clone and reports the default set of metadata:
282 $0 Clone
284 Collect reports for Devel-Events and report the version number of
285 Moose in these reports and sort by success/failure. If Moose broke
286 Devel-Events it becomes pretty obvious:
288 $0 --q mod:Moose Devel-Events |sort
290 Which tool was used to write how many reports, sorted by frequency:
292 $0 --q meta:writer Template-Timer | sed -e 's/.*meta:writer//' | sort | uniq -c | sort -n
294 Who was in the From field of the mails whose report writer was not determined:
296 $0 --q meta:writer --q meta:from Template-Timer | grep 'UNDEF'
298 At the time of this writing this collected the results of
299 IPC-Run-0.80_91 which was not really the latest release. In this case
300 manual investigations were necessary to find out that 0.80 was the
301 most recent:
303 $0 IPC-Run
305 Pick the specific release IPC-Run-0.80:
307 $0 IPC-Run-0.80
309 The following displays in its own column if the report contains the
310 regexp C<division by zero>:
312 $0 --q qr:"division by zero" CPAN-Testers-ParseReport-0.0.7
314 The following is a simple job to refresh all HTML pages we already
315 have and fetch new reports referenced there too:
317 perl -le '
318 for my $dirent (glob "$ENV{HOME}/var/cpantesters/cpantesters-show/*.html"){
319 my($distro) = $dirent =~ m|/([^/]+)\.html$| or next;
320 print $distro;
321 my $system = "ctgetreports --verbose --verbose $distro";
322 0 == system $system or die;
325 =cut
327 use strict;
328 use warnings;
330 use CPAN::Testers::ParseReport;
331 use Getopt::Long;
332 use Hash::Util qw(lock_keys);
333 use Pod::Usage qw(pod2usage);
# ----------------------------------------------------------------------
# Main program.
#
# The Getopt::Long specifications are not spelled out a second time: they
# are harvested from the B<--...> item headers of the option POD held in
# $optpod, so the documentation and the option parser stay in sync.
# ----------------------------------------------------------------------
our %Opt;
my @opt = $optpod =~ /B<--(\S+)>/g;
for my $spec (@opt) {
    # A spec without an explicit type or modifier becomes a negatable boolean.
    $spec .= "!" unless $spec =~ /[+!=]/;
}
# Restrict %Opt to the primary option names (the part of each spec before
# any '=', '!' or '|') so that a mistyped key is caught at once.
lock_keys %Opt, map { /([^=!\|]+)/ } @opt;

GetOptions(\%Opt,
           @opt,
          ) or pod2usage(2);

pod2usage(0) if $Opt{help};

# With --report no positional argument is accepted; without it exactly one
# (the distro name) is required.
if ($Opt{report}) {
    pod2usage(2) if @ARGV;
} else {
    pod2usage(2) unless 1 == @ARGV;
}

# Optional modules are loaded only when the option that needs them is used.
# The "eval { ...; 1 } or die" form reports a failure even when $@ is empty.
if ($Opt{interactive}) {
    eval { require IO::Prompt; 1 }
        or die "Option '--interactive' requires IO::Prompt installed";
}

if ($Opt{solve}) {
    eval { require Statistics::Regression; 1 }
        or die "Statistics::Regression required for solve option: $@";
}
if ($Opt{report}) {
    # Single reports are always dumped; "." matches every variable name.
    $Opt{dumpvars} ||= ".";
}
if ($Opt{dumpvars}) {
    eval { require YAML::Syck; 1 }
        or die "YAML::Syck required for dumpvars option: $@";
}

if ($Opt{"parse-common-errors"}) {
    # The queries implied by this option are read from the indented
    # "-q qr:..." paragraph of the option documentation itself.
    $Opt{q} ||= [];
    my($para) = grep { /^\s+-q qr:/ } split /\n\n/, $optpod;
    if (defined $para) {
        for my $line (split /\n/, $para) {
            # Skip any line that does not carry a query instead of pushing
            # undef onto the query list.
            my($qr) = $line =~ /-q (qr:.*)/ or next;
            push @{$Opt{q}}, $qr;
        }
    } else {
        warn "Warning: no list of common error queries found in the option documentation\n";
    }
}

$| = 1;
if (my $reports = delete $Opt{report}) {
    # Report mode: parse every requested report, collecting the variables
    # into $dumpvars, then write them to the dump file.
    my $dumpvars = {};
  REPORT: for my $report (@$reports) {
        eval {
            CPAN::Testers::ParseReport::parse_single_report({id => $report},$dumpvars,%Opt);
            1;
        } or do {
            my $err = $@;    # copy at once, $@ is a global
            if (ref $err) {
                warn "Warning: error while parsing '$report': $err->{text}";
            } else {
                warn "Alert: error while parsing '$report': $err";
            }
        };
        last REPORT if $CPAN::Testers::ParseReport::Signal;
    }
    my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
    YAML::Syck::DumpFile($dumpfile,$dumpvars);
} else {
    # Distro mode: drop everything up to and including the last slash of
    # the argument before handing it to the parser.
    $ARGV[0] =~ s|.+/||;
    CPAN::Testers::ParseReport::parse_distro($ARGV[0],%Opt);
}
411 __END__
413 # Local Variables:
414 # mode: cperl
415 # End: