adjust urls according to a mail from Barbie to reduce the strain on cpantesters
[cpan-testers-parsereport.git] / bin / ctgetreports
blobcfd30874fb219eaae6658d5bfaa139d120296c34
1 #!/usr/bin/perl
3 =head1 NAME
5 ctgetreports - Quickly fetch cpantesters results with all reports
7 =head1 SYNOPSIS
9 ctgetreports [options] distroname
10 ctgetreports [options] --report number ...
11 ctgetreports -h
13 =head1 OPTIONS
15 A distroname is either unversioned as in C<IPC-Run> or versioned as in
16 C<IPC-Run-0.80>.
18 =over 2
20 =cut
# The option documentation that follows is captured into $optpod by a
# here-doc that ends at the "=back" line. Besides being documentation, this
# text is read by the script itself: the B<--...> items are scanned to build
# the Getopt::Long option list, and the indented "-q qr:" paragraph supplies
# the queries that --parse-common-errors expands to.
22 my $optpod = <<'=back';
24 =item B<--cachedir=s>
26 Directory to keep mirrored data in. Defaults to C<$HOME/var/cpantesters>.
28 =item B<--ctdb=s>
30 If you have your own copy of the cpan testers I<cpanstats> database
31 you can use this to set the path to the database. Must not be used
32 together with C<--cturl>. Requires
33 C<CPAN::WWW::Testers::Generator::Database> and C<CPAN::DistnameInfo>
34 installed.
36 =item B<--ctformat=s>
38 Format of the cpan-testers file that should be downloaded. Available
39 options were originally C<html> and C<yaml>. With major construction
40 works going on the HTML on cpantesters is now unsupported for the time
41 being.
43 =item B<--cturl=s>
45 Base URL of the cpantesters website. Defaults to
46 C<http://static.cpantesters.org/show>.
48 =item B<--dumpfile=s>
50 If dumpvars are specified, dump them into this file. Defaults to "ctgetreports.out".
52 =item B<--dumpvars=s>
54 Dump all queryable variables matching the regular expression given as
55 argument at the end of the loop for a distro.
57 =item B<--filtercb=s>
59 A callback that is called at the end of parse_report(). It allows you to
60 manipulate the result, e.g. change the resulting values or add
61 calculated values. The callback function gets a record (hashref) as
62 the only argument. The return value is ignored. The callback is pure
63 perl code without any surrounding sub declaration.
65 Compared to the C<--ycb> callback described below C<--filtercb> is
66 considered easier to use.
68 The following example excludes reports by the user C<jack.blacksmoke>
69 from regression testing:
71 ctgetreports --q --solve --filtercb '
72 my $rec = shift;
73 $rec->{"meta:ok"}="FILTERED" if $rec->{"meta:from"} =~ /jack.blacksmoke/;
74 ' String-RewritePrefix-0.005
76 =item B<--help|h>
78 Prints a brief message and exits.
80 =item B<--interactive|i>
82 After every parsed report asks if you want to see it in a pager.
84 =item B<--local>
86 Do not mirror, use a local *.html file. Dies if the HTML or YAML file
87 is missing, skips missing report files.
89 =item B<--pager=s>
91 Pager (needed when -i is given). Defaults to C<less>.
93 =item B<--parse-common-errors|pce>
95 While the C<--q qr:...> syntax ultimately offers free parsing it is
96 cumbersome to use. The C<--parse-common-errors> option is a
97 placeholder for a variety of frequent errors to watch. Currently it
98 stands for the following additional options:
100 -q qr:(Failed test\s+\S+.*)
101 -q qr:(Failed to load .*)
102 -q qr:(Can't load .*)
103 -q qr:((?i:.*could.?n.t find.*))
104 -q qr:(Can't locate object method .+)
105 -q qr:(Can't locate \S+pm)
106 -q qr:(Please\s+install\s+\S+)
107 -q qr:(You tried to run a test without a plan.*)
108 -q qr:(.*Server didn't start.*)
109 -q qr:(You planned.*)
111 This list is subject to change in future releases.
113 =item B<--q=s@>
115 Query, may be repeated.
117 Example: C<--q mod:Clone --q meta:writer>
119 =item B<--quiet!>
121 Do not output the usual query lines per parsed report. Quiet
122 overrules verbose.
124 =item B<--raw!>
126 Boolean which, if set, causes the full (HTML) report to be
127 concatenated to STDOUT after every status line.
129 =item B<--report=s@>
131 Avert going through a cpan testers index, go straight to the report
132 with this number.
134 Example: C<--report 1238673>
136 If report is set and dumpvars is not set, dumpvars will be set to a
137 dot (meaning that all variables shall be dumped into dumpfile).
139 =item B<--sample=i>
141 Limit the number of reports to be analyzed. If the total number of
142 reports is lower than or equal to the value specified here then the
143 option is ignored and all available reports will be used. Only if the
144 total number of reports is larger than specified then the number of
145 reports will be sampled randomly to the demanded sample size. Useful
146 to limit the computing power needed for a result.
148 =item B<--solve!>
150 Calls the solve function which tries to identify the best contenders
151 for a blame using Statistics::Regression. Currently only limited to
152 single variables and with simple heuristics. Implies C<--dumpvars=.>
153 unless the caller sets dumpvars himself.
155 The function prints at the moment to STDOUT the top 3 (set with
156 C<--solvetop>) candidates according to R^2 with their regression
157 analysis.
159 A few words of advice: do not ever take the results as proof. Take
160 them just as a hint where you can most probably prove a causal
161 relationship. And keep in mind that causal relationships can be the
162 other direction as well.
164 If you want to extend on that approach, I recommend you study the
165 ctgetreports.out file where you find all the data you'd need and feed
166 your assumptions to Statistics::Regression.
168 =item B<--solvetop=i>
170 The number of top candidates from the C<--solve> regression analysis
171 to display.
173 =item B<--transport=s>
175 Specifies transport to get the reports. Defaults to C<http_cpantesters>.
177 C<http_cpantesters> uses LWP::UserAgent at static.cpantesters.org.
179 C<http_cpantesters_gzip> also uses LWP::UserAgent at
180 static.cpantesters.org but compresses the fetched result after fetching
181 and decompresses cached results before mirroring. This option
182 requires that the external programs C<gzip>, C<gunzip>, and C<zcat> are
183 installed on the system.
185 =item B<--vdistro=s>
187 Versioned distro, e.g.
189 IPC-Run-0.80
191 Usually not needed because a versioned distro name can be specified as
192 normal commandline argument.
194 =item B<--verbose|v+>
196 Feedback during download.
198 =item B<--ycb=s>
200 Only used during --solve. Provides perl code to be used as a callback
201 from the regression to determine the B<Y> of the regression equation.
202 The callback function gets a record (hashref) as the only argument and
203 must return a value or undefined. If it returns undefined, the record
204 is skipped, otherwise this record is processed with the returned
205 value. The callback is pure perl code without any surrounding sub
206 declaration.
208 The following example analyses diagnostic output from Acme-Study-Perl:
210 ctgetreports --q qr:"#(.*native big math float/int.*)" --solve \
211 --ycb 'my $rec = shift;
212 my $nbfi = $rec->{"qr:#(.*native big math float/int.*)"};
213 return undef unless defined $nbfi;
214 my $VAR1 = eval($nbfi);
215 return $VAR1->{">"}' Acme-Study-Perl
217 =back
219 =head1 DESCRIPTION
221 !!!!Alert: alpha quality software, subject to change without warning!!!!
223 The intent is to get at both the summary at cpantesters and the
224 individual reports and parse the reports and collect the data for
225 further inspection.
227 We always only fetch the reports for the most recent (optionally
228 picked) release. Target root directory is C<$HOME/var/cpantesters>
229 (can be overridden with the --cachedir option).
231 The C<--q> parameter can be repeated. It takes one argument which
232 stands for a query. This query must consist of two parts, a qualifier
233 and the query itself. Qualifiers are one of the following
235 conf parameters from the output of 'perl -V'
236 e.g.: conf:usethreads, conf:cc
237 mod for installed modules, either from prerequisites or from the toolchain
238 e.g.: mod:Test::Simple, mod:Imager
239 env environment variables
240 e.g.: env:TERM
241 meta all other parameters
242 e.g.: meta:perl, meta:from, meta:date, meta:writer
243 qr boolean set if the appended regexp matches the report
244 e.g.: qr:'division by zero'
246 The conf parameters specify a word used by the C<Config> module.
248 The mod parameters consist of a package name.
250 The meta parameters are the following: C<perl> for the perl version,
251 C<from> for the sender of the report, C<date> for the date in the mail
252 header, C<writer> for the module that produced the report,
253 C<output_from> for the line that is reported to have produced the output.
256 =head2 Examples
258 This gets all recent reports for Object-Relation and outputs the
259 version number of the prerequisite Clone:
261 $0 --q mod:Clone Object-Relation
263 Collects reports about Clone and reports the default set of metadata:
265 $0 Clone
267 Collect reports for Devel-Events and report the version number of
268 Moose in these reports and sort by success/failure. If Moose broke
269 Devel-Events it becomes pretty obvious:
271 $0 --q mod:Moose Devel-Events |sort
273 Which tool was used to write how many reports, sorted by frequency:
275 $0 --q meta:writer Template-Timer | sed -e 's/.*meta:writer//' | sort | uniq -c | sort -n
277 Who was in the From field of the mails whose report writer was not determined:
279 $0 --q meta:writer --q meta:from Template-Timer | grep 'UNDEF'
281 At the time of this writing this collected the results of
282 IPC-Run-0.80_91 which was not really the latest release. In this case
283 manual investigations were necessary to find out that 0.80 was the
284 most recent:
286 $0 IPC-Run
288 Pick the specific release IPC-Run-0.80:
290 $0 IPC-Run-0.80
292 The following displays in its own column if the report contains the
293 regexp C<division by zero>:
295 $0 --q qr:"division by zero" CPAN-Testers-ParseReport-0.0.7
297 The following is a simple job to refresh all HTML pages we already
298 have and fetch new reports referenced there too:
300 perl -le '
301 for my $dirent (glob "$ENV{HOME}/var/cpantesters/cpantesters-show/*.html"){
302 my($distro) = $dirent =~ m|/([^/]+)\.html$| or next;
303 print $distro;
304 my $system = "ctgetreports --verbose --verbose $distro";
305 0 == system $system or die;
308 =cut
310 use strict;
311 use warnings;
313 use CPAN::Testers::ParseReport;
314 use Getopt::Long;
315 use Hash::Util qw(lock_keys);
316 use Pod::Usage qw(pod2usage);
# Main script body: derive the option list from the POD in $optpod, parse
# and validate the command line, then either parse individual reports
# (--report) or a whole distribution (single positional argument).

# Parsed command line options, keyed by primary option name.
318 our %Opt;
# Every "B<--spec>" item in the option POD is a Getopt::Long option spec.
319 my @opt = $optpod =~ /B<--(\S+)>/g;
320 for (@opt) {
# Specs without a "+", "!" or "=" marker get "!" appended, which is
# Getopt::Long's marker for a negatable boolean flag.
321 $_ .= "!" unless /[+!=]/;
# Restrict %Opt to the primary option names (the part of each spec before
# the first "=", "!" or "|") so that a mistyped key is an error.
323 lock_keys %Opt, map { /([^=!\|]+)/ } @opt;
# Parse @ARGV into %Opt; on a parse failure show usage via pod2usage(2).
325 GetOptions(\%Opt,
326 @opt,
327 ) or pod2usage(2);
# --help: show usage via pod2usage(0).
329 if ($Opt{help}) {
330 pod2usage(0);
# Argument check: with --report no positional arguments are accepted;
# without it exactly one (the distro name) is required.
333 if ($Opt{report}) {
334 if (@ARGV) {
335 pod2usage(2);
337 } else {
338 if (1 != @ARGV) {
339 pod2usage(2);
# --solve needs Statistics::Regression; it is loaded only on demand.
# NOTE(review): the message says "solved option" although the option is
# named --solve.
343 if ($Opt{solve}) {
344 eval { require Statistics::Regression };
345 if ($@) {
346 die "Statistics::Regression required for solved option: $@";
# With --report, dumpvars defaults to "." unless the caller set it.
349 if ($Opt{report}) {
350 $Opt{dumpvars} ||= ".";
# Dumping variables needs YAML::Syck; it is loaded only on demand.
352 if ($Opt{dumpvars}) {
353 eval { require YAML::Syck };
354 if ($@) {
355 die "YAML::Syck required for dumpvars option: $@";
# --parse-common-errors: take the first blank-line-separated chunk of
# $optpod that starts with whitespace followed by "-q qr:" and append the
# "qr:..." part of each of its lines to the list of queries in $Opt{q}.
359 if ($Opt{"parse-common-errors"}) {
360 $Opt{q} ||= [];
361 my($para) = grep {/^\s+-q qr:/} split /\n\n/, $optpod;
362 for my $line (split /\n/, $para) {
363 my($qr) = $line =~ /-q (qr:.*)/;
364 push @{$Opt{q}}, $qr;
# Turn on autoflush for the currently selected output handle.
368 $|=1;
# Report mode: "report" is removed from %Opt before %Opt is passed on.
369 if (my $reports = delete $Opt{report}) {
# Shared hashref handed to every parse_single_report() call and written
# to the dump file afterwards; presumably the parser fills it -- TODO
# confirm against CPAN::Testers::ParseReport.
370 my $dumpvars = {};
371 REPORT: for my $report (@$reports) {
# NOTE(review): $extract is never read in the visible code.
372 my $extract = eval { CPAN::Testers::ParseReport::parse_single_report({id => $report},$dumpvars,%Opt) };
# A failed report only produces a warning. Reference exceptions are
# expected to carry their message in {text}; anything else is printed
# as is.
373 if ($@) {
374 if (ref $@) {
375 warn "Warning: error while parsing '$report': $@->{text}";
376 } else {
377 warn "Alert: error while parsing '$report': $@";
# Stop processing further reports once the module's $Signal is true
# (presumably set when the run was interrupted -- verify in the module).
380 last REPORT if $CPAN::Testers::ParseReport::Signal;
# Write the collected variables as YAML; the file name defaults to
# "ctgetreports.out".
382 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
383 YAML::Syck::DumpFile($dumpfile,$dumpvars);
384 } else {
# Distro mode: strip everything up to and including the last "/" from the
# argument, then hand the distro name and all options to parse_distro().
385 $ARGV[0] =~ s|.+/||;
386 CPAN::Testers::ParseReport::parse_distro($ARGV[0],%Opt);
389 __END__
391 # Local Variables:
392 # mode: cperl
393 # End: