new issue
[andk-cpan-tools.git] / bin / cnntp-refresh.pl
blob9b5448dc5ed98ee5073c0f20dd6a7fa10c909a85
1 #!/usr/bin/perl
3 die "Program is dated";
5 # use 5.010;
6 use strict;
7 use warnings;
9 =head1 NAME
11 cnntp-refresh.pl - collect metadata of postings to nntp.perl.org
13 =head1 SYNOPSIS
17 =head1 OPTIONS
19 =over 8
21 =cut
23 my @opt = <<'=back' =~ /B<--(\S+)>/g;
25 =item B<--sleep=i>
27 Will poll every N seconds.
29 =item B<--workdir=s>
31 defaults to something like ~/var/...
33 =back
35 =head1 END OF LIVE REACHED
37 This program is dated. Its original purpose was to update cpanstats.db
38 from develooper's nntp feed for the cpantesters' report mails. It's
39 only kept for hysterical raisins.
41 If you're looking for a successor, try refill-cpanstatsdb.pl.
43 =head1 DESCRIPTION
45 https://svn.develooper.com/projects/cnntp/ is the code base that
46 produces the pages. I found the link on
47 http://www.nntp.perl.org/about/
49 We fetched the database from
50 http://devel.cpantesters.org/cpanstats.db.gz and want to keep a
51 current/recent state of only a few fields. We are not interested much
52 in data from the month before.
54 % time perl -le '
55 use Net::NNTP; use YAML::XS qw(Dump);
56 use List::Pairwise qw(mapp grepp);
57 my $nntp=Net::NNTP->new("nntp.perl.org");
58 my(undef,undef,$last) = $nntp->group("perl.cpan.testers");
59 my %fail = grepp { $b =~ /^FAIL / } %{$nntp->xhdr("Subject", [$last-200, $last])};
60 print Dump \%fail;
62 ---
63 '4346542': FAIL Log-Accounting-SVN-0.01 i686-linux-thread-multi-ld 2.6.28-11-generic
64 '4346547': FAIL Parse-SVNDiff-0.03 i686-linux-thread-multi 2.6.28-11-generic
65 '4346552': FAIL Tie-Scalar-MarginOfError-0.03 i686-linux-thread-multi-ld 2.6.28-11-generic
66 '4346556': FAIL Pointer-0.11 i686-linux-thread-multi 2.6.28-11-generic
67 [...]
68 perl -le 0.08s user 0.02s system 0% cpu 10.059 total
70 =cut
73 use FindBin;
74 use lib "$FindBin::Bin/../CPAN-Blame/lib";
75 BEGIN {
76 push @INC, qw( /usr/local/perl-5.10.1-uld/lib/5.10.1/x86_64-linux-ld
77 /usr/local/perl-5.10.1-uld/lib/5.10.1
78 /usr/local/perl-5.10.1-uld/lib/site_perl/5.10.1/x86_64-linux-ld
79 /usr/local/perl-5.10.1-uld/lib/site_perl/5.10.1
80 );}
81 use CPAN::Blame::Config::Cnntp;
82 use CPAN::DistnameInfo;
83 use CPAN::WWW::Testers::Generator::Database;
84 # methods discussed in CPAN::Testers::Common::DBUtils
85 # schema in CPAN::WWW::Testers::Generator
87 use Dumpvalue;
88 use File::Basename qw(dirname);
89 use File::Path qw(mkpath);
90 use File::Spec;
91 use File::Temp;
92 use Getopt::Long;
93 use Hash::Util qw(lock_keys);
94 #use List::Pairwise qw(mapp grepp);
95 use List::Util qw(max min);
96 use Net::NNTP;
97 use Pod::Usage qw(pod2usage);
98 #use Set::IntSpan::Fast::XS ();
99 #use Set::IntSpan::Fast ();
100 use URI ();
101 use YAML::XS;
103 our %Opt;
104 lock_keys %Opt, map { /([^=]+)/ } @opt;
105 GetOptions(\%Opt,
106 @opt,
107 ) or pod2usage(1);
109 $Opt{workdir} ||= File::Spec->catdir(vardir(),"workdir");
110 $Opt{sleep} ||= 311;
111 #chdir $workdir
112 # or die("Couldn't change to $workdir: $!");
114 while () {
115 # read db
116 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>"$Opt{workdir}/cpanstats.db");
117 $dbi->{dbh}->func(1800000,'busy_timeout'); # undocumented, RTFS
118 # no locking of the db needed because only we write to it
120 # find out current article number
121 my $sql = "select max(id) from cpanstats";
122 my @rows = $dbi->get_query($sql);
123 my $maxid = $rows[0][0];
124 #$sql = "select * from cpanstats where id='$maxid'";
125 #@rows = $dbi->get_query($sql);
127 =pod
129 0 | id | 5379605 |5430514
130 1 | state | pass |fail
131 2 | postdate | 200909 |
132 3 | tester | bingos@cpan.org |
133 4 | dist | Yahoo-Photos |Apache-Admin-Config
134 5 | version | 0.0.2 |0.94
135 6 | platform | i386-freebsd-thread-multi-64int |i386-freebsd
136 7 | perl | 5.10.0 |
137 8 | osname | freebsd |
138 9 | osvers | 7.2-release |7.2-release
139 10 | date | 200909190440 |200909190440
141 From: George Greer
142 Date: October 31, 2009 20:40
143 Subject: PASS Log-StdLog-v0.0.3 x86_64-linux-thread-multi 2.6.28-15-generic
145 DB<22> x $dbi->get_query("select * from cpanstats where id=5829958")
146 0 ARRAY(0x3a32280)
147 0 5829958
148 1 'pass'
149 2 undef
150 3 undef
151 4 'Log-StdLog'
152 5 'v0.0.3'
153 6 'x86_64-linux-thread-multi'
154 7 undef
155 8 undef
156 9 '2.6.28-15-generic'
157 10 200911010342
159 =cut
161 # (since parsereport has the full articlespace of things having
162 # failed once we need not fill in the missing data)
164 # (all other data we will need are owned by "contemplate" or even
165 # others)
167 # merge everything (up to 200?,500?) between max of deb and current into db
168 my $nntp;
169 while (!$nntp) {
170 unless ($nntp=Net::NNTP->new("nntp.perl.org")) {
171 warn "Warning: Could not create nntp object for talking to perl.org; sleeping 15\n";
172 sleep 15;
175 my(undef,undef,$nnmaxid) = $nntp->group("perl.cpan.testers");
176 my($from) = $maxid+1;
177 my($upto) = min($from+3000, $nnmaxid);
178 if ($from <= $upto) {
179 warn sprintf "fetching subjects of articles %s..%s (nnmaxid being $nnmaxid)\n", $from, $upto;
180 my $articles = $nntp->xhdr("Subject", [$from, $upto]); # hashref
181 warn "inserting\n";
182 my @now = gmtime;
183 $now[4]++;
184 $now[5]+=1900;
185 my $date = sprintf "%04d%02d%02d%02d%02d", @now[reverse 1..5];
186 while (my($id,$subject) = each %$articles) {
187 my @split = split " ", $subject;
188 if (@split <= 3) { # suspect xhdr bug discovered 2009-10-15
189 my $head = $nntp->head($id);
190 my $subject2 = "";
191 my $in_subject = 0;
192 ARTICLE_HEAD: for my $line (@$head) {
193 if ($line =~ /^Subject:\s*(.+)\n/) {
194 $subject2 = $1;
195 $in_subject = 1;
196 } elsif ($in_subject) {
197 if ($line =~ /^\s+(.+)\n/) {
198 $subject2 .= " ".$1;
199 } else {
200 last ARTICLE_HEAD;
204 if ($subject2 ne $subject) {
205 warn "Info: revoking subject[$subject] using subject2[$subject2]";
206 @split = split " ", $subject2;
209 my($ok,$distro,$platform,$osvers) = @split;
210 my $d = CPAN::DistnameInfo->new("FOO/$distro.tgz");
211 my $sql = "INSERT INTO cpanstats (id,state,dist,version,platform,osvers,date) VALUES (?,?,?,?,?,?,?)";
212 $dbi->do_query
214 $sql,
215 $id,
216 lc($ok),
217 $d->dist,
218 $d->version,
219 $platform,
220 $osvers,
221 $date,
223 if ($ok =~ /fail/i) {
224 printf "%d: %s\n", $id, $distro;
227 warn "DEBUG: about to commit at ".scalar gmtime()."\n";
228 $dbi->do_commit;
229 } else {
230 warn "Nothing to do, already fetched newest article $nnmaxid\n";
233 # release db
234 undef $dbi;
235 warn sprintf "sleeping %s at %s\n", $Opt{sleep}, scalar localtime;
236 sleep $Opt{sleep};
239 sub vardir {
240 return $CPAN::Blame::Config::Cnntp::Config->{solver_vardir};