13 ~/src/installed-perls/v5.16.0/4e6d/bin/perl bin/refill-cpanstatsdb.pl
21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
27 =item B<--finishlimit=i>
29 A query that yields a result with fewer rows than this number is the
30 signal to refrain from further refill queries and finish this program.
31 Defaults to 0 which means other limits are needed to stop this
34 Note: before we invented the sleep parameters, this was how we
35 stopped the program. It is probably not needed anymore.
39 No default, which means no limit. Maximum number of records to inject.
40 If set to zero, we test the surroundings, then exit.
44 Maximum time in seconds this program should run. Defaults to 1770. If
45 set to zero, no limit.
49 Normally the database is asked for its max(id) and then the first
50 query is about one more than that with an open end. If --queryid is
51 specified, then we query that and only that and then finish the
54 =item B<--sleeplimit=i>
56 A query that yields a result with fewer rows than this number is the
57 signal to sleep for $Opt{sleeptime} seconds before querying again.
58 Defaults to 500. Do not set it too low; it would produce an annoying
61 =item B<--sleeptime=i>
63 How long to sleep when a query returns fewer rows than $Opt{sleeplimit}.
64 Defaults to 150 seconds.
70 Replacement for the job that downloaded the whole cpanstats.db and
73 Now we simply repeatedly fetch the descriptions for the next 2500
74 reports until the supply dries up. Thus we reach a new max, write all
75 the stuff to the db and let the other jobs work from there.
79 remove unneeded data, maybe split them out.
83 refill-cpanstatsdb-minutes.pl
89 use lib
"$FindBin::Bin/../CPAN-Blame/lib";
90 use CPAN
::Blame
::Config
::Cnntp
;
93 use File
::Basename
();
99 use Hash
::Util
qw(lock_keys);
100 use List
::Util
qw(min);
101 use lib
"$FindBin::Bin/../CPAN-Blame/lib";
102 use IPC
::ConcurrencyLimit
;
106 lock_keys
%Opt, map { /([^=|!]+)/ } @opt;
# Fill in defaults for options that were not supplied (how %Opt gets
# populated is not visible in this part of the file).
#
# The three `||=` assignments replace any false value, so an explicit 0
# (or empty string) for sleeplimit or sleeptime is silently turned into
# the default (500 / 150).  For finishlimit the default is 0 anyway.
113 $Opt{finishlimit
} ||= 0;
114 $Opt{sleeplimit
} ||= 500;
115 $Opt{sleeptime
} ||= 150;
# maxtime is tested with `defined` instead, so an explicit 0 survives;
# the option documentation describes 0 as "no limit".
116 $Opt{maxtime
} = 1770 unless defined $Opt{maxtime
};
120 $workdir = File
::Spec
->catdir
121 ($CPAN::Blame
::Config
::Cnntp
::Config
->{solver_vardir
},
125 my($basename) = File
::Basename
::basename
(__FILE__
);
126 my $limit = IPC
::ConcurrencyLimit
->new
129 path
=> "$workdir/IPC-ConcurrencyLimit-$basename",
131 my $limitid = $limit->get_lock;
133 warn "Another process appears to be still running. Exiting.";
138 use Time
::HiRes
qw(time);
140 use List
::Util
qw(max);
141 use CPAN
::Testers
::WWW
::Reports
::Query
::Reports
;
143 our $jsonxs = JSON
::XS
->new->indent(0);
144 our $redis = Redis
->new(reconnect
=> 120, every
=> 1000);
146 my($pgdbh,$pgsth,$pgmaxid,$nextid);
147 $pgdbh = DBI
->connect("dbi:Pg:dbname=analysis") or die "Could not connect to 'analysis': $DBI::err";
150 $nextid = $Opt{queryid
};
152 my $sql = "select max(id) from cpanstats";
153 $pgsth = $pgdbh->prepare($sql);
155 my $rv = eval { $pgsth->execute(); };
157 my $err = $pgsth->errstr;
158 die "Warning: error occurred while executing '$sql': $err";
161 my(@row) = $pgsth->fetchrow_array();
163 warn "INFO: In Pg found max id '$pgmaxid'";
164 $nextid = $pgmaxid+1;
167 my $sql = "INSERT INTO cpanstats
168 (id,guid,state,postdate,tester,dist,version,platform,perl,osname,osvers,fulldate,type) values
169 (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
170 $pgsth = $pgdbh->prepare($sql);
172 my $query = CPAN
::Testers
::WWW
::Reports
::Query
::Reports
->new;
174 my($pg_n,$pg_time) = (0,0);
# Build the range for this query: when --queryid is set only that single
# id is requested, otherwise an open-ended range starting at $nextid,
# written as "N-".
176 my $range = $Opt{queryid
} ?
$nextid : "$nextid-";
177 warn sprintf "%s: Next query range '%s'\n", scalar gmtime(), $range;
# $result is treated as a hash reference below; its keys are counted here
# and passed to max(), so they are presumably numeric ids -- TODO confirm
# against the query module.
178 my $result = $query->range($range);
# keys in scalar context yields the number of records returned.
179 my $querycnt = keys %$result;
# Highest key in this batch, or undef when the query returned nothing.
180 my $thismax = $querycnt > 0 ? max
(keys %$result) : undef;
# $thismax is undef for an empty result, hence the "<UNDEF>" placeholder
# (any other false value would print the placeholder as well).
181 warn sprintf "%s: Got %d records from '%s' to '%s'\n", scalar gmtime(), $querycnt, $nextid, $thismax||"<UNDEF>";
182 if (defined($Opt{maxins
}) && $Opt{maxins
} <= 0) {
186 if ($Opt{maxtime
} && time+$Opt{sleeptime
}-$^T
>= $Opt{maxtime
}) {
189 sleep $Opt{sleeptime
};
194 # so we have some work to do
196 my $logfile = sprintf
198 "%s/var/refill-cpanstatsdb/%04d/%02d/%04d%02d%02dT%02d%02d-%d-MAX.json.gz",
207 File
::Path
::mkpath File
::Basename
::dirname
$logfile;
209 die "ALERT: found '$logfile', will not overwrite it";
211 open my $fh, "|-", "gzip -9c > $logfile" or die "Could not open gzip to '$logfile': $!";
212 my $next_log = time + 60;
213 # dist => "Attribute-Overload",
214 # fulldate => 201205262229,
215 # guid => "4454e538-a782-11e1-802a-3db30df65b4f",
218 # osvers => "2.6.18-1.2798.fc6",
219 # perl => "5.16.0 RC0",
220 # platform => "i686-linux-thread-multi-64int-ld",
221 # postdate => 201205,
223 # tester => "Khen1950fx\@aol.com",
225 # version => "1.100710",
228 REC
: for my $id (sort {$a <=> $b} keys %$result) {
229 if (defined($Opt{maxins
}) && $inscount >= $Opt{maxins
}) {
232 if ($Opt{maxtime
} && time-$^T
>= $Opt{maxtime
}) {
236 my $record = $result->{$id};
237 if ($id > $pgmaxid) {
239 $pgsth->execute($id,@
{$record}{qw(guid state postdate tester dist version platform perl osname osvers fulldate type)});
241 $pg_time += time - $start;
243 my $distv = "$record->{dist}-$record->{version}";
244 $redis->sadd("analysis:distv:legalset",$distv);
245 #### hincrby not supported by our ubuntu redis
246 #### if ($record->{state} eq "pass") {
247 #### $redis->hincrby("analysis:distv:pass",$distv,1);
248 #### } elsif ($record->{state} eq "fail") {
249 #### $redis->hincrby("analysis:distv:fail",$distv,1);
251 # ddx $record; # see also Data::Dump line
252 print $fh $jsonxs->encode($record), "\n";
254 if (time >= $next_log) {
255 warn sprintf "%s: %d records inserted\n", scalar gmtime(), $i;
260 close $fh or die "Could not close gzip to '$logfile': $!";
261 my $finallogfile = $logfile;
263 $max_seen = $nextid - 1;
265 $finallogfile =~ s/MAX/$max_seen/;
266 rename $logfile, $finallogfile or die "Could not rename $logfile, $finallogfile: $!";
270 if ( $Opt{finishlimit
} && $querycnt < $Opt{finishlimit
}) {
273 if (defined($Opt{maxins
}) && $inscount >= $Opt{maxins
}) {
277 if ( $Opt{sleeplimit
} && $querycnt < $Opt{sleeplimit
} ) {
278 $sleeptime = $Opt{sleeptime
};
280 if ($Opt{maxtime
} && time+$sleeptime-$^T
>= $Opt{maxtime
}) {
286 $nextid = $thismax+1;
289 warn sprintf "STATS: pg avg ins time per rec %.5f\n", $pg_time/$pg_n;
292 # for the record: today I added the two:
293 # CREATE INDEX ixdist ON cpanstats (dist); # took ca 30 minutes
294 # CREATE INDEX ixtypestate ON cpanstats (type, state);
299 # cperl-indent-level: 4