LJSUP-17669: Login.bml form refactoring
[livejournal.git] / cgi-bin / statslib.pl
blob292f0905b80ecb5bab1efb661bf463bb3ba86611
1 #!/usr/bin/perl
4 # Partial Stats
7 use strict;
9 package LJ::Stats;
11 %LJ::Stats::INFO = (
12 # jobname => { type => 'global' || 'clustered',
13 # jobname => jobname
14 # statname => statname || [statname1, statname2]
15 # handler => sub {},
16 # max_age => age }
19 sub LJ::Stats::register_stat {
20 my $stat = shift;
21 return undef unless ref $stat eq 'HASH';
23 $stat->{'type'} = $stat->{'type'} eq 'clustered' ? 'clustered' : 'global';
24 return undef unless $stat->{'jobname'};
25 $stat->{'statname'} ||= $stat->{'jobname'};
26 return undef unless ref $stat->{'handler'} eq 'CODE';
27 delete $stat->{'max_age'} unless $stat->{'max_age'} > 0;
29 # register in master INFO hash
30 $LJ::Stats::INFO{$stat->{'jobname'}} = $stat;
32 return 1;
35 sub LJ::Stats::run_stats {
36 my @stats = @_ ? @_ : sort keys %LJ::Stats::INFO;
38 # clear out old partialstatsdata for clusters which are no longer active
39 # (not in @LJ::CLUSTERS)
40 LJ::Stats::clear_invalid_cluster_parts();
42 foreach my $jobname (@stats) {
44 my $stat = $LJ::Stats::INFO{$jobname};
46 # stats calculated on global db reader
47 if ($stat->{'type'} eq "global") {
48 unless (LJ::Stats::need_calc($jobname)) {
49 print "-I- Up-to-date: $jobname\n";
50 next;
53 # rather than passing an actual db handle to the stat handler,
54 # just pass a getter subef so it can be revalidated as necessary
55 my $dbr_getter = sub {
56 return LJ::Stats::get_db("dbr")
57 or die "Can't get db reader handle.";
60 print "-I- Running: $jobname\n";
62 my $res = $stat->{'handler'}->($dbr_getter);
63 die "Error running '$jobname' handler on global reader."
64 unless $res;
66 if ($stat->{'cleanup'}) {
67 LJ::Stats::cleanup_stat($stat->{'statname'});
70 # 2 cases:
71 # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } )
72 # - 'statname' is scalar, %res structure is ( 'arg' => 'val' )
74 if (ref $stat->{'statname'} eq 'ARRAY') {
75 foreach my $statname (@{$stat->{'statname'}}) {
76 foreach my $key (keys %{$res->{$statname}}) {
77 LJ::Stats::save_stat($statname, $key, $res->{$statname}->{$key});
80 } else {
81 my $statname = $stat->{'statname'};
82 foreach my $key (keys %$res) {
83 LJ::Stats::save_stat($statname, $key, $res->{$key});
88 LJ::Stats::save_calc($jobname);
90 next;
93 # stats calculated per-cluster
94 if ($stat->{'type'} eq "clustered") {
96 foreach my $cid (@LJ::CLUSTERS) {
97 unless (LJ::Stats::need_calc($jobname, $cid)) {
98 print "-I- Up-to-date: $jobname, cluster $cid\n";
99 next;
102 # pass a dbcr getter subref so the stat handler knows how
103 # to revalidate its database handles, by invoking this closure
104 my $dbcr_getter = sub {
105 return LJ::Stats::get_db("dbcr", $cid)
106 or die "Can't get cluster $cid db handle.";
109 print "-I- Running: $jobname, cluster $cid\n";
111 my $res = $stat->{'handler'}->($dbcr_getter, $cid);
112 die "Error running '$jobname' handler on cluster $cid."
113 unless $res;
115 # 2 cases:
116 # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } )
117 # - 'statname' is scalar, %res structure is ( 'arg' => 'val' )
119 if (ref $stat->{'statname'} eq 'ARRAY') {
120 foreach my $statname (@{$stat->{'statname'}}) {
121 foreach my $key (keys %{$res->{$statname}}) {
122 LJ::Stats::save_part($statname, $cid, $key, $res->{$statname}->{$key});
125 } else {
126 my $statname = $stat->{'statname'};
127 foreach my $key (keys %$res) {
128 LJ::Stats::save_part($statname, $cid, $key, $res->{$key});
133 LJ::Stats::save_calc($jobname, $cid);
136 # save the summation(s) of the statname(s) we found above
137 if (ref $stat->{'statname'} eq 'ARRAY') {
138 foreach my $statname (@{$stat->{'statname'}}) {
139 LJ::Stats::save_sum($statname);
141 } else {
142 LJ::Stats::save_sum($stat->{'statname'});
148 return 1;
151 # get raw dbr/dbh/cluster handle
152 sub LJ::Stats::get_db {
153 my $type = shift;
154 return undef unless $type;
155 my $cid = shift;
157 # tell DBI to revalidate connections before returning them
158 $LJ::DBIRole->clear_req_cache();
160 my $opts = {raw=>1,nocache=>1}; # get_dbh opts
162 # global handles
163 if ($type eq "dbr") {
164 my @roles = $LJ::STATS_FORCE_SLOW ? ("slow") : ("slave", "master");
166 my $db = LJ::get_dbh($opts, @roles);
167 return $db if $db;
169 # don't fall back to slave/master if STATS_FORCE_SLOW is on
170 die "ERROR: Could not get handle for slow database role\n"
171 if $LJ::STATS_FORCE_SLOW;
173 return undef;
176 return LJ::get_dbh($opts, 'master')
177 if $type eq "dbh";
179 # cluster handles
180 return undef unless $cid > 0;
181 return LJ::get_cluster_def_reader($opts, $cid)
182 if $type eq "dbcm" || $type eq "dbcr";
184 return undef;
187 # save a given stat to the 'stats' table in the db
188 sub LJ::Stats::save_stat {
189 my ($cat, $statkey, $val) = @_;
190 return undef unless $cat && $statkey && $val;
192 # replace/insert stats row
193 my $dbh = LJ::Stats::get_db("dbh");
194 $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES (?, ?, ?)",
195 undef, $cat, $statkey, $val);
196 die $dbh->errstr if $dbh->err;
198 return 1;
201 sub cleanup_stat {
202 my ($cat) = @_;
204 return unless $cat;
206 my $dbh = LJ::Stats::get_db('dbh');
207 $dbh->{'RaiseError'} = 1;
209 $dbh->do("DELETE FROM stats WHERE statcat=?", undef, $cat);
211 return 1;
214 # note the last calctime of a given stat
215 sub LJ::Stats::save_calc {
216 my ($jobname, $cid) = @_;
217 return unless $jobname;
219 my $dbh = LJ::Stats::get_db("dbh");
220 $dbh->do("REPLACE INTO partialstats (jobname, clusterid, calctime) " .
221 "VALUES (?,?,UNIX_TIMESTAMP())", undef, $jobname, $cid || 1);
222 die $dbh->errstr if $dbh->err;
224 return 1;
227 # save partial stats
228 sub LJ::Stats::save_part {
229 my ($statname, $cid, $arg, $value) = @_;
230 return undef unless $statname && $cid > 0;
232 # replace/insert partialstats(data) row
233 my $dbh = LJ::Stats::get_db("dbh");
234 $dbh->do("REPLACE INTO partialstatsdata (statname, arg, clusterid, value) " .
235 "VALUES (?,?,?,?)", undef, $statname, $arg, $cid, $value);
236 die $dbh->errstr if $dbh->err;
238 return 1;
241 # see if a given stat is stale
242 sub LJ::Stats::need_calc {
243 my ($jobname, $cid) = @_;
244 return undef unless $jobname;
246 return 1 if $LJ::IS_DEV_SERVER;
248 my $dbr = LJ::Stats::get_db("dbr");
249 my $calctime = $dbr->selectrow_array("SELECT calctime FROM partialstats " .
250 "WHERE jobname=? AND clusterid=?",
251 undef, $jobname, $cid || 1);
253 my $max = $LJ::Stats::INFO{$jobname}->{'max_age'} || 3600*6; # 6 hours default
254 return ($calctime < time() - $max);
257 # clear invalid partialstats data for old clusters
258 # -- this way if clusters go inactive/dead their partial tallies won't remain
259 sub LJ::Stats::clear_invalid_cluster_parts {
261 # delete partialstats rows for invalid clusters
262 # -- query not indexed, but data set is small. could add one later
263 my $dbh = LJ::Stats::get_db("dbh");
264 my $bind = join(",", map { "?" } @LJ::CLUSTERS);
265 $dbh->do("DELETE FROM partialstatsdata WHERE clusterid NOT IN ($bind)",
266 undef, @LJ::CLUSTERS);
267 die $dbh->errstr if $dbh->err;
269 return 1;
272 # sum up counts for all clusters
273 sub LJ::Stats::save_sum {
274 my $statname = shift;
275 return undef unless $statname;
277 # get sum of this stat for all clusters
278 my $dbr = LJ::Stats::get_db("dbr");
279 my $sth = $dbr->prepare("SELECT arg, SUM(value) FROM partialstatsdata " .
280 "WHERE statname=? GROUP BY 1");
281 $sth->execute($statname);
282 while (my ($arg, $count) = $sth->fetchrow_array) {
283 next unless $count;
284 LJ::Stats::save_stat($statname, $arg, $count);
287 return 1;
290 # get number of pages, given a total row count
291 sub LJ::Stats::num_blocks {
292 my $row_tot = shift;
293 return 0 unless $row_tot;
295 return int($row_tot / $LJ::STATS_BLOCK_SIZE) + (($row_tot % $LJ::STATS_BLOCK_SIZE) ? 1 : 0);
298 # get low/high ids for a BETWEEN query based on page number
299 sub LJ::Stats::get_block_bounds {
300 my ($block, $offset) = @_;
301 return ($offset+0, $offset+$LJ::STATS_BLOCK_SIZE) unless $block;
303 # calculate min, then add one to not overlap previous max,
304 # unless there was no previous max so we set to 0 so we don't
305 # miss rows with id=0
306 my $min = ($block-1)*$LJ::STATS_BLOCK_SIZE + 1;
307 $min = $min == 1 ? 0 : $min;
309 return ($offset+$min, $offset+$block*$LJ::STATS_BLOCK_SIZE);
312 sub LJ::Stats::block_status_line {
313 my ($block, $total) = @_;
314 return "" unless $LJ::Stats::VERBOSE;
315 return "" if $total == 1; # who cares about percentage for one block?
317 # status line gets called AFTER work is done, so we show percentage
318 # for $block+1, that way the final line displays 100%
319 my $pct = sprintf("%.2f", 100*($block / ($total || 1)));
320 return " [$pct%] Processing block $block of $total.\n";