From: Jakub Narebski Date: Sun, 5 Dec 2010 23:01:09 +0000 (+0100) Subject: gitweb/lib - Regenerate (refresh) cache in background X-Git-Url: https://repo.or.cz/w/git/jnareb-git.git/commitdiff_plain/3d88e50189e0e28911eaddec08033f6a133a223e gitweb/lib - Regenerate (refresh) cache in background This commit removes asymmetry in serving stale data (if stale data exists) when regenerating cache in GitwebCache::FileCacheWithLocking. The process that acquired exclusive (writers) lock, and is therefore selected to be the one that (re)generates data to fill the cache, can now generate data in background, while serving stale data. Those background processes are daemonized, i.e. detached from the main process (the one returning data or stale data). Otherwise there might be a problem when gitweb is running as (part of) long-lived process, for example from mod_perl or from FastCGI: it would leave unreaped children as zombies (entries in process table). We don't want to wait for background process, and we can't set $SIG{CHLD} to 'IGNORE' in gitweb to automatically reap child processes, because this interferes with using open my $fd, '-|', git_cmd(), 'param', ... or die_error(...) # read from <$fd> close $fd or die_error(...) In the above code "close" for magic "-|" open calls waitpid... and we would die with "No child processes". Removing 'or die' would possibly remove ability to react to other errors. This feature can be enabled or disabled on demand via 'background_cache' cache parameter. It is turned on by default. The t9503 test got updated to test both the case with background generation enabled and the case with background generation disabled. 
Inspired-by-code-by: John 'Warthog9' Hawley Signed-off-by: Jakub Narebski --- diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index f6932319ad..c5ba1d4f1e 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -335,6 +335,15 @@ our %cache_options = ( # Set it to -1 to always serve existing data if it exists, # set it to 0 to turn off serving stale data - always wait. 'max_lifetime' => 5*60*60, # 5 hours + + # This enables/disables background caching. If it is set to true value, + # caching engine would return stale data (if it is not older than + # 'max_lifetime' seconds) if it exists, and launch process if regenerating + # (refreshing) cache into the background. If it is set to false value, + # the process that fills cache must always wait for data to be generated. + # In theory this will make gitweb seem more responsive at the price of + # serving possibly stale data. + 'background_cache' => 1, ); # Set to _initialized_ instance of GitwebCache::Capture compatibile capturing # engine, i.e. one implementing ->new() constructor, and ->capture($code) diff --git a/gitweb/lib/GitwebCache/FileCacheWithLocking.pm b/gitweb/lib/GitwebCache/FileCacheWithLocking.pm index 1d32810e93..82e88f16ed 100644 --- a/gitweb/lib/GitwebCache/FileCacheWithLocking.pm +++ b/gitweb/lib/GitwebCache/FileCacheWithLocking.pm @@ -23,6 +23,7 @@ use warnings; use File::Path qw(mkpath); use Fcntl qw(:flock); +use POSIX qw(setsid); # ...................................................................... # constructor @@ -70,21 +71,27 @@ use Fcntl qw(:flock); # than it, serve stale data when waiting for cache entry to be # regenerated (refreshed). Non-adaptive. # Defaults to -1 (never expire / always serve stale). +# * 'background_cache' (boolean) +# This enables/disables regenerating cache in background process. +# Defaults to true. sub new { my $class = shift; my %opts = ref $_[0] ? 
%{ $_[0] } : @_; my $self = $class->SUPER::new(\%opts); - my ($max_lifetime); + my ($max_lifetime, $background_cache); if (%opts) { $max_lifetime = $opts{'max_lifetime'} || $opts{'max_cache_lifetime'}; + $background_cache = $opts{'background_cache'}; } $max_lifetime = -1 unless defined($max_lifetime); + $background_cache = 1 unless defined($background_cache); $self->set_max_lifetime($max_lifetime); + $self->set_background_cache($background_cache); return $self; } @@ -95,7 +102,7 @@ sub new { # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern # creates get_depth() and set_depth($depth) etc. methods -foreach my $i (qw(max_lifetime)) { +foreach my $i (qw(max_lifetime background_cache)) { my $field = $i; no strict 'refs'; *{"get_$field"} = sub { @@ -146,6 +153,52 @@ sub _tempfile_to_path { # ...................................................................... # interface methods +sub _set_maybe_background { + my ($self, $key, $fetch_code, $set_code) = @_; + + my $pid; + my (@result, @stale_result); + + if ($self->{'background_cache'}) { + # try to retrieve stale data + @stale_result = $fetch_code->() + if $self->is_valid($key, $self->get_max_lifetime()); + + # fork if there is stale data, for background process + # to regenerate/refresh the cache (generate data) + $pid = fork() if (@stale_result); + } + + if ($pid) { + ## forked and are in parent process + # reap child, which spawned grandchild process (detaching it) + waitpid $pid, 0; + + } else { + ## didn't fork, or are in background process + + # daemonize background process, detaching it from parent + # see also Proc::Daemonize, Apache2::SubProcess + if (defined $pid) { + ## in background process + POSIX::setsid(); # or setpgrp(0, 0); + fork() && CORE::exit(0); + } + + @result = $set_code->(); + + if (defined $pid) { + ## in background process; parent will serve stale data + + # lockfile will be automatically closed on exit, + # and therefore lockfile would be unlocked + CORE::exit(0); + } + 
} + + return @result > 0 ? @result : @stale_result; +} + sub _compute_generic { my ($self, $key, $get_code, $fetch_code, $set_code, $fetch_locked) = @_; @@ -162,16 +215,19 @@ sub _compute_generic { do { open my $lock_fh, '+>', $lockfile or die "Could't open lockfile '$lockfile': $!"; + $lock_state = flock($lock_fh, LOCK_EX | LOCK_NB); if ($lock_state) { - # acquired writers lock - @result = $set_code->(); + ## acquired writers lock, have to generate data + @result = $self->_set_maybe_background($key, $fetch_code, $set_code); # closing lockfile releases lock close $lock_fh or die "Could't close lockfile '$lockfile': $!"; } else { + ## didn't acquire writers lock, get stale data or wait for regeneration + # try to retrieve stale data @result = $fetch_code->() if $self->is_valid($key, $self->get_max_lifetime()); diff --git a/t/t9503/test_cache_interface.pl b/t/t9503/test_cache_interface.pl index 8a52261f64..7f088637fa 100755 --- a/t/t9503/test_cache_interface.pl +++ b/t/t9503/test_cache_interface.pl @@ -24,9 +24,13 @@ diag("Testing '$INC{'GitwebCache/FileCacheWithLocking.pm'}'"); my $cache = new_ok('GitwebCache::FileCacheWithLocking', [ { 'max_lifetime' => 0, # turn it off + 'background_cache' => 0, } ]); isa_ok($cache, 'GitwebCache::SimpleFileCache'); +# compute can fork, don't generate zombies +#local $SIG{CHLD} = 'IGNORE'; + # Test that default values are defined # ok(defined $GitwebCache::SimpleFileCache::DEFAULT_CACHE_ROOT, @@ -303,6 +307,9 @@ subtest 'parallel access' => sub { my $stale_value = 'Stale Value'; subtest 'serving stale data when (re)generating' => sub { + # without background generation + $cache->set_background_cache(0); + $cache->set($key, $stale_value); $call_count = 0; $cache->set_expires_in(0); # expire now @@ -312,12 +319,39 @@ subtest 'serving stale data when (re)generating' => sub { my $data = cache_compute($cache, $key, \&get_value_slow); print $data; }; - ok(scalar(grep { $_ eq $stale_value } @output), - 'stale data in at least one process 
when expired'); + # returning stale data works + is_deeply( + [sort @output], + [sort ($value, $stale_value)], + 'no background: stale data returned by one process' + ); + + $cache->set_expires_in(-1); # never expire for next ->get + is($cache->get($key), $value, + 'no background: value got set correctly, even if stale data returned'); + + + # with background generation + $cache->set_background_cache(1); + + $cache->set($key, $stale_value); + $cache->set_expires_in(0); # set value is now expired + @output = parallel_run { + my $data = cache_compute($cache, $key, \&get_value_slow); + print $data; + }; + # returning stale data works + is_deeply( + \@output, + [ ($stale_value) x 2 ], + 'background: stale data returned by both process when expired' + ); $cache->set_expires_in(-1); # never expire for next ->get + note('waiting for background process to have time to set data'); + sleep $slow_time; # wait for background process to have chance to set data is($cache->get($key), $value, - 'value got set correctly, even if stale data returned'); + 'background: value got set correctly by background process'); # $cache->set($key, $stale_value);