694c3182e20daf20fd7f851a0d92c2c834dc169e
[git/jnareb-git.git] / gitweb / lib / GitwebCache / FileCacheWithLocking.pm
blob694c3182e20daf20fd7f851a0d92c2c834dc169e
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache, with locking
12 # Based on GitwebCache::SimpleFileCache, minimalistic cache that
13 # stores data in the filesystem, without serialization.
15 # It uses file locks (flock) to have only one process generating data
16 # and writing to cache, when using CHI interface ->compute() method.
18 package GitwebCache::FileCacheWithLocking;
19 use base qw(GitwebCache::SimpleFileCache);
21 use strict;
22 use warnings;
24 use File::Path qw(mkpath);
25 use Fcntl qw(:flock);
26 use POSIX qw(setsid);
28 # ......................................................................
29 # constructor
31 # The options are set by passing in a reference to a hash containing
32 # any of the following keys:
33 # * 'namespace'
34 # The namespace associated with this cache. This allows easy separation of
35 # multiple, distinct caches without worrying about key collision. Defaults
36 # to $DEFAULT_NAMESPACE.
37 # * 'cache_root' (Cache::FileCache compatibile),
38 # 'root_dir' (CHI::Driver::File compatibile),
39 # The location in the filesystem that will hold the root of the cache.
40 # Defaults to $DEFAULT_CACHE_ROOT.
41 # * 'cache_depth' (Cache::FileCache compatibile),
42 # 'depth' (CHI::Driver::File compatibile),
43 # The number of subdirectories deep to cache object item. This should be
44 # large enough that no cache directory has more than a few hundred objects.
45 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
46 # * 'default_expires_in' (Cache::Cache compatibile),
47 # 'expires_in' (CHI compatibile) [seconds]
48 # The expiration time for objects place in the cache.
49 # Defaults to -1 (never expire) if not explicitly set.
50 # Sets 'expires_min' to given value.
51 # * 'expires_min' [seconds]
52 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
53 # Used as lower bound in adaptive cache lifetime / expiration.
54 # Defaults to 20 seconds; 'expires_in' sets it also.
55 # * 'expires_max' [seconds]
56 # The maximum expiration time for objects in cache.
57 # Used as upper bound in adaptive cache lifetime / expiration.
58 # Defaults to 1200 seconds, if not set;
59 # defaults to 'expires_min' if 'expires_in' is used.
60 # * 'check_load'
61 # Subroutine (code) used for adaptive cache lifetime / expiration.
62 # If unset, adaptive caching is turned off; defaults to unset.
63 # * 'increase_factor' [seconds / 100% CPU load]
64 # Factor multiplying 'check_load' result when calculating cache lietime.
65 # Defaults to 60 seconds for 100% SPU load ('check_load' returning 1.0).
67 # (all the above are inherited from GitwebCache::SimpleFileCache)
69 # * 'max_lifetime' [seconds]
70 # If it is greater than 0, and cache entry is expired but not older
71 # than it, serve stale data when waiting for cache entry to be
72 # regenerated (refreshed). Non-adaptive.
73 # Defaults to -1 (never expire / always serve stale).
74 # * 'background_cache' (boolean)
75 # This enables/disables regenerating cache in background process.
76 # Defaults to true.
77 # * 'generating_info'
78 # Subroutine (code) called when process has to wait for cache entry
79 # to be (re)generated (when there is no not-too-stale data to serve
80 # instead), for other process (or bacground process). It is passed
81 # $cache instance, $key, and opened $lock_fh filehandle to lockfile.
82 # Unset by default (which means no activity indicator).
83 sub new {
84 my $class = shift;
85 my %opts = ref $_[0] ? %{ $_[0] } : @_;
87 my $self = $class->SUPER::new(\%opts);
89 my ($max_lifetime, $background_cache, $generating_info);
90 if (%opts) {
91 $max_lifetime =
92 $opts{'max_lifetime'} ||
93 $opts{'max_cache_lifetime'};
94 $background_cache = $opts{'background_cache'};
95 $generating_info = $opts{'generating_info'};
97 $max_lifetime = -1 unless defined($max_lifetime);
98 $background_cache = 1 unless defined($background_cache);
100 $self->set_max_lifetime($max_lifetime);
101 $self->set_background_cache($background_cache);
102 $self->set_generating_info($generating_info);
104 return $self;
107 # ......................................................................
108 # accessors
110 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
112 # creates get_depth() and set_depth($depth) etc. methods
113 foreach my $i (qw(max_lifetime background_cache generating_info)) {
114 my $field = $i;
115 no strict 'refs';
116 *{"get_$field"} = sub {
117 my $self = shift;
118 return $self->{$field};
120 *{"set_$field"} = sub {
121 my ($self, $value) = @_;
122 $self->{$field} = $value;
126 # $cache->generating_info($key, $lock);
127 # runs 'generating_info' subroutine, for activity indicator,
128 # checking if it is defined first.
129 sub generating_info {
130 my $self = shift;
132 if (defined $self->{'generating_info'}) {
133 $self->{'generating_info'}->($self, @_);
137 # ----------------------------------------------------------------------
138 # utility functions and methods
140 # Take an human readable key, and return path to be used for lockfile
141 # Ensures that file can be created, if needed.
142 sub get_lockname {
143 my ($self, $key) = @_;
145 my $lockfile = $self->path_to_key($key, \my $dir) . '.lock';
147 # ensure that directory leading to lockfile exists
148 if (!-d $dir) {
149 eval { mkpath($dir, 0, 0777); 1 }
150 or die "Couldn't mkpath '$dir' for lockfile: $!";
153 return $lockfile;
156 # ----------------------------------------------------------------------
157 # "private" utility functions and methods
159 # take a file path to cache entry, and its directory
160 # return filehandle and filename of open temporary file,
161 # like File::Temp::tempfile
162 sub _tempfile_to_path {
163 my ($self, $file, $dir) = @_;
165 my $tempname = "$file.tmp";
166 open my $temp_fh, '>', $tempname
167 or die "Couldn't open temporary file '$tempname' for writing: $!";
169 return ($temp_fh, $tempname);
172 # ......................................................................
173 # interface methods
175 sub _wait_for_data {
176 my ($self, $key,
177 $lock_fh, $lockfile,
178 $fetch_code, $fetch_locked) = @_;
179 my @result;
181 # provide "generating page..." info, if exists
182 $self->generating_info($key, $lock_fh);
183 # generating info may exit, so we can not get there
185 # get readers lock, i.e. wait for writer,
186 # which might be background process
187 flock($lock_fh, LOCK_SH);
188 # closing lockfile releases lock
189 if ($fetch_locked) {
190 @result = $fetch_code->();
191 close $lock_fh
192 or die "Could't close lockfile '$lockfile': $!";
193 } else {
194 close $lock_fh
195 or die "Could't close lockfile '$lockfile': $!";
196 @result = $fetch_code->();
199 return @result;
202 sub _set_maybe_background {
203 my ($self, $key, $fetch_code, $set_code) = @_;
205 my $pid;
206 my (@result, @stale_result);
208 if ($self->{'background_cache'}) {
209 # try to retrieve stale data
210 @stale_result = $fetch_code->()
211 if $self->is_valid($key, $self->get_max_lifetime());
213 # fork if there is stale data, for background process
214 # to regenerate/refresh the cache (generate data),
215 # or if main process would show progress indicator
216 $pid = fork()
217 if (@stale_result || $self->{'generating_info'});
220 if ($pid) {
221 ## forked and are in parent process
222 # reap child, which spawned grandchild process (detaching it)
223 waitpid $pid, 0;
225 } else {
226 ## didn't fork, or are in background process
228 # daemonize background process, detaching it from parent
229 # see also Proc::Daemonize, Apache2::SubProcess
230 if (defined $pid) {
231 ## in background process
232 POSIX::setsid(); # or setpgrp(0, 0);
233 fork() && CORE::exit(0);
236 @result = $set_code->();
238 if (defined $pid) {
239 ## in background process; parent will serve stale data
241 # lockfile will be automatically closed on exit,
242 # and therefore lockfile would be unlocked
243 CORE::exit(0);
247 return @result > 0 ? @result : @stale_result;
250 sub _compute_generic {
251 my ($self, $key,
252 $get_code, $fetch_code, $set_code, $fetch_locked) = @_;
254 my @result = $get_code->();
255 return @result if @result;
257 my $lockfile = $self->get_lockname($key);
259 # this loop is to protect against situation where process that
260 # acquired exclusive lock (writer) dies or exits (die_error)
261 # before writing data to cache
262 my $lock_state; # needed for loop condition
263 do {
264 open my $lock_fh, '+>', $lockfile
265 or die "Could't open lockfile '$lockfile': $!";
267 $lock_state = flock($lock_fh, LOCK_EX | LOCK_NB);
268 if ($lock_state) {
269 ## acquired writers lock, have to generate data
270 @result = $self->_set_maybe_background($key, $fetch_code, $set_code);
272 # closing lockfile releases writer lock
273 close $lock_fh
274 or die "Could't close lockfile '$lockfile': $!";
276 if (!@result) {
277 # wait for background process to finish generating data
278 open $lock_fh, '<', $lockfile
279 or die "Couldn't reopen (for reading) lockfile '$lockfile': $!";
281 @result = $self->_wait_for_data($key, $lock_fh, $lockfile,
282 $fetch_code, $fetch_locked);
285 } else {
286 ## didn't acquire writers lock, get stale data or wait for regeneration
288 # try to retrieve stale data
289 @result = $fetch_code->()
290 if $self->is_valid($key, $self->get_max_lifetime());
291 return @result if @result;
293 # wait for regeneration
294 @result = $self->_wait_for_data($key, $lock_fh, $lockfile,
295 $fetch_code, $fetch_locked);
298 } until (@result || $lock_state);
299 # repeat until we have data, or we tried generating data oneself and failed
300 return @result;
303 # $data = $cache->compute($key, $code);
305 # Combines the get and set operations in a single call. Attempts to
306 # get $key; if successful, returns the value. Otherwise, calls $code
307 # and uses the return value as the new value for $key, which is then
308 # returned.
310 # Uses file locking to have only one process updating value for $key
311 # to avoid 'cache miss stampede' (aka 'stampeding herd') problem.
312 sub compute {
313 my ($self, $key, $code) = @_;
315 return ($self->_compute_generic($key,
316 sub {
317 return $self->get($key);
319 sub {
320 return $self->fetch($key);
322 sub {
323 my $data = $code->();
324 $self->set($key, $data);
325 return $data;
327 0 # $self->get($key); is outside LOCK_SH critical section
328 ))[0]; # return single value: $data
331 # ($fh, $filename) = $cache->compute_fh($key, $code);
333 # Combines the get and set operations in a single call. Attempts to
334 # get $key; if successful, returns the filehandle it can be read from.
335 # Otherwise, calls $code passing filehandle to write to as a
336 # parameter; contents of this file is then used as the new value for
337 # $key; returns filehandle from which one can read newly generated data.
339 # Uses file locking to have only one process updating value for $key
340 # to avoid 'cache miss stampede' (aka 'stampeding herd') problem.
341 sub compute_fh {
342 my ($self, $key, $code_fh) = @_;
344 return $self->_compute_generic($key,
345 sub {
346 return $self->get_fh($key);
348 sub {
349 return $self->fetch_fh($key);
351 sub {
352 return $self->set_coderef_fh($key, $code_fh);
354 1 # $self->fetch_fh($key); just opens file
359 __END__
360 # end of package GitwebCache::FileCacheWithLocking