0823c5564d20107d2e07b9ba95548b79c6ad433f
[git/jnareb-git.git] / gitweb / lib / GitwebCache / FileCacheWithLocking.pm
blob0823c5564d20107d2e07b9ba95548b79c6ad433f
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache, with locking
12 # Based on GitwebCache::SimpleFileCache, minimalistic cache that
13 # stores data in the filesystem, without serialization.
15 # It uses file locks (flock) to have only one process generating data
16 # and writing to cache, when using CHI interface ->compute() method.
18 package GitwebCache::FileCacheWithLocking;
19 use base qw(GitwebCache::SimpleFileCache);
21 use strict;
22 use warnings;
24 use File::Path qw(mkpath);
25 use Fcntl qw(:flock);
26 use POSIX qw(setsid);
28 # ......................................................................
29 # constructor
31 # The options are set by passing in a reference to a hash containing
32 # any of the following keys:
33 # * 'namespace'
34 # The namespace associated with this cache. This allows easy separation of
35 # multiple, distinct caches without worrying about key collision. Defaults
36 # to $DEFAULT_NAMESPACE.
37 # * 'cache_root' (Cache::FileCache compatible),
38 # 'root_dir' (CHI::Driver::File compatible),
39 # The location in the filesystem that will hold the root of the cache.
40 # Defaults to $DEFAULT_CACHE_ROOT.
41 # * 'cache_depth' (Cache::FileCache compatible),
42 # 'depth' (CHI::Driver::File compatible),
43 # The number of subdirectories deep to cache object item. This should be
44 # large enough that no cache directory has more than a few hundred objects.
45 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
46 # * 'default_expires_in' (Cache::Cache compatible),
47 # 'expires_in' (CHI compatible) [seconds]
48 # The expiration time for objects placed in the cache.
49 # Defaults to -1 (never expire) if not explicitly set.
50 # Sets 'expires_min' to given value.
51 # * 'expires_min' [seconds]
52 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
53 # Used as lower bound in adaptive cache lifetime / expiration.
54 # Defaults to 20 seconds; 'expires_in' sets it also.
55 # * 'expires_max' [seconds]
56 # The maximum expiration time for objects in cache.
57 # Used as upper bound in adaptive cache lifetime / expiration.
58 # Defaults to 1200 seconds, if not set;
59 # defaults to 'expires_min' if 'expires_in' is used.
60 # * 'check_load'
61 # Subroutine (code) used for adaptive cache lifetime / expiration.
62 # If unset, adaptive caching is turned off; defaults to unset.
63 # * 'increase_factor' [seconds / 100% CPU load]
64 # Factor multiplying 'check_load' result when calculating cache lifetime.
65 # Defaults to 60 seconds for 100% CPU load ('check_load' returning 1.0).
67 # (all the above are inherited from GitwebCache::SimpleFileCache)
69 # * 'max_lifetime' [seconds]
70 # If it is greater than 0, and cache entry is expired but not older
71 # than it, serve stale data when waiting for cache entry to be
72 # regenerated (refreshed). Non-adaptive.
73 # Defaults to -1 (never expire / always serve stale).
74 # * 'background_cache' (boolean)
75 # This enables/disables regenerating cache in background process.
76 # Defaults to true.
77 # * 'generating_info'
78 # Subroutine (code) called when process has to wait for cache entry
79 # to be (re)generated (when there is no not-too-stale data to serve
80 # instead), for other process (or background process). It is passed
81 # $cache instance, $key, and opened $lock_fh filehandle to lockfile.
82 # Unset by default (which means no activity indicator).
83 # * 'generating_info_is_safe' (boolean)
84 # If true, run 'generating_info' subroutine also in the process that
85 # is generating data. This has effect only when 'background_cache'
86 # is true (both 'background_cache' and 'generating_info_is_safe' must
87 # be true for process generating data to run 'generating_info').
88 # Defaults to false for safety.
sub new {
    my $class = shift;
    # options may be given as a hash reference or as a flat key/value list
    my %opts = ref $_[0] ? %{ $_[0] } : @_;

    # the parent constructor receives the complete set of options
    my $self = $class->SUPER::new(\%opts);

    # 'max_cache_lifetime' is accepted as an alternative spelling.
    # Test for definedness rather than truth, so that an explicit
    # 'max_lifetime' => 0 is kept instead of silently turning into the
    # default of -1 (which means the opposite: always serve stale data).
    my $max_lifetime = defined $opts{'max_lifetime'}
        ? $opts{'max_lifetime'}
        : $opts{'max_cache_lifetime'};
    my $background_cache = $opts{'background_cache'};
    my $generating_info  = $opts{'generating_info'};
    my $gen_info_is_safe = $opts{'generating_info_is_safe'};

    # defaults for options that were not provided;
    # 'generating_info' has no default and stays undefined if unset
    $max_lifetime     = -1 unless defined($max_lifetime);
    $background_cache = 1  unless defined($background_cache);
    $gen_info_is_safe = 0  unless defined($gen_info_is_safe);

    $self->set_max_lifetime($max_lifetime);
    $self->set_background_cache($background_cache);
    $self->set_generating_info($generating_info);
    $self->set_generating_info_is_safe($gen_info_is_safe);

    return $self;
}
116 # ......................................................................
117 # accessors
119 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
# Generates a get_<field>() / set_<field>($value) method pair in the
# current package for each field listed below, e.g. get_max_lifetime()
# and set_max_lifetime($value).  Values live directly in the object hash
# under the field name.
for my $name (qw(max_lifetime background_cache
                 generating_info generating_info_is_safe)) {
    # private copy of the name for the closures to capture
    my $slot = $name;

    my $getter = sub {
        my ($self) = @_;
        return $self->{$slot};
    };
    my $setter = sub {
        my ($self, $value) = @_;
        $self->{$slot} = $value;
    };

    # installing subs under computed names needs symbolic references
    no strict 'refs';
    *{"get_$slot"} = $getter;
    *{"set_$slot"} = $setter;
}
# $cache->generating_info($key, $lock);
#
# Activity-indicator hook.  If a 'generating_info' callback is stored in
# the object, it is invoked with the cache object followed by whatever
# arguments were given to this method; otherwise nothing is run.
sub generating_info {
    my $self = shift;

    my $callback = $self->{'generating_info'};
    if (defined $callback) {
        $callback->($self, @_);
    }
}
147 # ----------------------------------------------------------------------
148 # utility functions and methods
# Take a human-readable key, and return the path to be used for its
# lockfile (the cache entry path with '.lock' appended).
# Ensures that the directory for the lockfile exists, creating it if
# needed; dies if the directory cannot be created.
sub get_lockname {
    my ($self, $key) = @_;

    # path_to_key() is handed a reference through which the directory
    # part of the path is read back below
    my $lockfile = $self->path_to_key($key, \my $dir) . '.lock';

    # ensure that directory leading to lockfile exists
    if (!-d $dir) {
        # mkpath() signals failure by croaking, so the reason is in $@;
        # $! is not a reliable source of the error after the eval
        eval { mkpath($dir, 0, 0777); 1 }
            or die "Couldn't mkpath '$dir' for lockfile: $@";
    }

    return $lockfile;
}
166 # ----------------------------------------------------------------------
167 # "private" utility functions and methods
# Given the path of a cache entry (and its directory), open a temporary
# file next to it for writing and return (filehandle, filename), in the
# manner of File::Temp::tempfile.  The temporary name is the entry path
# with '.tmp' appended.  Dies if the file cannot be opened.
sub _tempfile_to_path {
    # $dir is part of the calling convention but is not needed here
    my ($self, $file, $dir) = @_;

    my $tempname = $file . '.tmp';
    open(my $temp_fh, '>', $tempname)
        or die "Couldn't open temporary file '$tempname' for writing: $!";

    return ($temp_fh, $tempname);
}
182 # ......................................................................
183 # interface methods
# Wait until the process holding the writer lock is done, then fetch data.
#
# Runs the activity indicator, takes a shared (reader) lock on $lock_fh
# (which blocks while a writer holds the exclusive lock), and calls
# $fetch_code.  If $fetch_locked is true the fetch happens while the
# shared lock is still held; otherwise the lockfile is closed first.
# Returns whatever $fetch_code returned (called in list context).
sub _wait_for_data {
    my ($self, $key,
        $lock_fh, $lockfile,
        $fetch_code, $fetch_locked) = @_;
    my @data;

    # provide "generating page..." info, if exists;
    # the indicator may exit, in which case nothing below is reached
    $self->generating_info($key, $lock_fh);

    # get readers lock, i.e. wait for writer,
    # which might be background process
    flock($lock_fh, LOCK_SH);

    # fetch inside the critical section when requested ...
    @data = $fetch_code->() if $fetch_locked;

    # closing lockfile releases lock
    close $lock_fh
        or die "Could't close lockfile '$lockfile': $!";

    # ... or only after the lock has been released
    @data = $fetch_code->() unless $fetch_locked;

    return @data;
}
# Generate data for $key by running $set_code, possibly in a detached
# background process.
#
# When 'background_cache' is on, a background process is forked if there
# is stale data to serve in the meantime (fetched with $fetch_code when
# the entry is valid with respect to max_lifetime), or if both
# 'generating_info' and 'generating_info_is_safe' are set so that the
# foreground process can show a progress indicator.
#
# Returns, in the calling process: freshly generated data if it was
# generated in the foreground, otherwise the stale data (possibly an
# empty list, in which case the caller has to wait for the data).
# The background process never returns from this subroutine.
sub _set_maybe_background {
    my ($self, $key, $fetch_code, $set_code) = @_;

    my $pid;
    my (@result, @stale_result);

    if ($self->{'background_cache'}) {
        # try to retrieve stale data
        @stale_result = $fetch_code->()
            if $self->is_valid($key, $self->get_max_lifetime());

        # fork if there is stale data, for background process
        # to regenerate/refresh the cache (generate data),
        # or if main process would show progress indicator
        $pid = fork()
            if (@stale_result ||
                ($self->{'generating_info'} && $self->{'generating_info_is_safe'}));
    }

    if ($pid) {
        ## forked and are in parent process
        # reap child, which spawned grandchild process (detaching it)
        waitpid $pid, 0;

        return @stale_result;
    }

    if (defined $pid) {
        ## in background process

        # daemonize background process, detaching it from parent
        # see also Proc::Daemonize, Apache2::SubProcess
        POSIX::setsid(); # or setpgrp(0, 0);
        fork() && CORE::exit(0);

        # The detached process must never return into the caller's code:
        # an exception escaping from here would unwind the stack of a
        # second copy of the calling program.  Trap it, report it, and
        # exit in either case.
        my $ok = eval { my @ignored = $set_code->(); 1 };
        my $err = $@;
        warn "Background cache generation failed: $err" unless $ok;

        # lockfile will be automatically closed on exit,
        # and therefore lockfile would be unlocked
        CORE::exit($ok ? 0 : 1);
    }

    ## didn't fork (not requested, not needed, or fork failed):
    ## generate data in this process
    @result = $set_code->();

    return @result > 0 ? @result : @stale_result;
}
# Common get-or-generate logic shared by compute() and compute_fh().
#
#  $get_code     - returns cached data, or empty list on cache miss
#  $fetch_code   - reads the entry as it is on disk (used for stale data
#                  and for reading after waiting on another process)
#  $set_code     - generates data and stores it in the cache
#  $fetch_locked - passed on to _wait_for_data(): whether fetching is to
#                  be done while the reader lock is held
#
# Returns the data as a list; empty if this process held the writer
# lock and still ended up without data.
sub _compute_generic {
    my ($self, $key,
        $get_code, $fetch_code, $set_code, $fetch_locked) = @_;

    my @data = $get_code->();
    return @data if @data;

    my $lockfile = $self->get_lockname($key);

    # Retry loop: guards against the situation where the process that
    # acquired the exclusive lock (writer) dies or exits (die_error)
    # before writing data to cache.
    while (1) {
        open(my $lock_fh, '+>', $lockfile)
            or die "Could't open lockfile '$lockfile': $!";

        my $is_writer = flock($lock_fh, LOCK_EX | LOCK_NB);

        if ($is_writer) {
            ## acquired writers lock, have to generate data
            @data = $self->_set_maybe_background($key, $fetch_code, $set_code);

            # closing lockfile releases writer lock
            close $lock_fh
                or die "Could't close lockfile '$lockfile': $!";

            unless (@data) {
                # wait for background process to finish generating data
                open $lock_fh, '<', $lockfile
                    or die "Couldn't reopen (for reading) lockfile '$lockfile': $!";

                @data = $self->_wait_for_data($key, $lock_fh, $lockfile,
                                              $fetch_code, $fetch_locked);
            }
        }
        else {
            ## someone else is the writer: serve stale data if allowed,
            ## otherwise wait for regeneration
            if ($self->is_valid($key, $self->get_max_lifetime())) {
                @data = $fetch_code->();
            }
            unless (@data) {
                @data = $self->_wait_for_data($key, $lock_fh, $lockfile,
                                              $fetch_code, $fetch_locked);
            }
        }

        # stop once we have data, or we tried generating data
        # ourselves and failed
        last if @data || $is_writer;
    }

    return @data;
}
# $data = $cache->compute($key, $code);
#
# Get-or-set in one call: returns the cached value for $key if there is
# one; otherwise runs $code, stores its return value under $key, and
# returns that value.
#
# File locking ensures that only one process updates the value for $key,
# which avoids the 'cache miss stampede' (aka 'stampeding herd') problem.
sub compute {
    my ($self, $key, $code) = @_;

    my $from_cache = sub {
        return $self->get($key);
    };
    my $from_disk = sub {
        return $self->fetch($key);
    };
    my $generate = sub {
        my $data = $code->();
        $self->set($key, $data);
        return $data;
    };

    # last argument 0: $self->get($key); is outside LOCK_SH critical section
    return (
        $self->_compute_generic($key, $from_cache, $from_disk, $generate, 0)
    )[0]; # return single value: $data
}
# ($fh, $filename) = $cache->compute_fh($key, $code);
#
# Filehandle flavour of compute(): if $key is cached, returns a
# filehandle the data can be read from.  Otherwise $code is called with
# a filehandle to write to; what it writes becomes the new value for
# $key, and a filehandle for reading the newly generated data is
# returned.
#
# File locking ensures that only one process updates the value for $key,
# which avoids the 'cache miss stampede' (aka 'stampeding herd') problem.
sub compute_fh {
    my ($self, $key, $code_fh) = @_;

    my $from_cache = sub {
        return $self->get_fh($key);
    };
    my $from_disk = sub {
        return $self->fetch_fh($key);
    };
    my $generate = sub {
        return $self->set_coderef_fh($key, $code_fh);
    };

    # last argument 1: $self->fetch_fh($key); just opens file
    return $self->_compute_generic($key,
        $from_cache, $from_disk, $generate, 1);
}
370 __END__
371 # end of package GitwebCache::FileCacheWithLocking