82e88f16ed43bfb90fce94e1952fc9aab6eb956d
[git/jnareb-git.git] / gitweb / lib / GitwebCache / FileCacheWithLocking.pm
blob82e88f16ed43bfb90fce94e1952fc9aab6eb956d
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache, with locking
12 # Based on GitwebCache::SimpleFileCache, minimalistic cache that
13 # stores data in the filesystem, without serialization.
15 # It uses file locks (flock) to have only one process generating data
16 # and writing to cache, when using CHI interface ->compute() method.
18 package GitwebCache::FileCacheWithLocking;
19 use base qw(GitwebCache::SimpleFileCache);
21 use strict;
22 use warnings;
24 use File::Path qw(mkpath);
25 use Fcntl qw(:flock);
26 use POSIX qw(setsid);
28 # ......................................................................
29 # constructor
31 # The options are set by passing in a reference to a hash containing
32 # any of the following keys:
33 # * 'namespace'
34 # The namespace associated with this cache. This allows easy separation of
35 # multiple, distinct caches without worrying about key collision. Defaults
36 # to $DEFAULT_NAMESPACE.
37 # * 'cache_root' (Cache::FileCache compatibile),
38 # 'root_dir' (CHI::Driver::File compatibile),
39 # The location in the filesystem that will hold the root of the cache.
40 # Defaults to $DEFAULT_CACHE_ROOT.
41 # * 'cache_depth' (Cache::FileCache compatibile),
42 # 'depth' (CHI::Driver::File compatibile),
43 # The number of subdirectories deep to cache object item. This should be
44 # large enough that no cache directory has more than a few hundred objects.
45 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
46 # * 'default_expires_in' (Cache::Cache compatibile),
47 # 'expires_in' (CHI compatibile) [seconds]
48 # The expiration time for objects place in the cache.
49 # Defaults to -1 (never expire) if not explicitly set.
50 # Sets 'expires_min' to given value.
51 # * 'expires_min' [seconds]
52 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
53 # Used as lower bound in adaptive cache lifetime / expiration.
54 # Defaults to 20 seconds; 'expires_in' sets it also.
55 # * 'expires_max' [seconds]
56 # The maximum expiration time for objects in cache.
57 # Used as upper bound in adaptive cache lifetime / expiration.
58 # Defaults to 1200 seconds, if not set;
59 # defaults to 'expires_min' if 'expires_in' is used.
60 # * 'check_load'
61 # Subroutine (code) used for adaptive cache lifetime / expiration.
62 # If unset, adaptive caching is turned off; defaults to unset.
63 # * 'increase_factor' [seconds / 100% CPU load]
64 # Factor multiplying 'check_load' result when calculating cache lietime.
65 # Defaults to 60 seconds for 100% SPU load ('check_load' returning 1.0).
67 # (all the above are inherited from GitwebCache::SimpleFileCache)
69 # * 'max_lifetime' [seconds]
70 # If it is greater than 0, and cache entry is expired but not older
71 # than it, serve stale data when waiting for cache entry to be
72 # regenerated (refreshed). Non-adaptive.
73 # Defaults to -1 (never expire / always serve stale).
74 # * 'background_cache' (boolean)
75 # This enables/disables regenerating cache in background process.
76 # Defaults to true.
77 sub new {
78 my $class = shift;
79 my %opts = ref $_[0] ? %{ $_[0] } : @_;
81 my $self = $class->SUPER::new(\%opts);
83 my ($max_lifetime, $background_cache);
84 if (%opts) {
85 $max_lifetime =
86 $opts{'max_lifetime'} ||
87 $opts{'max_cache_lifetime'};
88 $background_cache = $opts{'background_cache'};
90 $max_lifetime = -1 unless defined($max_lifetime);
91 $background_cache = 1 unless defined($background_cache);
93 $self->set_max_lifetime($max_lifetime);
94 $self->set_background_cache($background_cache);
96 return $self;
99 # ......................................................................
100 # accessors
102 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
104 # creates get_depth() and set_depth($depth) etc. methods
105 foreach my $i (qw(max_lifetime background_cache)) {
106 my $field = $i;
107 no strict 'refs';
108 *{"get_$field"} = sub {
109 my $self = shift;
110 return $self->{$field};
112 *{"set_$field"} = sub {
113 my ($self, $value) = @_;
114 $self->{$field} = $value;
118 # ----------------------------------------------------------------------
119 # utility functions and methods
121 # Take an human readable key, and return path to be used for lockfile
122 # Ensures that file can be created, if needed.
123 sub get_lockname {
124 my ($self, $key) = @_;
126 my $lockfile = $self->path_to_key($key, \my $dir) . '.lock';
128 # ensure that directory leading to lockfile exists
129 if (!-d $dir) {
130 eval { mkpath($dir, 0, 0777); 1 }
131 or die "Couldn't mkpath '$dir' for lockfile: $!";
134 return $lockfile;
137 # ----------------------------------------------------------------------
138 # "private" utility functions and methods
140 # take a file path to cache entry, and its directory
141 # return filehandle and filename of open temporary file,
142 # like File::Temp::tempfile
143 sub _tempfile_to_path {
144 my ($self, $file, $dir) = @_;
146 my $tempname = "$file.tmp";
147 open my $temp_fh, '>', $tempname
148 or die "Couldn't open temporary file '$tempname' for writing: $!";
150 return ($temp_fh, $tempname);
153 # ......................................................................
154 # interface methods
156 sub _set_maybe_background {
157 my ($self, $key, $fetch_code, $set_code) = @_;
159 my $pid;
160 my (@result, @stale_result);
162 if ($self->{'background_cache'}) {
163 # try to retrieve stale data
164 @stale_result = $fetch_code->()
165 if $self->is_valid($key, $self->get_max_lifetime());
167 # fork if there is stale data, for background process
168 # to regenerate/refresh the cache (generate data)
169 $pid = fork() if (@stale_result);
172 if ($pid) {
173 ## forked and are in parent process
174 # reap child, which spawned grandchild process (detaching it)
175 waitpid $pid, 0;
177 } else {
178 ## didn't fork, or are in background process
180 # daemonize background process, detaching it from parent
181 # see also Proc::Daemonize, Apache2::SubProcess
182 if (defined $pid) {
183 ## in background process
184 POSIX::setsid(); # or setpgrp(0, 0);
185 fork() && CORE::exit(0);
188 @result = $set_code->();
190 if (defined $pid) {
191 ## in background process; parent will serve stale data
193 # lockfile will be automatically closed on exit,
194 # and therefore lockfile would be unlocked
195 CORE::exit(0);
199 return @result > 0 ? @result : @stale_result;
202 sub _compute_generic {
203 my ($self, $key,
204 $get_code, $fetch_code, $set_code, $fetch_locked) = @_;
206 my @result = $get_code->();
207 return @result if @result;
209 my $lockfile = $self->get_lockname($key);
211 # this loop is to protect against situation where process that
212 # acquired exclusive lock (writer) dies or exits (die_error)
213 # before writing data to cache
214 my $lock_state; # needed for loop condition
215 do {
216 open my $lock_fh, '+>', $lockfile
217 or die "Could't open lockfile '$lockfile': $!";
219 $lock_state = flock($lock_fh, LOCK_EX | LOCK_NB);
220 if ($lock_state) {
221 ## acquired writers lock, have to generate data
222 @result = $self->_set_maybe_background($key, $fetch_code, $set_code);
224 # closing lockfile releases lock
225 close $lock_fh
226 or die "Could't close lockfile '$lockfile': $!";
228 } else {
229 ## didn't acquire writers lock, get stale data or wait for regeneration
231 # try to retrieve stale data
232 @result = $fetch_code->()
233 if $self->is_valid($key, $self->get_max_lifetime());
234 return @result if @result;
236 # get readers lock (wait for writer)
237 # if there is no stale data to serve
238 flock($lock_fh, LOCK_SH);
239 # closing lockfile releases lock
240 if ($fetch_locked) {
241 @result = $fetch_code->();
242 close $lock_fh
243 or die "Could't close lockfile '$lockfile': $!";
244 } else {
245 close $lock_fh
246 or die "Could't close lockfile '$lockfile': $!";
247 @result = $fetch_code->();
250 } until (@result || $lock_state);
251 # repeat until we have data, or we tried generating data oneself and failed
252 return @result;
255 # $data = $cache->compute($key, $code);
257 # Combines the get and set operations in a single call. Attempts to
258 # get $key; if successful, returns the value. Otherwise, calls $code
259 # and uses the return value as the new value for $key, which is then
260 # returned.
262 # Uses file locking to have only one process updating value for $key
263 # to avoid 'cache miss stampede' (aka 'stampeding herd') problem.
264 sub compute {
265 my ($self, $key, $code) = @_;
267 return ($self->_compute_generic($key,
268 sub {
269 return $self->get($key);
271 sub {
272 return $self->fetch($key);
274 sub {
275 my $data = $code->();
276 $self->set($key, $data);
277 return $data;
279 0 # $self->get($key); is outside LOCK_SH critical section
280 ))[0]; # return single value: $data
283 # ($fh, $filename) = $cache->compute_fh($key, $code);
285 # Combines the get and set operations in a single call. Attempts to
286 # get $key; if successful, returns the filehandle it can be read from.
287 # Otherwise, calls $code passing filehandle to write to as a
288 # parameter; contents of this file is then used as the new value for
289 # $key; returns filehandle from which one can read newly generated data.
291 # Uses file locking to have only one process updating value for $key
292 # to avoid 'cache miss stampede' (aka 'stampeding herd') problem.
293 sub compute_fh {
294 my ($self, $key, $code_fh) = @_;
296 return $self->_compute_generic($key,
297 sub {
298 return $self->get_fh($key);
300 sub {
301 return $self->fetch_fh($key);
303 sub {
304 return $self->set_coderef_fh($key, $code_fh);
306 1 # $self->fetch_fh($key); just opens file
311 __END__
312 # end of package GitwebCache::FileCacheWithLocking