gitweb/lib - Add support for setting error handler in cache
[git/jnareb-git.git] / gitweb / lib / GitwebCache / SimpleFileCache.pm
blob8d0a6d93c5a771a539173154578fb84114d5b2d5
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache
12 # Minimalistic cache that stores data in the filesystem, without serialization
13 # and currently without any kind of cache expiration (all keys last forever till
14 # they got explicitely removed).
16 # It follows Cache::Cache and CHI interfaces (but does not implement it fully)
18 package GitwebCache::SimpleFileCache;
20 use strict;
21 use warnings;
23 use Carp;
24 use File::Path qw(mkpath);
25 use File::Temp qw(tempfile);
26 use Digest::MD5 qw(md5_hex);
28 # by default, the cache nests all entries on the filesystem single
29 # directory deep, i.e. '60/b725f10c9c85c70d97880dfe8191b3' for
30 # key name (key digest) 60b725f10c9c85c70d97880dfe8191b3.
32 our $DEFAULT_CACHE_DEPTH = 1;
34 # by default, the root of the cache is located in 'cache'.
36 our $DEFAULT_CACHE_ROOT = "cache";
38 # by default we don't use cache namespace (empty namespace);
39 # empty namespace does not allow for simple implementation of clear() method.
41 our $DEFAULT_NAMESPACE = '';
43 # ......................................................................
44 # constructor
46 # The options are set by passing in a reference to a hash containing
47 # any of the following keys:
48 # * 'namespace'
49 # The namespace associated with this cache. This allows easy separation of
50 # multiple, distinct caches without worrying about key collision. Defaults
51 # to $DEFAULT_NAMESPACE.
52 # * 'cache_root' (Cache::FileCache compatibile),
53 # 'root_dir' (CHI::Driver::File compatibile),
54 # The location in the filesystem that will hold the root of the cache.
55 # Defaults to $DEFAULT_CACHE_ROOT.
56 # * 'cache_depth' (Cache::FileCache compatibile),
57 # 'depth' (CHI::Driver::File compatibile),
58 # The number of subdirectories deep to cache object item. This should be
59 # large enough that no cache directory has more than a few hundred objects.
60 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
61 # * 'default_expires_in' (Cache::Cache compatibile),
62 # 'expires_in' (CHI compatibile) [seconds]
63 # The expiration time for objects place in the cache.
64 # Defaults to -1 (never expire) if not explicitly set.
65 # Sets 'expires_min' to given value.
66 # * 'expires_min' [seconds]
67 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
68 # Used as lower bound in adaptive cache lifetime / expiration.
69 # Defaults to 20 seconds; 'expires_in' sets it also.
70 # * 'expires_max' [seconds]
71 # The maximum expiration time for objects in cache.
72 # Used as upper bound in adaptive cache lifetime / expiration.
73 # Defaults to 1200 seconds, if not set;
74 # defaults to 'expires_min' if 'expires_in' is used.
75 # * 'check_load'
76 # Subroutine (code) used for adaptive cache lifetime / expiration.
77 # If unset, adaptive caching is turned off; defaults to unset.
78 # * 'increase_factor' [seconds / 100% CPU load]
79 # Factor multiplying 'check_load' result when calculating cache lietime.
80 # Defaults to 60 seconds for 100% SPU load ('check_load' returning 1.0).
81 # * 'on_error' (similar to CHI 'on_get_error'/'on_set_error')
82 # How to handle runtime errors occurring during cache gets and cache
83 # sets, which may or may not be considered fatal in your application.
84 # Options are:
85 # * "die" (the default) - call die() with an appropriate message
86 # * "warn" - call warn() with an appropriate message
87 # * "ignore" - do nothing
88 # * <coderef> - call this code reference with an appropriate message
89 sub new {
90 my $class = shift;
91 my %opts = ref $_[0] ? %{ $_[0] } : @_;
93 my $self = {};
94 $self = bless($self, $class);
96 my ($root, $depth, $ns);
97 my ($expires_min, $expires_max, $increase_factor, $check_load);
98 my ($on_error);
99 if (%opts) {
100 $root =
101 $opts{'cache_root'} ||
102 $opts{'root_dir'};
103 $depth =
104 $opts{'cache_depth'} ||
105 $opts{'depth'};
106 $ns = $opts{'namespace'};
107 $expires_min =
108 $opts{'expires_min'} ||
109 $opts{'default_expires_in'} ||
110 $opts{'expires_in'};
111 $expires_max =
112 $opts{'expires_max'};
113 $increase_factor = $opts{'expires_factor'};
114 $check_load = $opts{'check_load'};
115 $on_error =
116 $opts{'on_error'} ||
117 $opts{'on_get_error'} ||
118 $opts{'on_set_error'} ||
119 $opts{'error_handler'};
121 $root = $DEFAULT_CACHE_ROOT unless defined($root);
122 $depth = $DEFAULT_CACHE_DEPTH unless defined($depth);
123 $ns = $DEFAULT_NAMESPACE unless defined($ns);
124 $expires_min = -1 unless defined($expires_min);
125 $expires_max = $expires_min
126 if (!defined($expires_max) && exists $opts{'expires_in'});
127 $expires_max = -1 unless (defined($expires_max));
128 $increase_factor = 60 unless defined($increase_factor);
129 $on_error = "die"
130 unless (defined $on_error &&
131 (ref($on_error) eq 'CODE' || $on_error =~ /^die|warn|ignore$/));
133 $self->set_root($root);
134 $self->set_depth($depth);
135 $self->set_namespace($ns);
136 $self->set_expires_min($expires_min);
137 $self->set_expires_max($expires_max);
138 $self->set_increase_factor($increase_factor);
139 $self->set_check_load($check_load);
140 $self->set_on_error($on_error);
142 return $self;
146 # ......................................................................
147 # accessors
149 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
151 # creates get_depth() and set_depth($depth) etc. methods
152 foreach my $i (qw(depth root namespace
153 expires_min expires_max increase_factor check_load
154 on_error)) {
155 my $field = $i;
156 no strict 'refs';
157 *{"get_$field"} = sub {
158 my $self = shift;
159 return $self->{$field};
161 *{"set_$field"} = sub {
162 my ($self, $value) = @_;
163 $self->{$field} = $value;
167 # ......................................................................
168 # pseudo-accessors
170 # returns adaptive lifetime of cache entry for given $key [seconds]
171 sub get_expires_in {
172 my ($self) = @_;
174 # short-circuit
175 if (!defined $self->{'check_load'} ||
176 $self->{'expires_max'} <= $self->{'expires_min'}) {
177 return $self->{'expires_min'};
180 my $expires_in =
181 #$self->{'expires_min'} +
182 $self->{'increase_factor'} * $self->check_load();
184 if ($expires_in < $self->{'expires_min'}) {
185 return $self->{'expires_min'};
186 } elsif ($expires_in > $self->{'expires_max'}) {
187 return $self->{'expires_max'};
190 return $expires_in;
193 # sets expiration time to $duration, turns off adaptive cache lifetime
194 sub set_expires_in {
195 my ($self, $duration) = @_;
197 $self->{'expires_min'} = $self->{'expires_max'} = $duration;
200 # runs 'check_load' subroutine, for adaptive cache lifetime.
201 # Note: check in caller that 'check_load' exists.
202 sub check_load {
203 my $self = shift;
204 return $self->{'check_load'}->();
207 # ----------------------------------------------------------------------
208 # utility functions and methods
210 # Return root dir for namespace (lazily built, cached)
211 sub path_to_namespace {
212 my ($self) = @_;
214 if (!exists $self->{'path_to_namespace'}) {
215 if (defined $self->{'namespace'} &&
216 $self->{'namespace'} ne '') {
217 $self->{'path_to_namespace'} = "$self->{'root'}/$self->{'namespace'}";
218 } else {
219 $self->{'path_to_namespace'} = $self->{'root'};
222 return $self->{'path_to_namespace'};
225 # $path = $cache->path_to_key($key);
226 # $path = $cache->path_to_key($key, \$dir);
228 # Take an human readable key, and return file path.
229 # Puts dirname of file path in second argument, if it is provided.
230 sub path_to_key {
231 my ($self, $key, $dir_ref) = @_;
233 my @paths = ( $self->path_to_namespace() );
235 # Create a unique (hashed) key from human readable key
236 my $filename = md5_hex($key); # or $digester->add($key)->hexdigest();
238 # Split filename so that it have DEPTH subdirectories,
239 # where each subdirectory has a two-letter name
240 push @paths, unpack("(a2)[$self->{'depth'}] a*", $filename);
241 $filename = pop @paths;
243 # Join paths together, computing dir separately if $dir_ref was passed.
244 my $filepath;
245 if (defined $dir_ref && ref($dir_ref)) {
246 my $dir = join('/', @paths);
247 $filepath = "$dir/$filename";
248 $$dir_ref = $dir;
249 } else {
250 $filepath = join('/', @paths, $filename);
253 return $filepath;
256 sub read_file {
257 my ($self, $filename) = @_;
259 # Fast slurp, adapted from File::Slurp::read, with unnecessary options removed
260 # via CHI::Driver::File (from CHI-0.33)
261 my $buf = '';
262 open my $read_fh, '<', $filename
263 or return;
264 binmode $read_fh, ':raw';
266 my $size_left = -s $read_fh;
268 while ($size_left > 0) {
269 my $read_cnt = sysread($read_fh, $buf, $size_left, length($buf));
270 return unless defined $read_cnt;
272 last if $read_cnt == 0;
273 $size_left -= $read_cnt;
274 #last if $size_left <= 0;
277 close $read_fh
278 or $self->_handle_error("Couldn't close file '$filename' opened for reading: $!");
279 return $buf;
282 sub write_fh {
283 my ($self, $write_fh, $filename, $data) = @_;
285 # Fast spew, adapted from File::Slurp::write, with unnecessary options removed
286 # via CHI::Driver::File (from CHI-0.33)
287 binmode $write_fh, ':raw';
289 my $size_left = length($data);
290 my $offset = 0;
292 while ($size_left > 0) {
293 my $write_cnt = syswrite($write_fh, $data, $size_left, $offset);
294 return unless defined $write_cnt;
296 $size_left -= $write_cnt;
297 $offset += $write_cnt; # == length($data);
300 close $write_fh
301 or $self->_handle_error("Couldn't close file '$filename' opened for writing: $!");
304 sub ensure_path {
305 my $self = shift;
306 my $dir = shift || return;
308 if (!-d $dir) {
309 # mkpath will croak()/die() if there is an error
310 eval {
311 mkpath($dir, 0, 0777);
313 } or $self->_handle_error($@);
317 # ----------------------------------------------------------------------
318 # "private" utility functions and methods
320 # take a file path to cache entry, and its directory
321 # return filehandle and filename of open temporary file,
322 # like File::Temp::tempfile
323 sub _tempfile_to_path {
324 my ($self, $file, $dir) = @_;
326 # tempfile will croak() if there is an error
327 my ($temp_fh, $tempname);
328 eval {
329 ($temp_fh, $tempname) = tempfile("${file}_XXXXX",
330 #DIR => $dir,
331 'UNLINK' => 0, # ensure that we don't unlink on close; file is renamed
332 'SUFFIX' => '.tmp');
333 } or $self->_handle_error($@);
334 return ($temp_fh, $tempname);
337 # based on _handle_get_error and _dispatch_error_msg from CHI::Driver
338 sub _handle_error {
339 my ($self, $error) = @_;
341 for ($self->get_on_error()) {
342 (ref($_) eq 'CODE') && do { $_->($error) };
343 /^ignore$/ && do { };
344 /^warn$/ && do { carp $error };
345 /^die$/ && do { croak $error };
349 # ----------------------------------------------------------------------
350 # worker methods
352 sub fetch {
353 my ($self, $key) = @_;
355 my $file = $self->path_to_key($key);
356 return unless (defined $file && -f $file);
358 return $self->read_file($file);
361 sub store {
362 my ($self, $key, $data) = @_;
364 my $dir;
365 my $file = $self->path_to_key($key, \$dir);
366 return unless (defined $file && defined $dir);
368 # ensure that directory leading to cache file exists
369 $self->ensure_path($dir);
371 # generate a temporary file
372 my ($temp_fh, $tempname) = $self->_tempfile_to_path($file, $dir);
373 chmod 0666, $tempname
374 or warn "Couldn't change permissions to 0666 / -rw-rw-rw- for '$tempname': $!";
376 $self->write_fh($temp_fh, $tempname, $data);
378 rename($tempname, $file)
379 or $self->_handle_error("Couldn't rename temporary file '$tempname' to '$file': $!");
382 # get size of an element associated with the $key (not the size of whole cache)
383 sub get_size {
384 my ($self, $key) = @_;
386 my $path = $self->path_to_key($key)
387 or return undef;
388 if (-f $path) {
389 return -s $path;
391 return 0;
395 # ......................................................................
396 # interface methods
398 # Removing and expiring
400 # $cache->remove($key)
402 # Remove the data associated with the $key from the cache.
403 sub remove {
404 my ($self, $key) = @_;
406 my $file = $self->path_to_key($key)
407 or return;
408 return unless -f $file;
409 unlink($file)
410 or $self->_handle_error("Couldn't remove file '$file': $!");
413 # $cache->is_valid($key[, $expires_in])
415 # Returns a boolean indicating whether $key exists in the cache
416 # and has not expired. Uses global per-cache expires time, unless
417 # passed optional $expires_in argument.
418 sub is_valid {
419 my ($self, $key, $expires_in) = @_;
421 my $path = $self->path_to_key($key);
423 # does file exists in cache?
424 return 0 unless -f $path;
425 # get its modification time
426 my $mtime = (stat(_))[9] # _ to reuse stat structure used in -f test
427 or $self->_handle_error("Couldn't stat file '$path': $!");
428 # cache entry is invalid if it is size 0 (in bytes)
429 return 0 unless ((stat(_))[7] > 0);
431 # expire time can be set to never
432 $expires_in = defined $expires_in ? $expires_in : $self->get_expires_in();
433 return 1 unless (defined $expires_in && $expires_in >= 0);
435 # is file expired?
436 my $now = time();
438 return (($now - $mtime) < $expires_in);
441 # Getting and setting
443 # $cache->set($key, $data);
445 # Associates $data with $key in the cache, overwriting any existing entry.
446 # Returns $data.
447 sub set {
448 my ($self, $key, $data) = @_;
450 return unless (defined $key && defined $data);
452 $self->store($key, $data);
454 return $data;
457 # $data = $cache->get($key);
459 # Returns the data associated with $key. If $key does not exist
460 # or has expired, returns undef.
461 sub get {
462 my ($self, $key) = @_;
464 return unless $self->is_valid($key);
466 return $self->fetch($key);;
469 # $data = $cache->compute($key, $code);
471 # Combines the get and set operations in a single call. Attempts to
472 # get $key; if successful, returns the value. Otherwise, calls $code
473 # and uses the return value as the new value for $key, which is then
474 # returned.
475 sub compute {
476 my ($self, $key, $code) = @_;
478 my $data = $self->get($key);
479 if (!defined $data) {
480 $data = $code->();
481 $self->set($key, $data);
484 return $data;
487 # ......................................................................
488 # nonstandard interface methods
490 sub fetch_fh {
491 my ($self, $key) = @_;
493 my $path = $self->path_to_key($key);
494 return unless (defined $path);
496 open my $fh, '<', $path or return;
497 return ($fh, $path);
501 sub get_fh {
502 my ($self, $key) = @_;
504 return unless ($self->is_valid($key));
506 return $self->fetch_fh($key);
509 sub set_coderef_fh {
510 my ($self, $key, $code) = @_;
512 my $path = $self->path_to_key($key, \my $dir);
513 return unless (defined $path && defined $dir);
515 # ensure that directory leading to cache file exists
516 $self->ensure_path($dir);
518 # generate a temporary file
519 my ($fh, $tempfile) = $self->_tempfile_to_path($path, $dir);
521 # code writes to filehandle or file
522 $code->($fh, $tempfile);
524 close $fh;
525 rename($tempfile, $path)
526 or $self->_handle_error("Couldn't rename temporary file '$tempfile' to '$path': $!");
528 open $fh, '<', $path or return;
529 return ($fh, $path);
532 # ($fh, $filename) = $cache->compute_fh($key, $code);
534 # Combines the get and set operations in a single call. Attempts to
535 # get $key; if successful, returns the filehandle it can be read from.
536 # Otherwise, calls $code passing filehandle to write to as a
537 # parameter; contents of this file is then used as the new value for
538 # $key; returns filehandle from which one can read newly generated data.
539 sub compute_fh {
540 my ($self, $key, $code) = @_;
542 my ($fh, $filename) = $self->get_fh($key);
543 if (!defined $fh) {
544 ($fh, $filename) = $self->set_coderef_fh($key, $code);
547 return ($fh, $filename);
551 __END__
552 # end of package GitwebCache::SimpleFileCache;