gitweb/lib - Add clear() and size() methods to caching interface
[git/jnareb-git.git] / gitweb / lib / GitwebCache / SimpleFileCache.pm
blobcd489f8e9205640fd62b70d572af5f8e3cac7be5
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache
12 # Minimalistic cache that stores data in the filesystem, without serialization
13 # and currently without any kind of cache expiration (all keys last forever till
14 # they got explicitely removed).
16 # It follows Cache::Cache and CHI interfaces (but does not implement it fully)
18 package GitwebCache::SimpleFileCache;
20 use strict;
21 use warnings;
23 use Carp;
24 use File::Path qw(mkpath rmtree);
25 use File::Temp qw(tempfile mktemp);
26 use File::Find qw(find);
27 use Digest::MD5 qw(md5_hex);
29 # by default, the cache nests all entries on the filesystem single
30 # directory deep, i.e. '60/b725f10c9c85c70d97880dfe8191b3' for
31 # key name (key digest) 60b725f10c9c85c70d97880dfe8191b3.
33 our $DEFAULT_CACHE_DEPTH = 1;
35 # by default, the root of the cache is located in 'cache'.
37 our $DEFAULT_CACHE_ROOT = "cache";
39 # by default we don't use cache namespace (empty namespace);
40 # empty namespace does not allow for simple implementation of clear() method.
42 our $DEFAULT_NAMESPACE = "gitweb";
44 # ......................................................................
45 # constructor
47 # The options are set by passing in a reference to a hash containing
48 # any of the following keys:
49 # * 'namespace'
50 # The namespace associated with this cache. This allows easy separation of
51 # multiple, distinct caches without worrying about key collision. Defaults
52 # to $DEFAULT_NAMESPACE.
53 # * 'cache_root' (Cache::FileCache compatibile),
54 # 'root_dir' (CHI::Driver::File compatibile),
55 # The location in the filesystem that will hold the root of the cache.
56 # Defaults to $DEFAULT_CACHE_ROOT.
57 # * 'cache_depth' (Cache::FileCache compatibile),
58 # 'depth' (CHI::Driver::File compatibile),
59 # The number of subdirectories deep to cache object item. This should be
60 # large enough that no cache directory has more than a few hundred objects.
61 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
62 # * 'default_expires_in' (Cache::Cache compatibile),
63 # 'expires_in' (CHI compatibile) [seconds]
64 # The expiration time for objects place in the cache.
65 # Defaults to -1 (never expire) if not explicitly set.
66 # Sets 'expires_min' to given value.
67 # * 'expires_min' [seconds]
68 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
69 # Used as lower bound in adaptive cache lifetime / expiration.
70 # Defaults to 20 seconds; 'expires_in' sets it also.
71 # * 'expires_max' [seconds]
72 # The maximum expiration time for objects in cache.
73 # Used as upper bound in adaptive cache lifetime / expiration.
74 # Defaults to 1200 seconds, if not set;
75 # defaults to 'expires_min' if 'expires_in' is used.
76 # * 'check_load'
77 # Subroutine (code) used for adaptive cache lifetime / expiration.
78 # If unset, adaptive caching is turned off; defaults to unset.
79 # * 'increase_factor' [seconds / 100% CPU load]
80 # Factor multiplying 'check_load' result when calculating cache lietime.
81 # Defaults to 60 seconds for 100% SPU load ('check_load' returning 1.0).
82 # * 'on_error' (similar to CHI 'on_get_error'/'on_set_error')
83 # How to handle runtime errors occurring during cache gets and cache
84 # sets, which may or may not be considered fatal in your application.
85 # Options are:
86 # * "die" (the default) - call die() with an appropriate message
87 # * "warn" - call warn() with an appropriate message
88 # * "ignore" - do nothing
89 # * <coderef> - call this code reference with an appropriate message
90 sub new {
91 my $class = shift;
92 my %opts = ref $_[0] ? %{ $_[0] } : @_;
94 my $self = {};
95 $self = bless($self, $class);
97 my ($root, $depth, $ns);
98 my ($expires_min, $expires_max, $increase_factor, $check_load);
99 my ($on_error);
100 if (%opts) {
101 $root =
102 $opts{'cache_root'} ||
103 $opts{'root_dir'};
104 $depth =
105 $opts{'cache_depth'} ||
106 $opts{'depth'};
107 $ns = $opts{'namespace'};
108 $expires_min =
109 $opts{'expires_min'} ||
110 $opts{'default_expires_in'} ||
111 $opts{'expires_in'};
112 $expires_max =
113 $opts{'expires_max'};
114 $increase_factor = $opts{'expires_factor'};
115 $check_load = $opts{'check_load'};
116 $on_error =
117 $opts{'on_error'} ||
118 $opts{'on_get_error'} ||
119 $opts{'on_set_error'} ||
120 $opts{'error_handler'};
122 $root = $DEFAULT_CACHE_ROOT unless defined($root);
123 $depth = $DEFAULT_CACHE_DEPTH unless defined($depth);
124 $ns = $DEFAULT_NAMESPACE unless defined($ns);
125 $expires_min = -1 unless defined($expires_min);
126 $expires_max = $expires_min
127 if (!defined($expires_max) && exists $opts{'expires_in'});
128 $expires_max = -1 unless (defined($expires_max));
129 $increase_factor = 60 unless defined($increase_factor);
130 $on_error = "die"
131 unless (defined $on_error &&
132 (ref($on_error) eq 'CODE' || $on_error =~ /^die|warn|ignore$/));
134 $self->set_root($root);
135 $self->set_depth($depth);
136 $self->set_namespace($ns);
137 $self->set_expires_min($expires_min);
138 $self->set_expires_max($expires_max);
139 $self->set_increase_factor($increase_factor);
140 $self->set_check_load($check_load);
141 $self->set_on_error($on_error);
143 return $self;
147 # ......................................................................
148 # accessors
150 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
152 # creates get_depth() and set_depth($depth) etc. methods
153 foreach my $i (qw(depth root namespace
154 expires_min expires_max increase_factor check_load
155 on_error)) {
156 my $field = $i;
157 no strict 'refs';
158 *{"get_$field"} = sub {
159 my $self = shift;
160 return $self->{$field};
162 *{"set_$field"} = sub {
163 my ($self, $value) = @_;
164 $self->{$field} = $value;
168 # ......................................................................
169 # pseudo-accessors
171 # returns adaptive lifetime of cache entry for given $key [seconds]
172 sub get_expires_in {
173 my ($self) = @_;
175 # short-circuit
176 if (!defined $self->{'check_load'} ||
177 $self->{'expires_max'} <= $self->{'expires_min'}) {
178 return $self->{'expires_min'};
181 my $expires_in =
182 #$self->{'expires_min'} +
183 $self->{'increase_factor'} * $self->check_load();
185 if ($expires_in < $self->{'expires_min'}) {
186 return $self->{'expires_min'};
187 } elsif ($expires_in > $self->{'expires_max'}) {
188 return $self->{'expires_max'};
191 return $expires_in;
194 # sets expiration time to $duration, turns off adaptive cache lifetime
195 sub set_expires_in {
196 my ($self, $duration) = @_;
198 $self->{'expires_min'} = $self->{'expires_max'} = $duration;
201 # runs 'check_load' subroutine, for adaptive cache lifetime.
202 # Note: check in caller that 'check_load' exists.
203 sub check_load {
204 my $self = shift;
205 return $self->{'check_load'}->();
208 # ----------------------------------------------------------------------
209 # utility functions and methods
211 # Return root dir for namespace (lazily built, cached)
212 sub path_to_namespace {
213 my ($self) = @_;
215 if (!exists $self->{'path_to_namespace'}) {
216 if (defined $self->{'namespace'} &&
217 $self->{'namespace'} ne '') {
218 $self->{'path_to_namespace'} = "$self->{'root'}/$self->{'namespace'}";
219 } else {
220 $self->{'path_to_namespace'} = $self->{'root'};
223 return $self->{'path_to_namespace'};
226 # $path = $cache->path_to_key($key);
227 # $path = $cache->path_to_key($key, \$dir);
229 # Take an human readable key, and return file path.
230 # Puts dirname of file path in second argument, if it is provided.
231 sub path_to_key {
232 my ($self, $key, $dir_ref) = @_;
234 my @paths = ( $self->path_to_namespace() );
236 # Create a unique (hashed) key from human readable key
237 my $filename = md5_hex($key); # or $digester->add($key)->hexdigest();
239 # Split filename so that it have DEPTH subdirectories,
240 # where each subdirectory has a two-letter name
241 push @paths, unpack("(a2)[$self->{'depth'}] a*", $filename);
242 $filename = pop @paths;
244 # Join paths together, computing dir separately if $dir_ref was passed.
245 my $filepath;
246 if (defined $dir_ref && ref($dir_ref)) {
247 my $dir = join('/', @paths);
248 $filepath = "$dir/$filename";
249 $$dir_ref = $dir;
250 } else {
251 $filepath = join('/', @paths, $filename);
254 return $filepath;
257 sub read_file {
258 my ($self, $filename) = @_;
260 # Fast slurp, adapted from File::Slurp::read, with unnecessary options removed
261 # via CHI::Driver::File (from CHI-0.33)
262 my $buf = '';
263 open my $read_fh, '<', $filename
264 or return;
265 binmode $read_fh, ':raw';
267 my $size_left = -s $read_fh;
269 while ($size_left > 0) {
270 my $read_cnt = sysread($read_fh, $buf, $size_left, length($buf));
271 return unless defined $read_cnt;
273 last if $read_cnt == 0;
274 $size_left -= $read_cnt;
275 #last if $size_left <= 0;
278 close $read_fh
279 or $self->_handle_error("Couldn't close file '$filename' opened for reading: $!");
280 return $buf;
283 sub write_fh {
284 my ($self, $write_fh, $filename, $data) = @_;
286 # Fast spew, adapted from File::Slurp::write, with unnecessary options removed
287 # via CHI::Driver::File (from CHI-0.33)
288 binmode $write_fh, ':raw';
290 my $size_left = length($data);
291 my $offset = 0;
293 while ($size_left > 0) {
294 my $write_cnt = syswrite($write_fh, $data, $size_left, $offset);
295 return unless defined $write_cnt;
297 $size_left -= $write_cnt;
298 $offset += $write_cnt; # == length($data);
301 close $write_fh
302 or $self->_handle_error("Couldn't close file '$filename' opened for writing: $!");
305 sub ensure_path {
306 my $self = shift;
307 my $dir = shift || return;
309 if (!-d $dir) {
310 # mkpath will croak()/die() if there is an error
311 eval {
312 mkpath($dir, 0, 0777);
314 } or $self->_handle_error($@);
318 # ----------------------------------------------------------------------
319 # "private" utility functions and methods
321 # take a file path to cache entry, and its directory
322 # return filehandle and filename of open temporary file,
323 # like File::Temp::tempfile
324 sub _tempfile_to_path {
325 my ($self, $file, $dir) = @_;
327 # tempfile will croak() if there is an error
328 my ($temp_fh, $tempname);
329 eval {
330 ($temp_fh, $tempname) = tempfile("${file}_XXXXX",
331 #DIR => $dir,
332 'UNLINK' => 0, # ensure that we don't unlink on close; file is renamed
333 'SUFFIX' => '.tmp');
334 } or $self->_handle_error($@);
335 return ($temp_fh, $tempname);
338 # based on _handle_get_error and _dispatch_error_msg from CHI::Driver
339 sub _handle_error {
340 my ($self, $error) = @_;
342 for ($self->get_on_error()) {
343 (ref($_) eq 'CODE') && do { $_->($error) };
344 /^ignore$/ && do { };
345 /^warn$/ && do { carp $error };
346 /^die$/ && do { croak $error };
350 # ----------------------------------------------------------------------
351 # worker methods
353 sub fetch {
354 my ($self, $key) = @_;
356 my $file = $self->path_to_key($key);
357 return unless (defined $file && -f $file);
359 return $self->read_file($file);
362 sub store {
363 my ($self, $key, $data) = @_;
365 my $dir;
366 my $file = $self->path_to_key($key, \$dir);
367 return unless (defined $file && defined $dir);
369 # ensure that directory leading to cache file exists
370 $self->ensure_path($dir);
372 # generate a temporary file
373 my ($temp_fh, $tempname) = $self->_tempfile_to_path($file, $dir);
374 chmod 0666, $tempname
375 or warn "Couldn't change permissions to 0666 / -rw-rw-rw- for '$tempname': $!";
377 $self->write_fh($temp_fh, $tempname, $data);
379 rename($tempname, $file)
380 or $self->_handle_error("Couldn't rename temporary file '$tempname' to '$file': $!");
383 # get size of an element associated with the $key (not the size of whole cache)
384 sub get_size {
385 my ($self, $key) = @_;
387 my $path = $self->path_to_key($key)
388 or return undef;
389 if (-f $path) {
390 return -s $path;
392 return 0;
396 # ......................................................................
397 # interface methods dealing with single item
399 # Removing and expiring
401 # $cache->remove($key)
403 # Remove the data associated with the $key from the cache.
404 sub remove {
405 my ($self, $key) = @_;
407 my $file = $self->path_to_key($key)
408 or return;
409 return unless -f $file;
410 unlink($file)
411 or $self->_handle_error("Couldn't remove file '$file': $!");
414 # $cache->is_valid($key[, $expires_in])
416 # Returns a boolean indicating whether $key exists in the cache
417 # and has not expired. Uses global per-cache expires time, unless
418 # passed optional $expires_in argument.
419 sub is_valid {
420 my ($self, $key, $expires_in) = @_;
422 my $path = $self->path_to_key($key);
424 # does file exists in cache?
425 return 0 unless -f $path;
426 # get its modification time
427 my $mtime = (stat(_))[9] # _ to reuse stat structure used in -f test
428 or $self->_handle_error("Couldn't stat file '$path': $!");
429 # cache entry is invalid if it is size 0 (in bytes)
430 return 0 unless ((stat(_))[7] > 0);
432 # expire time can be set to never
433 $expires_in = defined $expires_in ? $expires_in : $self->get_expires_in();
434 return 1 unless (defined $expires_in && $expires_in >= 0);
436 # is file expired?
437 my $now = time();
439 return (($now - $mtime) < $expires_in);
442 # Getting and setting
444 # $cache->set($key, $data);
446 # Associates $data with $key in the cache, overwriting any existing entry.
447 # Returns $data.
448 sub set {
449 my ($self, $key, $data) = @_;
451 return unless (defined $key && defined $data);
453 $self->store($key, $data);
455 return $data;
458 # $data = $cache->get($key);
460 # Returns the data associated with $key. If $key does not exist
461 # or has expired, returns undef.
462 sub get {
463 my ($self, $key) = @_;
465 return unless $self->is_valid($key);
467 return $self->fetch($key);;
470 # $data = $cache->compute($key, $code);
472 # Combines the get and set operations in a single call. Attempts to
473 # get $key; if successful, returns the value. Otherwise, calls $code
474 # and uses the return value as the new value for $key, which is then
475 # returned.
476 sub compute {
477 my ($self, $key, $code) = @_;
479 my $data = $self->get($key);
480 if (!defined $data) {
481 $data = $code->();
482 $self->set($key, $data);
485 return $data;
488 # ......................................................................
489 # nonstandard interface methods
491 sub fetch_fh {
492 my ($self, $key) = @_;
494 my $path = $self->path_to_key($key);
495 return unless (defined $path);
497 open my $fh, '<', $path or return;
498 return ($fh, $path);
502 sub get_fh {
503 my ($self, $key) = @_;
505 return unless ($self->is_valid($key));
507 return $self->fetch_fh($key);
510 sub set_coderef_fh {
511 my ($self, $key, $code) = @_;
513 my $path = $self->path_to_key($key, \my $dir);
514 return unless (defined $path && defined $dir);
516 # ensure that directory leading to cache file exists
517 $self->ensure_path($dir);
519 # generate a temporary file
520 my ($fh, $tempfile) = $self->_tempfile_to_path($path, $dir);
522 # code writes to filehandle or file
523 $code->($fh, $tempfile);
525 close $fh;
526 rename($tempfile, $path)
527 or $self->_handle_error("Couldn't rename temporary file '$tempfile' to '$path': $!");
529 open $fh, '<', $path or return;
530 return ($fh, $path);
533 # ($fh, $filename) = $cache->compute_fh($key, $code);
535 # Combines the get and set operations in a single call. Attempts to
536 # get $key; if successful, returns the filehandle it can be read from.
537 # Otherwise, calls $code passing filehandle to write to as a
538 # parameter; contents of this file is then used as the new value for
539 # $key; returns filehandle from which one can read newly generated data.
540 sub compute_fh {
541 my ($self, $key, $code) = @_;
543 my ($fh, $filename) = $self->get_fh($key);
544 if (!defined $fh) {
545 ($fh, $filename) = $self->set_coderef_fh($key, $code);
548 return ($fh, $filename);
551 # ......................................................................
552 # interface methods dealing with whole namespace
554 # $cache->clear();
556 # Remove all entries from the namespace.
557 # Namespace must be defined and not empty.
558 sub clear {
559 my $self = shift;
561 return unless $self->get_namespace();
563 my $namespace_dir = $self->path_to_namespace();
564 return if !-d $namespace_dir;
566 my $renamed_dir = mktemp($namespace_dir . '.XXXX');
567 rename($namespace_dir, $renamed_dir);
568 rmtree($renamed_dir);
569 die "Couldn't remove '$renamed_dir' directory"
570 if -d $renamed_dir;
573 # $size = $cache->size();
575 # Size of whole names (or whole cache if namespace empty)
576 sub size {
577 my $self = shift;
579 my $namespace_dir = $self->path_to_namespace();
580 return if !-d $namespace_dir;
582 my $total_size = 0;
583 my $add_size = sub { $total_size += -s $File::Find::name };
585 File::Find::find({ wanted => $add_size, no_chdir => 1 }, $namespace_dir);
587 return $total_size;
591 __END__
592 # end of package GitwebCache::SimpleFileCache;