581a57475123e7c1e1ca10dc815314ac98a389a0
[git/jnareb-git.git] / gitweb / lib / GitwebCache / SimpleFileCache.pm
blob581a57475123e7c1e1ca10dc815314ac98a389a0
1 # gitweb - simple web interface to track changes in git repositories
3 # (C) 2006, John 'Warthog9' Hawley <warthog19@eaglescrag.net>
4 # (C) 2010, Jakub Narebski <jnareb@gmail.com>
6 # This program is licensed under the GPLv2
9 # Gitweb caching engine, simple file-based cache
12 # Minimalistic cache that stores data in the filesystem, without serialization
13 # and currently without any kind of cache expiration (all keys last forever till
14 # they got explicitely removed).
16 # It follows Cache::Cache and CHI interfaces (but does not implement it fully)
18 package GitwebCache::SimpleFileCache;
20 use strict;
21 use warnings;
23 use File::Path qw(mkpath);
24 use File::Temp qw(tempfile);
25 use Digest::MD5 qw(md5_hex);
27 # by default, the cache nests all entries on the filesystem single
28 # directory deep, i.e. '60/b725f10c9c85c70d97880dfe8191b3' for
29 # key name (key digest) 60b725f10c9c85c70d97880dfe8191b3.
31 our $DEFAULT_CACHE_DEPTH = 1;
33 # by default, the root of the cache is located in 'cache'.
35 our $DEFAULT_CACHE_ROOT = "cache";
37 # by default we don't use cache namespace (empty namespace);
38 # empty namespace does not allow for simple implementation of clear() method.
40 our $DEFAULT_NAMESPACE = '';
42 # ......................................................................
43 # constructor
45 # The options are set by passing in a reference to a hash containing
46 # any of the following keys:
47 # * 'namespace'
48 # The namespace associated with this cache. This allows easy separation of
49 # multiple, distinct caches without worrying about key collision. Defaults
50 # to $DEFAULT_NAMESPACE.
51 # * 'cache_root' (Cache::FileCache compatibile),
52 # 'root_dir' (CHI::Driver::File compatibile),
53 # The location in the filesystem that will hold the root of the cache.
54 # Defaults to $DEFAULT_CACHE_ROOT.
55 # * 'cache_depth' (Cache::FileCache compatibile),
56 # 'depth' (CHI::Driver::File compatibile),
57 # The number of subdirectories deep to cache object item. This should be
58 # large enough that no cache directory has more than a few hundred objects.
59 # Defaults to $DEFAULT_CACHE_DEPTH unless explicitly set.
60 # * 'default_expires_in' (Cache::Cache compatibile),
61 # 'expires_in' (CHI compatibile) [seconds]
62 # The expiration time for objects place in the cache.
63 # Defaults to -1 (never expire) if not explicitly set.
64 # Sets 'expires_min' to given value.
65 # * 'expires_min' [seconds]
66 # The minimum expiration time for objects in cache (e.g. with 0% CPU load).
67 # Used as lower bound in adaptive cache lifetime / expiration.
68 # Defaults to 20 seconds; 'expires_in' sets it also.
69 # * 'expires_max' [seconds]
70 # The maximum expiration time for objects in cache.
71 # Used as upper bound in adaptive cache lifetime / expiration.
72 # Defaults to 1200 seconds, if not set;
73 # defaults to 'expires_min' if 'expires_in' is used.
74 # * 'check_load'
75 # Subroutine (code) used for adaptive cache lifetime / expiration.
76 # If unset, adaptive caching is turned off; defaults to unset.
77 # * 'increase_factor' [seconds / 100% CPU load]
78 # Factor multiplying 'check_load' result when calculating cache lietime.
79 # Defaults to 60 seconds for 100% SPU load ('check_load' returning 1.0).
80 sub new {
81 my $class = shift;
82 my %opts = ref $_[0] ? %{ $_[0] } : @_;
84 my $self = {};
85 $self = bless($self, $class);
87 my ($root, $depth, $ns);
88 my ($expires_min, $expires_max, $increase_factor, $check_load);
89 if (%opts) {
90 $root =
91 $opts{'cache_root'} ||
92 $opts{'root_dir'};
93 $depth =
94 $opts{'cache_depth'} ||
95 $opts{'depth'};
96 $ns = $opts{'namespace'};
97 $expires_min =
98 $opts{'expires_min'} ||
99 $opts{'default_expires_in'} ||
100 $opts{'expires_in'};
101 $expires_max =
102 $opts{'expires_max'};
103 $increase_factor = $opts{'expires_factor'};
104 $check_load = $opts{'check_load'};
106 $root = $DEFAULT_CACHE_ROOT unless defined($root);
107 $depth = $DEFAULT_CACHE_DEPTH unless defined($depth);
108 $ns = $DEFAULT_NAMESPACE unless defined($ns);
109 $expires_min = -1 unless defined($expires_min);
110 $expires_max = $expires_min
111 if (!defined($expires_max) && exists $opts{'expires_in'});
112 $expires_max = -1 unless (defined($expires_max));
113 $increase_factor = 60 unless defined($increase_factor);
115 $self->set_root($root);
116 $self->set_depth($depth);
117 $self->set_namespace($ns);
118 $self->set_expires_min($expires_min);
119 $self->set_expires_max($expires_max);
120 $self->set_increase_factor($increase_factor);
121 $self->set_check_load($check_load);
123 return $self;
127 # ......................................................................
128 # accessors
130 # http://perldesignpatterns.com/perldesignpatterns.html#AccessorPattern
132 # creates get_depth() and set_depth($depth) etc. methods
133 foreach my $i (qw(depth root namespace
134 expires_min expires_max increase_factor check_load)) {
135 my $field = $i;
136 no strict 'refs';
137 *{"get_$field"} = sub {
138 my $self = shift;
139 return $self->{$field};
141 *{"set_$field"} = sub {
142 my ($self, $value) = @_;
143 $self->{$field} = $value;
147 # ......................................................................
148 # pseudo-accessors
150 # returns adaptive lifetime of cache entry for given $key [seconds]
151 sub get_expires_in {
152 my ($self) = @_;
154 # short-circuit
155 if (!defined $self->{'check_load'} ||
156 $self->{'expires_max'} <= $self->{'expires_min'}) {
157 return $self->{'expires_min'};
160 my $expires_in =
161 #$self->{'expires_min'} +
162 $self->{'increase_factor'} * $self->check_load();
164 if ($expires_in < $self->{'expires_min'}) {
165 return $self->{'expires_min'};
166 } elsif ($expires_in > $self->{'expires_max'}) {
167 return $self->{'expires_max'};
170 return $expires_in;
173 # sets expiration time to $duration, turns off adaptive cache lifetime
174 sub set_expires_in {
175 my ($self, $duration) = @_;
177 $self->{'expires_min'} = $self->{'expires_max'} = $duration;
180 # runs 'check_load' subroutine, for adaptive cache lifetime.
181 # Note: check in caller that 'check_load' exists.
182 sub check_load {
183 my $self = shift;
184 return $self->{'check_load'}->();
187 # ----------------------------------------------------------------------
188 # utility functions and methods
190 # Return root dir for namespace (lazily built, cached)
191 sub path_to_namespace {
192 my ($self) = @_;
194 if (!exists $self->{'path_to_namespace'}) {
195 if (defined $self->{'namespace'} &&
196 $self->{'namespace'} ne '') {
197 $self->{'path_to_namespace'} = "$self->{'root'}/$self->{'namespace'}";
198 } else {
199 $self->{'path_to_namespace'} = $self->{'root'};
202 return $self->{'path_to_namespace'};
205 # $path = $cache->path_to_key($key);
206 # $path = $cache->path_to_key($key, \$dir);
208 # Take an human readable key, and return file path.
209 # Puts dirname of file path in second argument, if it is provided.
210 sub path_to_key {
211 my ($self, $key, $dir_ref) = @_;
213 my @paths = ( $self->path_to_namespace() );
215 # Create a unique (hashed) key from human readable key
216 my $filename = md5_hex($key); # or $digester->add($key)->hexdigest();
218 # Split filename so that it have DEPTH subdirectories,
219 # where each subdirectory has a two-letter name
220 push @paths, unpack("(a2)[$self->{'depth'}] a*", $filename);
221 $filename = pop @paths;
223 # Join paths together, computing dir separately if $dir_ref was passed.
224 my $filepath;
225 if (defined $dir_ref && ref($dir_ref)) {
226 my $dir = join('/', @paths);
227 $filepath = "$dir/$filename";
228 $$dir_ref = $dir;
229 } else {
230 $filepath = join('/', @paths, $filename);
233 return $filepath;
236 sub read_file {
237 my $filename = shift;
239 # Fast slurp, adapted from File::Slurp::read, with unnecessary options removed
240 # via CHI::Driver::File (from CHI-0.33)
241 my $buf = '';
242 open my $read_fh, '<', $filename
243 or return;
244 binmode $read_fh, ':raw';
246 my $size_left = -s $read_fh;
248 while ($size_left > 0) {
249 my $read_cnt = sysread($read_fh, $buf, $size_left, length($buf));
250 return unless defined $read_cnt;
252 last if $read_cnt == 0;
253 $size_left -= $read_cnt;
254 #last if $size_left <= 0;
257 close $read_fh
258 or die "Couldn't close file '$filename' opened for reading: $!";
259 return $buf;
262 sub write_fh {
263 my ($write_fh, $filename, $data) = @_;
265 # Fast spew, adapted from File::Slurp::write, with unnecessary options removed
266 # via CHI::Driver::File (from CHI-0.33)
267 binmode $write_fh, ':raw';
269 my $size_left = length($data);
270 my $offset = 0;
272 while ($size_left > 0) {
273 my $write_cnt = syswrite($write_fh, $data, $size_left, $offset);
274 return unless defined $write_cnt;
276 $size_left -= $write_cnt;
277 $offset += $write_cnt; # == length($data);
280 close $write_fh
281 or die "Couldn't close file '$filename' opened for writing: $!";
284 # ----------------------------------------------------------------------
285 # "private" utility functions and methods
287 # take a file path to cache entry, and its directory
288 # return filehandle and filename of open temporary file,
289 # like File::Temp::tempfile
290 sub _tempfile_to_path {
291 my ($file, $dir) = @_;
293 # tempfile will croak() if there is an error
294 return tempfile("${file}_XXXXX",
295 #DIR => $dir,
296 'UNLINK' => 0, # ensure that we don't unlink on close; file is renamed
297 'SUFFIX' => '.tmp');
301 # ----------------------------------------------------------------------
302 # worker methods
304 sub fetch {
305 my ($self, $key) = @_;
307 my $file = $self->path_to_key($key);
308 return unless (defined $file && -f $file);
310 return read_file($file);
313 sub store {
314 my ($self, $key, $data) = @_;
316 my $dir;
317 my $file = $self->path_to_key($key, \$dir);
318 return unless (defined $file && defined $dir);
320 # ensure that directory leading to cache file exists
321 if (!-d $dir) {
322 # mkpath will croak()/die() if there is an error
323 mkpath($dir, 0, 0777);
326 # generate a temporary file
327 my ($temp_fh, $tempname) = _tempfile_to_path($file, $dir);
328 chmod 0666, $tempname
329 or warn "Couldn't change permissions to 0666 / -rw-rw-rw- for '$tempname': $!";
331 write_fh($temp_fh, $tempname, $data);
333 rename($tempname, $file)
334 or die "Couldn't rename temporary file '$tempname' to '$file': $!";
337 # get size of an element associated with the $key (not the size of whole cache)
338 sub get_size {
339 my ($self, $key) = @_;
341 my $path = $self->path_to_key($key)
342 or return undef;
343 if (-f $path) {
344 return -s $path;
346 return 0;
350 # ......................................................................
351 # interface methods
353 # Removing and expiring
355 # $cache->remove($key)
357 # Remove the data associated with the $key from the cache.
358 sub remove {
359 my ($self, $key) = @_;
361 my $file = $self->path_to_key($key)
362 or return;
363 return unless -f $file;
364 unlink($file)
365 or die "Couldn't remove file '$file': $!";
368 # $cache->is_valid($key)
370 # Returns a boolean indicating whether $key exists in the cache
371 # and has not expired (global per-cache 'expires_in').
372 sub is_valid {
373 my ($self, $key) = @_;
375 my $path = $self->path_to_key($key);
377 # does file exists in cache?
378 return 0 unless -f $path;
379 # get its modification time
380 my $mtime = (stat(_))[9] # _ to reuse stat structure used in -f test
381 or die "Couldn't stat file '$path': $!";
382 # cache entry is invalid if it is size 0 (in bytes)
383 return 0 unless ((stat(_))[7] > 0);
385 # expire time can be set to never
386 my $expires_in = $self->get_expires_in();
387 return 1 unless (defined $expires_in && $expires_in >= 0);
389 # is file expired?
390 my $now = time();
392 return (($now - $mtime) < $expires_in);
395 # Getting and setting
397 # $cache->set($key, $data);
399 # Associates $data with $key in the cache, overwriting any existing entry.
400 # Returns $data.
401 sub set {
402 my ($self, $key, $data) = @_;
404 return unless (defined $key && defined $data);
406 $self->store($key, $data);
408 return $data;
411 # $data = $cache->get($key);
413 # Returns the data associated with $key. If $key does not exist
414 # or has expired, returns undef.
415 sub get {
416 my ($self, $key) = @_;
418 return unless $self->is_valid($key);
420 return $self->fetch($key);;
423 # $data = $cache->compute($key, $code);
425 # Combines the get and set operations in a single call. Attempts to
426 # get $key; if successful, returns the value. Otherwise, calls $code
427 # and uses the return value as the new value for $key, which is then
428 # returned.
429 sub compute {
430 my ($self, $key, $code) = @_;
432 my $data = $self->get($key);
433 if (!defined $data) {
434 $data = $code->();
435 $self->set($key, $data);
438 return $data;
442 __END__
443 # end of package GitwebCache::SimpleFileCache;