4 #include "stat_cache.h"
19 #ifdef HAVE_ATTR_ATTRIBUTES_H
20 # include <attr/attributes.h>
23 #ifdef HAVE_SYS_EXTATTR_H
24 # include <sys/extattr.h>
36 /* enables debug code for testing if all nodes in the stat-cache are accessible */
37 #define DEBUG_STAT_CACHE
43 * we cache the stat() calls in our own storage
44 * the directories are cached in FAM
46 * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
48 * if the stat()-cache is queried we check if the version id for the directory is the
49 * same and return immediately.
54 * - for each stat-cache entry we need a fast indirect lookup on the directory name
55 * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
57 * stat <<-> directory <-> FAMRequest
59 * if file is deleted, directory is dirty, file is rechecked ...
60 * if directory is deleted, directory mapping is removed
74 /* the directory name is too long to always compare on it
76 * - the hash-key is used as sorting criteria for a tree
77 * - a splay-tree is used as we can use the caching effect of it
80 /* we want to cleanup the stat-cache every few seconds, let's say 10
82 * - remove entries which are outdated since 30s
83 * - remove entries which are fresh but haven't been used since 60s
84 * - if we don't have a stat-cache entry for a directory, release it from the monitor
87 #ifdef DEBUG_STAT_CACHE
95 static fake_keys ctrl
;
/*
 * Allocate and set up the stat-cache object.
 *
 * The structure is zero-filled by calloc() and the allocation is guarded by
 * force_assert(). Two scratch buffers are created: dir_name and hash_key.
 * fam_fcce_ndx starts at -1; elsewhere in this file -1 is compared against
 * to decide whether the FAM fd-event slot is in use.
 *
 * NOTE(review): the tail of this function (the DEBUG_STAT_CACHE section and
 * the return statement) is not visible in this excerpt - confirm before
 * relying on it.
 */
98 stat_cache
*stat_cache_init(void) {
99 stat_cache
*sc
= NULL
;
101 sc
= calloc(1, sizeof(*sc
));
102 force_assert(NULL
!= sc
);
104 sc
->dir_name
= buffer_init();
105 sc
->hash_key
= buffer_init();
108 sc
->fam_fcce_ndx
= -1;
111 #ifdef DEBUG_STAT_CACHE
118 static stat_cache_entry
* stat_cache_entry_init(void) {
119 stat_cache_entry
*sce
= NULL
;
121 sce
= calloc(1, sizeof(*sce
));
122 force_assert(NULL
!= sce
);
124 sce
->name
= buffer_init();
125 sce
->etag
= buffer_init();
126 sce
->content_type
= buffer_init();
131 static void stat_cache_entry_free(void *data
) {
132 stat_cache_entry
*sce
= data
;
135 buffer_free(sce
->etag
);
136 buffer_free(sce
->name
);
137 buffer_free(sce
->content_type
);
143 static fam_dir_entry
* fam_dir_entry_init(void) {
144 fam_dir_entry
*fam_dir
= NULL
;
146 fam_dir
= calloc(1, sizeof(*fam_dir
));
147 force_assert(NULL
!= fam_dir
);
149 fam_dir
->name
= buffer_init();
154 static void fam_dir_entry_free(FAMConnection
*fc
, void *data
) {
155 fam_dir_entry
*fam_dir
= data
;
157 if (!fam_dir
) return;
159 FAMCancelMonitor(fc
, fam_dir
->req
);
161 buffer_free(fam_dir
->name
);
168 void stat_cache_free(stat_cache
*sc
) {
171 splay_tree
*node
= sc
->files
;
173 osize
= sc
->files
->size
;
175 stat_cache_entry_free(node
->data
);
176 sc
->files
= splaytree_delete(sc
->files
, node
->key
);
178 force_assert(osize
- 1 == splaytree_size(sc
->files
));
181 buffer_free(sc
->dir_name
);
182 buffer_free(sc
->hash_key
);
187 splay_tree
*node
= sc
->dirs
;
189 osize
= sc
->dirs
->size
;
191 fam_dir_entry_free(&sc
->fam
, node
->data
);
192 sc
->dirs
= splaytree_delete(sc
->dirs
, node
->key
);
195 force_assert(NULL
== sc
->dirs
);
197 force_assert(osize
== (sc
->dirs
->size
+ 1));
201 if (-1 != sc
->fam_fcce_ndx
) {
202 /* fd events already gone */
203 sc
->fam_fcce_ndx
= -1;
211 #if defined(HAVE_XATTR)
/*
 * HAVE_XATTR variant: read the extended attribute `xattrname` of the file
 * `name` into `buf` using attr_get().
 *
 * buf is prepared for 1023 bytes and attrlen is seeded with buf->size - 1
 * before the call. When attr_get() returns 0 the buffer is committed to the
 * length attr_get() stored back into attrlen.
 *
 * NOTE(review): the declarations and the return statement are not visible in
 * this excerpt; presumably the attr_get() result `ret` is returned - confirm.
 */
212 static int stat_cache_attr_get(buffer
*buf
, char *name
, char *xattrname
) {
216 buffer_string_prepare_copy(buf
, 1023);
217 attrlen
= buf
->size
- 1;
218 if(0 == (ret
= attr_get(name
, xattrname
, buf
->ptr
, &attrlen
, 0))) {
219 buffer_commit(buf
, attrlen
);
223 #elif defined(HAVE_EXTATTR)
/*
 * HAVE_EXTATTR variant: read the extended attribute `xattrname` of the file
 * `name` from EXTATTR_NAMESPACE_USER into `buf` using extattr_get_file().
 *
 * buf is prepared for 1023 bytes and at most buf->size - 1 bytes are
 * requested, which leaves room for the terminator. When the call does not
 * return -1, buf->used is set to attrlen + 1 and the data is NUL-terminated
 * at index attrlen.
 *
 * NOTE(review): the declarations and the return statements are not visible
 * in this excerpt - confirm the return convention against the callers.
 */
224 static int stat_cache_attr_get(buffer
*buf
, char *name
, char *xattrname
) {
227 buffer_string_prepare_copy(buf
, 1023);
229 if (-1 != (attrlen
= extattr_get_file(name
, EXTATTR_NAMESPACE_USER
, xattrname
, buf
->ptr
, buf
->size
- 1))) {
230 buf
->used
= attrlen
+ 1;
231 buf
->ptr
[attrlen
] = '\0';
238 /* the famous DJB hash function for strings */
239 static uint32_t hashme(buffer
*str
) {
240 uint32_t hash
= 5381;
242 for (s
= str
->ptr
; *s
; s
++) {
243 hash
= ((hash
<< 5) + hash
) + *s
;
246 hash
&= ~(((uint32_t)1) << 31); /* strip the highest bit */
252 handler_t
stat_cache_handle_fdevent(server
*srv
, void *_fce
, int revent
) {
254 stat_cache
*sc
= srv
->stat_cache
;
260 if (revent
& FDEVENT_IN
) {
261 events
= FAMPending(&sc
->fam
);
263 for (i
= 0; i
< events
; i
++) {
265 fam_dir_entry
*fam_dir
;
269 FAMNextEvent(&sc
->fam
, &fe
);
277 /* if the filename is a directory remove the entry */
279 fam_dir
= fe
.userdata
;
282 /* file/dir is still here */
283 if (fe
.code
== FAMChanged
) break;
285 /* we have 2 versions, follow and no-follow-symlink */
287 for (j
= 0; j
< 2; j
++) {
288 buffer_copy_string(sc
->hash_key
, fe
.filename
);
289 buffer_append_int(sc
->hash_key
, j
);
291 ndx
= hashme(sc
->hash_key
);
293 sc
->dirs
= splaytree_splay(sc
->dirs
, ndx
);
296 if (node
&& (node
->key
== ndx
)) {
297 int osize
= splaytree_size(sc
->dirs
);
299 fam_dir_entry_free(&sc
->fam
, node
->data
);
300 sc
->dirs
= splaytree_delete(sc
->dirs
, ndx
);
302 force_assert(osize
- 1 == splaytree_size(sc
->dirs
));
312 if (revent
& FDEVENT_HUP
) {
313 /* fam closed the connection */
314 fdevent_event_del(srv
->ev
, &(sc
->fam_fcce_ndx
), FAMCONNECTION_GETFD(&sc
->fam
));
315 fdevent_unregister(srv
->ev
, FAMCONNECTION_GETFD(&sc
->fam
));
320 return HANDLER_GO_ON
;
323 static int buffer_copy_dirname(buffer
*dst
, buffer
*file
) {
326 if (buffer_string_is_empty(file
)) return -1;
328 for (i
= buffer_string_length(file
); i
> 0; i
--) {
329 if (file
->ptr
[i
] == '/') {
330 buffer_copy_string_len(dst
, file
->ptr
, i
);
/*
 * lstat() the path held in dname and classify it.
 *
 * Returns 0 when the path is a symbolic link and 1 when lstat() succeeded
 * but the path is not a link; callers in this file compare against 0 to
 * detect symlinks. When lstat() fails, dname and strerror(errno) are written
 * to the error log.
 *
 * NOTE(review): the return value on the failure path is not visible in this
 * excerpt - confirm.
 */
340 static int stat_cache_lstat(server
*srv
, buffer
*dname
, struct stat
*lst
) {
341 if (lstat(dname
->ptr
, lst
) == 0) {
342 return S_ISLNK(lst
->st_mode
) ? 0 : 1;
345 log_error_write(srv
, __FILE__
, __LINE__
, "sbs",
347 dname
, strerror(errno
));
358 * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
359 * - HANDLER_ERROR on stat() failed -> see errno for problem
362 handler_t
stat_cache_get_entry(server
*srv
, connection
*con
, buffer
*name
, stat_cache_entry
**ret_sce
) {
364 fam_dir_entry
*fam_dir
= NULL
;
367 stat_cache_entry
*sce
= NULL
;
373 #ifdef DEBUG_STAT_CACHE
382 * check if the directory for this file has changed
385 sc
= srv
->stat_cache
;
387 buffer_copy_buffer(sc
->hash_key
, name
);
388 buffer_append_int(sc
->hash_key
, con
->conf
.follow_symlink
);
390 file_ndx
= hashme(sc
->hash_key
);
391 sc
->files
= splaytree_splay(sc
->files
, file_ndx
);
393 #ifdef DEBUG_STAT_CACHE
394 for (i
= 0; i
< ctrl
.used
; i
++) {
395 if (ctrl
.ptr
[i
] == file_ndx
) break;
399 if (sc
->files
&& (sc
->files
->key
== file_ndx
)) {
400 #ifdef DEBUG_STAT_CACHE
401 /* it was in the cache */
402 force_assert(i
< ctrl
.used
);
405 /* we have seen this file already and
406 * don't stat() it again in the same second */
408 sce
= sc
->files
->data
;
410 /* check if the name is the same, we might have a collision */
412 if (buffer_is_equal(name
, sce
->name
)) {
413 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_SIMPLE
) {
414 if (sce
->stat_ts
== srv
->cur_ts
&& con
->conf
.follow_symlink
) {
416 return HANDLER_GO_ON
;
420 /* collision, forget about the entry */
424 #ifdef DEBUG_STAT_CACHE
425 if (i
!= ctrl
.used
) {
426 log_error_write(srv
, __FILE__
, __LINE__
, "xSB",
427 file_ndx
, "was already inserted but not found in cache, ", name
);
429 force_assert(i
== ctrl
.used
);
435 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_FAM
) {
436 if (0 != buffer_copy_dirname(sc
->dir_name
, name
)) {
437 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
438 "no '/' found in filename:", name
);
439 return HANDLER_ERROR
;
442 buffer_copy_buffer(sc
->hash_key
, sc
->dir_name
);
443 buffer_append_int(sc
->hash_key
, con
->conf
.follow_symlink
);
445 dir_ndx
= hashme(sc
->hash_key
);
447 sc
->dirs
= splaytree_splay(sc
->dirs
, dir_ndx
);
449 if ((NULL
!= sc
->dirs
) && (sc
->dirs
->key
== dir_ndx
)) {
450 fam_dir
= sc
->dirs
->data
;
452 /* check whether we got a collision */
453 if (buffer_is_equal(sc
->dir_name
, fam_dir
->name
)) {
454 /* test whether a found file cache entry is still ok */
455 if ((NULL
!= sce
) && (fam_dir
->version
== sce
->dir_version
)) {
456 /* the stat()-cache entry is still ok */
459 return HANDLER_GO_ON
;
462 /* hash collision, forget about the entry */
471 * - open() + fstat() on a named-pipe results in a (intended) hang.
472 * - stat() if regular file + open() to see if we can read from it is better
475 if (-1 == stat(name
->ptr
, &st
)) {
476 return HANDLER_ERROR
;
480 if (S_ISREG(st
.st_mode
)) {
481 /* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
482 if (name
->ptr
[buffer_string_length(name
) - 1] == '/') {
484 return HANDLER_ERROR
;
487 /* try to open the file to check if we can read it */
488 if (-1 == (fd
= open(name
->ptr
, O_RDONLY
))) {
489 return HANDLER_ERROR
;
496 sce
= stat_cache_entry_init();
497 buffer_copy_buffer(sce
->name
, name
);
499 /* already splayed file_ndx */
500 if ((NULL
!= sc
->files
) && (sc
->files
->key
== file_ndx
)) {
501 /* hash collision: replace old entry */
502 stat_cache_entry_free(sc
->files
->data
);
503 sc
->files
->data
= sce
;
505 int osize
= splaytree_size(sc
->files
);
507 sc
->files
= splaytree_insert(sc
->files
, file_ndx
, sce
);
508 force_assert(osize
+ 1 == splaytree_size(sc
->files
));
510 #ifdef DEBUG_STAT_CACHE
511 if (ctrl
.size
== 0) {
514 ctrl
.ptr
= malloc(ctrl
.size
* sizeof(*ctrl
.ptr
));
515 force_assert(NULL
!= ctrl
.ptr
);
516 } else if (ctrl
.size
== ctrl
.used
) {
518 ctrl
.ptr
= realloc(ctrl
.ptr
, ctrl
.size
* sizeof(*ctrl
.ptr
));
519 force_assert(NULL
!= ctrl
.ptr
);
522 ctrl
.ptr
[ctrl
.used
++] = file_ndx
;
525 force_assert(sc
->files
);
526 force_assert(sc
->files
->data
== sce
);
530 sce
->stat_ts
= srv
->cur_ts
;
532 /* catch the obvious symlinks
534 * this is not a secure check as we still have a race-condition between
535 * the stat() and the open. We can only solve this by
539 * and keeping the file open for the rest of the time. But this can
540 * only be done at network level.
542 * per default it is not a symlink
547 /* we want to only check for symlinks if we should block symlinks.
549 if (!con
->conf
.follow_symlink
) {
550 if (stat_cache_lstat(srv
, name
, &lst
) == 0) {
551 #ifdef DEBUG_STAT_CACHE
552 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
553 "found symlink", name
);
559 * we assume "/" can not be symlink, so
560 * skip the symlink stuff if our path is /
562 else if (buffer_string_length(name
) > 1) {
566 dname
= buffer_init();
567 buffer_copy_buffer(dname
, name
);
569 while ((s_cur
= strrchr(dname
->ptr
, '/'))) {
570 buffer_string_set_length(dname
, s_cur
- dname
->ptr
);
571 if (dname
->ptr
== s_cur
) {
572 #ifdef DEBUG_STAT_CACHE
573 log_error_write(srv
, __FILE__
, __LINE__
, "s", "reached /");
577 #ifdef DEBUG_STAT_CACHE
578 log_error_write(srv
, __FILE__
, __LINE__
, "sbs",
579 "checking if", dname
, "is a symlink");
581 if (stat_cache_lstat(srv
, dname
, &lst
) == 0) {
583 #ifdef DEBUG_STAT_CACHE
584 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
585 "found symlink", dname
);
595 if (S_ISREG(st
.st_mode
)) {
596 /* determine mimetype */
597 buffer_reset(sce
->content_type
);
598 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
599 if (con
->conf
.use_xattr
) {
600 stat_cache_attr_get(sce
->content_type
, name
->ptr
, srv
->srvconf
.xattr_name
->ptr
);
603 /* xattr did not set a content-type. ask the config */
604 if (buffer_string_is_empty(sce
->content_type
)) {
605 size_t namelen
= buffer_string_length(name
);
607 for (k
= 0; k
< con
->conf
.mimetypes
->used
; k
++) {
608 data_string
*ds
= (data_string
*)con
->conf
.mimetypes
->data
[k
];
609 buffer
*type
= ds
->key
;
610 size_t typelen
= buffer_string_length(type
);
612 if (buffer_is_empty(type
)) continue;
614 /* check if the right side is the same */
615 if (typelen
> namelen
) continue;
617 if (0 == strncasecmp(name
->ptr
+ namelen
- typelen
, type
->ptr
, typelen
)) {
618 buffer_copy_buffer(sce
->content_type
, ds
->value
);
623 etag_create(sce
->etag
, &(sce
->st
), con
->etag_flags
);
624 } else if (S_ISDIR(st
.st_mode
)) {
625 etag_create(sce
->etag
, &(sce
->st
), con
->etag_flags
);
629 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_FAM
) {
630 /* is this directory already registered ? */
631 if (NULL
== fam_dir
) {
632 fam_dir
= fam_dir_entry_init();
634 buffer_copy_buffer(fam_dir
->name
, sc
->dir_name
);
636 fam_dir
->version
= 1;
638 fam_dir
->req
= calloc(1, sizeof(FAMRequest
));
639 force_assert(NULL
!= fam_dir
);
641 if (0 != FAMMonitorDirectory(&sc
->fam
, fam_dir
->name
->ptr
,
642 fam_dir
->req
, fam_dir
)) {
644 log_error_write(srv
, __FILE__
, __LINE__
, "sbsbs",
645 "monitoring dir failed:",
648 FamErrlist
[FAMErrno
]);
650 fam_dir_entry_free(&sc
->fam
, fam_dir
);
653 int osize
= splaytree_size(sc
->dirs
);
655 /* already splayed dir_ndx */
656 if ((NULL
!= sc
->dirs
) && (sc
->dirs
->key
== dir_ndx
)) {
657 /* hash collision: replace old entry */
658 fam_dir_entry_free(&sc
->fam
, sc
->dirs
->data
);
659 sc
->dirs
->data
= fam_dir
;
661 sc
->dirs
= splaytree_insert(sc
->dirs
, dir_ndx
, fam_dir
);
662 force_assert(osize
== (splaytree_size(sc
->dirs
) - 1));
665 force_assert(sc
->dirs
);
666 force_assert(sc
->dirs
->data
== fam_dir
);
670 /* bind the fam_fc to the stat() cache entry */
673 sce
->dir_version
= fam_dir
->version
;
680 return HANDLER_GO_ON
;
/*
 * Open the file named by name->ptr read-only and fstat() the resulting
 * descriptor into *st.
 *
 * The open flags combine O_BINARY, O_LARGEFILE, O_NOCTTY and O_NONBLOCK, and
 * add O_NOFOLLOW when con->conf.follow_symlink is off. srv is currently
 * unused.
 *
 * NOTE(review): the fallback #defines for the other O_* flags, the check of
 * the descriptor returned by open(), and the return statements are not
 * visible in this excerpt - confirm the return convention and who closes
 * the descriptor.
 */
683 int stat_cache_open_rdonly_fstat (server
*srv
, connection
*con
, buffer
*name
, struct stat
*st
) {
684 /*(Note: O_NOFOLLOW affects only the final path segment, the target file,
685 * not any intermediate symlinks along the path)*/
690 #define O_LARGEFILE 0
701 const int oflags
= O_BINARY
| O_LARGEFILE
| O_NOCTTY
| O_NONBLOCK
702 | (con
->conf
.follow_symlink
? 0 : O_NOFOLLOW
);
703 const int fd
= open(name
->ptr
, O_RDONLY
| oflags
);
705 if (0 == fstat(fd
, st
)) {
711 UNUSED(srv
); /*(might log_error_write(srv, ...) in the future)*/
716 * remove stat() from cache which haven't been stat()ed for
717 * more than 2 seconds
720 * walk through the stat-cache, collect the ids which are too old
721 * and remove them in a second loop
724 static int stat_cache_tag_old_entries(server
*srv
, splay_tree
*t
, int *keys
, size_t *ndx
) {
725 stat_cache_entry
*sce
;
729 stat_cache_tag_old_entries(srv
, t
->left
, keys
, ndx
);
730 stat_cache_tag_old_entries(srv
, t
->right
, keys
, ndx
);
734 if (srv
->cur_ts
- sce
->stat_ts
> 2) {
735 keys
[(*ndx
)++] = t
->key
;
741 int stat_cache_trigger_cleanup(server
*srv
) {
743 size_t max_ndx
= 0, i
;
746 sc
= srv
->stat_cache
;
748 if (!sc
->files
) return 0;
750 keys
= calloc(1, sizeof(int) * sc
->files
->size
);
751 force_assert(NULL
!= keys
);
753 stat_cache_tag_old_entries(srv
, sc
->files
, keys
, &max_ndx
);
755 for (i
= 0; i
< max_ndx
; i
++) {
759 sc
->files
= splaytree_splay(sc
->files
, ndx
);
763 if (node
&& (node
->key
== ndx
)) {
764 #ifdef DEBUG_STAT_CACHE
766 int osize
= splaytree_size(sc
->files
);
767 stat_cache_entry
*sce
= node
->data
;
769 stat_cache_entry_free(node
->data
);
770 sc
->files
= splaytree_delete(sc
->files
, ndx
);
772 #ifdef DEBUG_STAT_CACHE
773 for (j
= 0; j
< ctrl
.used
; j
++) {
774 if (ctrl
.ptr
[j
] == ndx
) {
775 ctrl
.ptr
[j
] = ctrl
.ptr
[--ctrl
.used
];
780 force_assert(osize
- 1 == splaytree_size(sc
->files
));