4 #include "stat_cache.h"
19 #ifdef HAVE_ATTR_ATTRIBUTES_H
20 # include <attr/attributes.h>
23 #ifdef HAVE_SYS_EXTATTR_H
24 # include <sys/extattr.h>
33 /* NetBSD 1.3.x needs it */
35 # define MAP_FAILED -1
39 # define O_LARGEFILE 0
47 /* enables debug code for testing if all nodes in the stat-cache are accessible */
48 #define DEBUG_STAT_CACHE
54 * we cache the stat() calls in our own storage
55 * the directories are cached in FAM
57 * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
59 * if the stat()-cache is queried we check if the version id for the directory is the
60 * same and return immediately.
65 * - for each stat-cache entry we need a fast indirect lookup on the directory name
66 * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
68 * stat <<-> directory <-> FAMRequest
70 * if file is deleted, directory is dirty, file is rechecked ...
71 * if directory is deleted, directory mapping is removed
85 /* the directory name is too long to always compare on it
87 * - the hash-key is used as sorting criteria for a tree
88 * - a splay-tree is used as we can use the caching effect of it
91 /* we want to cleanup the stat-cache every few seconds, let's say 10
93 * - remove entries which have been outdated for 30s
94 * - remove entries which are fresh but haven't been used for 60s
95 * - if we don't have a stat-cache entry for a directory, release it from the monitor
98 #ifdef DEBUG_STAT_CACHE
/* Debug-only, file-scope list of hash keys. stat_cache_get_entry() appends
 * the key of every entry it inserts into the files tree and searches this
 * list on each lookup; stat_cache_trigger_cleanup() removes keys again. */
106 static fake_keys ctrl
;
/* Create a new stat_cache.
 *
 * The structure is allocated zero-filled with calloc() and the result is
 * checked with force_assert(). The dir_name and hash_key buffers (used as
 * temporaries by the lookup code in this file) are created here, and the
 * FAM fdevent index is set to -1. */
109 stat_cache
*stat_cache_init(void) {
110 stat_cache
*sc
= NULL
;
112 sc
= calloc(1, sizeof(*sc
));
113 force_assert(NULL
!= sc
);
115 sc
->dir_name
= buffer_init();
116 sc
->hash_key
= buffer_init();
/* -1 is the value stat_cache_free() tests against before resetting it */
119 sc
->fam_fcce_ndx
= -1;
122 #ifdef DEBUG_STAT_CACHE
/* Allocate one zero-filled stat_cache_entry (checked with force_assert())
 * and create its three owned buffers: name, etag and content_type.
 * stat_cache_entry_free() releases the same three buffers. */
129 static stat_cache_entry
* stat_cache_entry_init(void) {
130 stat_cache_entry
*sce
= NULL
;
132 sce
= calloc(1, sizeof(*sce
));
133 force_assert(NULL
!= sce
);
135 sce
->name
= buffer_init();
136 sce
->etag
= buffer_init();
137 sce
->content_type
= buffer_init();
/* Release a stat_cache_entry.
 *
 * data is an untyped pointer (the splay tree stores entries as void *) and
 * is treated as a stat_cache_entry. The etag, name and content_type buffers
 * created by stat_cache_entry_init() are freed here. */
142 static void stat_cache_entry_free(void *data
) {
143 stat_cache_entry
*sce
= data
;
146 buffer_free(sce
->etag
);
147 buffer_free(sce
->name
);
148 buffer_free(sce
->content_type
);
/* Allocate one zero-filled fam_dir_entry (checked with force_assert()) and
 * create its name buffer. The req member is not set here; it is allocated by
 * stat_cache_get_entry() when the directory is handed to FAM. */
154 static fam_dir_entry
* fam_dir_entry_init(void) {
155 fam_dir_entry
*fam_dir
= NULL
;
157 fam_dir
= calloc(1, sizeof(*fam_dir
));
158 force_assert(NULL
!= fam_dir
);
160 fam_dir
->name
= buffer_init();
/* Release a fam_dir_entry.
 *
 * fc   - FAM connection the directory was registered on
 * data - untyped pointer to the fam_dir_entry; NULL is accepted and ignored
 *
 * The FAM monitor request stored in req is cancelled before the name buffer
 * is freed. */
165 static void fam_dir_entry_free(FAMConnection
*fc
, void *data
) {
166 fam_dir_entry
*fam_dir
= data
;
168 if (!fam_dir
) return;
170 FAMCancelMonitor(fc
, fam_dir
->req
);
172 buffer_free(fam_dir
->name
);
/* Tear down a stat_cache.
 *
 * Visible steps: the root node of the files tree is freed and deleted from
 * the tree, with an assert that the tree shrank by exactly one; the dir_name
 * and hash_key buffers are freed; the dirs tree is handled the same way via
 * fam_dir_entry_free(); finally the FAM fdevent index is reset to -1.
 *
 * NOTE(review): the loop headers around the per-node code are not visible in
 * this excerpt - presumably each tree is drained node by node; confirm. */
179 void stat_cache_free(stat_cache
*sc
) {
/* always work on the current root of the files tree */
182 splay_tree
*node
= sc
->files
;
184 osize
= sc
->files
->size
;
186 stat_cache_entry_free(node
->data
);
187 sc
->files
= splaytree_delete(sc
->files
, node
->key
);
/* deleting the root must reduce the tree size by exactly one */
189 force_assert(osize
- 1 == splaytree_size(sc
->files
));
192 buffer_free(sc
->dir_name
);
193 buffer_free(sc
->hash_key
);
/* same procedure for the directory tree, which holds fam_dir_entry nodes */
198 splay_tree
*node
= sc
->dirs
;
200 osize
= sc
->dirs
->size
;
202 fam_dir_entry_free(&sc
->fam
, node
->data
);
203 sc
->dirs
= splaytree_delete(sc
->dirs
, node
->key
);
/* NOTE(review): the next two asserts cannot both hold on the same path (the
 * first requires sc->dirs to be NULL, the second dereferences it). The
 * condition that selects between them is not visible in this excerpt -
 * presumably "last node removed" versus "nodes remain"; confirm. */
206 force_assert(NULL
== sc
->dirs
);
208 force_assert(osize
== (sc
->dirs
->size
+ 1));
212 if (-1 != sc
->fam_fcce_ndx
) {
213 /* fd events already gone */
214 sc
->fam_fcce_ndx
= -1;
/* stat_cache_attr_get(): read the extended attribute xattrname of the file
 * name into buf. Two implementations are selected at compile time.
 *
 * buf       - destination buffer; prepared for 1023 bytes before the call
 * name      - path of the file to query
 * xattrname - name of the attribute to read
 */
222 #if defined(HAVE_XATTR)
/* HAVE_XATTR variant: uses attr_get(). attrlen is passed by address, starts
 * as the buffer capacity minus one, and is handed to buffer_commit() when
 * attr_get() returns 0. */
223 static int stat_cache_attr_get(buffer
*buf
, char *name
, char *xattrname
) {
227 buffer_string_prepare_copy(buf
, 1023);
228 attrlen
= buf
->size
- 1;
229 if(0 == (ret
= attr_get(name
, xattrname
, buf
->ptr
, &attrlen
, 0))) {
230 buffer_commit(buf
, attrlen
);
234 #elif defined(HAVE_EXTATTR)
/* HAVE_EXTATTR variant: uses extattr_get_file() in the user namespace with a
 * limit of capacity minus one. Any result other than -1 is taken as the
 * number of bytes stored; the buffer length is then set by hand and the data
 * is NUL-terminated. */
235 static int stat_cache_attr_get(buffer
*buf
, char *name
, char *xattrname
) {
238 buffer_string_prepare_copy(buf
, 1023);
240 if (-1 != (attrlen
= extattr_get_file(name
, EXTATTR_NAMESPACE_USER
, xattrname
, buf
->ptr
, buf
->size
- 1))) {
/* used counts the terminating NUL, hence attrlen + 1 */
241 buf
->used
= attrlen
+ 1;
242 buf
->ptr
[attrlen
] = '\0';
249 /* the famous DJB hash function for strings */
/* Starts from 5381 and, for every byte of str->ptr up to the terminating
 * NUL, computes hash = hash * 33 + byte (written as a shift plus an add).
 * Bit 31 is cleared at the end, so the result fits a non-negative 32-bit
 * signed value.
 *
 * NOTE(review): str->ptr is dereferenced without a NULL check - presumably
 * callers always pass a buffer that holds a string; confirm. */
250 static uint32_t hashme(buffer
*str
) {
251 uint32_t hash
= 5381;
253 for (s
= str
->ptr
; *s
; s
++) {
254 hash
= ((hash
<< 5) + hash
) + *s
;
257 hash
&= ~(((uint32_t)1) << 31); /* strip the highest bit */
/* fdevent callback for the FAM connection of the stat cache.
 *
 * srv    - server; srv->stat_cache and srv->ev are used
 * _fce   - callback context pointer (no use is visible in this excerpt)
 * revent - event bitmask; FDEVENT_IN and FDEVENT_HUP are handled
 *
 * FDEVENT_IN:  all pending FAM events are fetched. For an event that is not
 *              FAMChanged, the directory-cache entries keyed by the reported
 *              filename are removed from sc->dirs (both key variants).
 * FDEVENT_HUP: the FAM descriptor is removed from the event system.
 *
 * Returns HANDLER_GO_ON. */
263 handler_t
stat_cache_handle_fdevent(server
*srv
, void *_fce
, int revent
) {
265 stat_cache
*sc
= srv
->stat_cache
;
271 if (revent
& FDEVENT_IN
) {
/* number of events that can be fetched right now */
272 events
= FAMPending(&sc
->fam
);
274 for (i
= 0; i
< events
; i
++) {
276 fam_dir_entry
*fam_dir
;
280 FAMNextEvent(&sc
->fam
, &fe
);
288 /* if the filename is a directory remove the entry */
/* userdata is the fam_dir_entry that was passed to FAMMonitorDirectory() */
290 fam_dir
= fe
.userdata
;
293 /* file/dir is still here */
/* NOTE(review): the construct this break leaves is not visible in this
 * excerpt - presumably a switch on fe.code rather than the event loop;
 * confirm. */
294 if (fe
.code
== FAMChanged
) break;
296 /* we have 2 versions, follow and no-follow-symlink */
/* the key is built as filename + "0" / "1", mirroring how
 * stat_cache_get_entry() appends the follow_symlink flag to directory keys */
298 for (j
= 0; j
< 2; j
++) {
299 buffer_copy_string(sc
->hash_key
, fe
.filename
);
300 buffer_append_int(sc
->hash_key
, j
);
302 ndx
= hashme(sc
->hash_key
);
304 sc
->dirs
= splaytree_splay(sc
->dirs
, ndx
);
/* only remove when a node with exactly this key was found */
307 if (node
&& (node
->key
== ndx
)) {
308 int osize
= splaytree_size(sc
->dirs
);
310 fam_dir_entry_free(&sc
->fam
, node
->data
);
311 sc
->dirs
= splaytree_delete(sc
->dirs
, ndx
);
313 force_assert(osize
- 1 == splaytree_size(sc
->dirs
));
323 if (revent
& FDEVENT_HUP
) {
324 /* fam closed the connection */
325 fdevent_event_del(srv
->ev
, &(sc
->fam_fcce_ndx
), FAMCONNECTION_GETFD(&sc
->fam
));
326 fdevent_unregister(srv
->ev
, FAMCONNECTION_GETFD(&sc
->fam
));
331 return HANDLER_GO_ON
;
/* Copy the directory part of the path in file into dst.
 *
 * Returns -1 right away when file is empty. Otherwise the path is scanned
 * backwards; at the first '/' found, the bytes before it are copied to dst
 * (the slash itself is not copied).
 *
 * NOTE(review): the scan stops while i > 0, so index 0 is never tested and a
 * path whose only '/' is the leading one is not matched by this loop -
 * confirm that callers never rely on that case. */
334 static int buffer_copy_dirname(buffer
*dst
, buffer
*file
) {
337 if (buffer_string_is_empty(file
)) return -1;
/* starts at the string length, i.e. one past the last character */
339 for (i
= buffer_string_length(file
); i
> 0; i
--) {
340 if (file
->ptr
[i
] == '/') {
341 buffer_copy_string_len(dst
, file
->ptr
, i
);
/* lstat() the path in dname and report whether it is a symbolic link.
 *
 * srv   - server, used for error logging
 * dname - path to inspect
 * lst   - receives the lstat() result
 *
 * When lstat() succeeds the result is 0 for a symlink and 1 for anything
 * else (note the inverted sense: 0 means "is a symlink"). When lstat()
 * fails, the path and strerror(errno) are logged; the value returned on that
 * path is not visible in this excerpt. */
351 static int stat_cache_lstat(server
*srv
, buffer
*dname
, struct stat
*lst
) {
352 if (lstat(dname
->ptr
, lst
) == 0) {
353 return S_ISLNK(lst
->st_mode
) ? 0 : 1;
356 log_error_write(srv
, __FILE__
, __LINE__
, "sbs",
358 dname
, strerror(errno
));
369 * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
370 * - HANDLER_ERROR on stat() failed -> see errno for problem
/* Look up, or create, the stat-cache entry for the path in name.
 *
 * srv     - server; provides the stat cache, current timestamp and config
 * con     - connection; provides follow_symlink, use_xattr, mimetypes and
 *           etag_flags
 * name    - path to look up
 * ret_sce - output pointer for the entry (the store itself is not visible
 *           in this excerpt)
 *
 * Visible flow:
 *  1. hash name + follow_symlink and splay the files tree on that key
 *  2. simple engine: reuse an entry stat()ed in the current second
 *  3. FAM engine: reuse an entry whose directory version is unchanged
 *  4. otherwise stat() the path, build a new entry, put it into the tree
 *  5. symlink check, content type and etag
 *  6. FAM engine: register the directory with FAM if it is not known yet
 *
 * Returns HANDLER_GO_ON on success and HANDLER_ERROR when stat() or open()
 * fails, the path is a regular file named with a trailing slash, or no
 * directory part can be derived from name (FAM engine). */
373 handler_t
stat_cache_get_entry(server
*srv
, connection
*con
, buffer
*name
, stat_cache_entry
**ret_sce
) {
375 fam_dir_entry
*fam_dir
= NULL
;
378 stat_cache_entry
*sce
= NULL
;
384 #ifdef DEBUG_STAT_CACHE
393 * check if the directory for this file has changed
396 sc
= srv
->stat_cache
;
/* the key text is the path followed by the follow_symlink setting, so the
 * two settings hash to different entries */
398 buffer_copy_buffer(sc
->hash_key
, name
);
399 buffer_append_int(sc
->hash_key
, con
->conf
.follow_symlink
);
401 file_ndx
= hashme(sc
->hash_key
);
402 sc
->files
= splaytree_splay(sc
->files
, file_ndx
);
/* debug cross-check: find the key in the list of inserted keys; i ends up
 * equal to ctrl.used when the key is unknown */
404 #ifdef DEBUG_STAT_CACHE
405 for (i
= 0; i
< ctrl
.used
; i
++) {
406 if (ctrl
.ptr
[i
] == file_ndx
) break;
/* after the splay a match, if any, sits at the root */
410 if (sc
->files
&& (sc
->files
->key
== file_ndx
)) {
411 #ifdef DEBUG_STAT_CACHE
412 /* it was in the cache */
413 force_assert(i
< ctrl
.used
);
416 /* we have seen this file already and
417 * don't stat() it again in the same second */
419 sce
= sc
->files
->data
;
421 /* check if the name is the same, we might have a collision */
423 if (buffer_is_equal(name
, sce
->name
)) {
424 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_SIMPLE
) {
/* the shortcut is taken only when symlinks are followed; otherwise the
 * code falls through to the full check below */
425 if (sce
->stat_ts
== srv
->cur_ts
&& con
->conf
.follow_symlink
) {
427 return HANDLER_GO_ON
;
431 /* collision, forget about the entry */
435 #ifdef DEBUG_STAT_CACHE
436 if (i
!= ctrl
.used
) {
437 log_error_write(srv
, __FILE__
, __LINE__
, "xSB",
438 file_ndx
, "was already inserted but not found in cache, ", name
);
440 force_assert(i
== ctrl
.used
);
/* FAM engine: validate a cached entry against its directory's version */
446 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_FAM
) {
447 if (0 != buffer_copy_dirname(sc
->dir_name
, name
)) {
448 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
449 "no '/' found in filename:", name
);
450 return HANDLER_ERROR
;
/* directory key: directory name followed by the follow_symlink setting */
453 buffer_copy_buffer(sc
->hash_key
, sc
->dir_name
);
454 buffer_append_int(sc
->hash_key
, con
->conf
.follow_symlink
);
456 dir_ndx
= hashme(sc
->hash_key
);
458 sc
->dirs
= splaytree_splay(sc
->dirs
, dir_ndx
);
460 if ((NULL
!= sc
->dirs
) && (sc
->dirs
->key
== dir_ndx
)) {
461 fam_dir
= sc
->dirs
->data
;
463 /* check whether we got a collision */
464 if (buffer_is_equal(sc
->dir_name
, fam_dir
->name
)) {
465 /* test whether a found file cache entry is still ok */
466 if ((NULL
!= sce
) && (fam_dir
->version
== sce
->dir_version
)) {
467 /* the stat()-cache entry is still ok */
470 return HANDLER_GO_ON
;
473 /* hash collision, forget about the entry */
482 * - open() + fstat() on a named-pipe results in a (intended) hang.
483 * - stat() if regular file + open() to see if we can read from it is better
/* slow path: ask the filesystem */
486 if (-1 == stat(name
->ptr
, &st
)) {
487 return HANDLER_ERROR
;
491 if (S_ISREG(st
.st_mode
)) {
492 /* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
493 if (name
->ptr
[buffer_string_length(name
) - 1] == '/') {
495 return HANDLER_ERROR
;
498 /* try to open the file to check if we can read it */
499 if (-1 == (fd
= open(name
->ptr
, O_RDONLY
))) {
500 return HANDLER_ERROR
;
/* build a fresh entry for this path */
507 sce
= stat_cache_entry_init();
508 buffer_copy_buffer(sce
->name
, name
);
510 /* already splayed file_ndx */
511 if ((NULL
!= sc
->files
) && (sc
->files
->key
== file_ndx
)) {
512 /* hash collision: replace old entry */
513 stat_cache_entry_free(sc
->files
->data
);
514 sc
->files
->data
= sce
;
/* no node with this key yet: insert one and check the tree grew by one */
516 int osize
= splaytree_size(sc
->files
);
518 sc
->files
= splaytree_insert(sc
->files
, file_ndx
, sce
);
519 force_assert(osize
+ 1 == splaytree_size(sc
->files
));
/* debug bookkeeping: remember the inserted key, growing the list on demand */
521 #ifdef DEBUG_STAT_CACHE
522 if (ctrl
.size
== 0) {
525 ctrl
.ptr
= malloc(ctrl
.size
* sizeof(*ctrl
.ptr
));
526 force_assert(NULL
!= ctrl
.ptr
);
527 } else if (ctrl
.size
== ctrl
.used
) {
529 ctrl
.ptr
= realloc(ctrl
.ptr
, ctrl
.size
* sizeof(*ctrl
.ptr
));
530 force_assert(NULL
!= ctrl
.ptr
);
533 ctrl
.ptr
[ctrl
.used
++] = file_ndx
;
/* in both cases the new entry must now be the root of the files tree */
536 force_assert(sc
->files
);
537 force_assert(sc
->files
->data
== sce
);
/* timestamp compared by the same-second shortcut and by the cleanup pass */
541 sce
->stat_ts
= srv
->cur_ts
;
543 /* catch the obvious symlinks
545 * this is not a secure check as we still have a race-condition between
546 * the stat() and the open. We can only solve this by
550 * and keeping the file open for the rest of the time. But this can
551 * only be done at network level.
553 * per default it is not a symlink
558 /* we want to only check for symlinks if we should block symlinks.
560 if (!con
->conf
.follow_symlink
) {
561 if (stat_cache_lstat(srv
, name
, &lst
) == 0) {
562 #ifdef DEBUG_STAT_CACHE
563 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
564 "found symlink", name
);
570 * we assume "/" can not be symlink, so
571 * skip the symlink stuff if our path is /
573 else if (buffer_string_length(name
) > 1) {
577 dname
= buffer_init();
578 buffer_copy_buffer(dname
, name
);
580 while ((s_cur
= strrchr(dname
->ptr
, '/'))) {
581 buffer_string_set_length(dname
, s_cur
- dname
->ptr
);
582 if (dname
->ptr
== s_cur
) {
583 #ifdef DEBUG_STAT_CACHE
584 log_error_write(srv
, __FILE__
, __LINE__
, "s", "reached /");
588 #ifdef DEBUG_STAT_CACHE
589 log_error_write(srv
, __FILE__
, __LINE__
, "sbs",
590 "checking if", dname
, "is a symlink");
592 if (stat_cache_lstat(srv
, dname
, &lst
) == 0) {
594 #ifdef DEBUG_STAT_CACHE
595 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
596 "found symlink", dname
);
606 if (S_ISREG(st
.st_mode
)) {
607 /* determine mimetype */
608 buffer_reset(sce
->content_type
);
609 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
610 if (con
->conf
.use_xattr
) {
611 stat_cache_attr_get(sce
->content_type
, name
->ptr
, srv
->srvconf
.xattr_name
->ptr
);
614 /* xattr did not set a content-type. ask the config */
615 if (buffer_string_is_empty(sce
->content_type
)) {
616 size_t namelen
= buffer_string_length(name
);
/* each mimetypes entry maps a file-name suffix (key) to a type (value) */
618 for (k
= 0; k
< con
->conf
.mimetypes
->used
; k
++) {
619 data_string
*ds
= (data_string
*)con
->conf
.mimetypes
->data
[k
];
620 buffer
*type
= ds
->key
;
621 size_t typelen
= buffer_string_length(type
);
623 if (buffer_is_empty(type
)) continue;
625 /* check if the right side is the same */
626 if (typelen
> namelen
) continue;
/* case-insensitive compare of the last typelen bytes of the path */
628 if (0 == strncasecmp(name
->ptr
+ namelen
- typelen
, type
->ptr
, typelen
)) {
629 buffer_copy_buffer(sce
->content_type
, ds
->value
);
634 etag_create(sce
->etag
, &(sce
->st
), con
->etag_flags
);
635 } else if (S_ISDIR(st
.st_mode
)) {
636 etag_create(sce
->etag
, &(sce
->st
), con
->etag_flags
);
/* FAM engine: make sure the directory of this path is being monitored */
640 if (srv
->srvconf
.stat_cache_engine
== STAT_CACHE_ENGINE_FAM
) {
641 /* is this directory already registered ? */
642 if (NULL
== fam_dir
) {
643 fam_dir
= fam_dir_entry_init();
645 buffer_copy_buffer(fam_dir
->name
, sc
->dir_name
);
647 fam_dir
->version
= 1;
649 fam_dir
->req
= calloc(1, sizeof(FAMRequest
));
/* NOTE(review): this assert tests fam_dir, which was already dereferenced
 * above; the pointer just returned by calloc() is fam_dir->req, so this
 * looks like it was meant to check fam_dir->req - TODO confirm and fix. */
650 force_assert(NULL
!= fam_dir
);
/* fam_dir is passed as userdata and comes back in fe.userdata of the
 * events handled by stat_cache_handle_fdevent() */
652 if (0 != FAMMonitorDirectory(&sc
->fam
, fam_dir
->name
->ptr
,
653 fam_dir
->req
, fam_dir
)) {
655 log_error_write(srv
, __FILE__
, __LINE__
, "sbsbs",
656 "monitoring dir failed:",
659 FamErrlist
[FAMErrno
]);
661 fam_dir_entry_free(&sc
->fam
, fam_dir
);
664 int osize
= splaytree_size(sc
->dirs
);
666 /* already splayed dir_ndx */
667 if ((NULL
!= sc
->dirs
) && (sc
->dirs
->key
== dir_ndx
)) {
668 /* hash collision: replace old entry */
669 fam_dir_entry_free(&sc
->fam
, sc
->dirs
->data
);
670 sc
->dirs
->data
= fam_dir
;
672 sc
->dirs
= splaytree_insert(sc
->dirs
, dir_ndx
, fam_dir
);
673 force_assert(osize
== (splaytree_size(sc
->dirs
) - 1));
676 force_assert(sc
->dirs
);
677 force_assert(sc
->dirs
->data
== fam_dir
);
681 /* bind the fam_fc to the stat() cache entry */
/* remember the directory version this entry was created under; step 3
 * above compares the two on later lookups */
684 sce
->dir_version
= fam_dir
->version
;
691 return HANDLER_GO_ON
;
/* Open the path in name read-only and fstat() the resulting descriptor.
 *
 * srv  - unused for now (see the UNUSED() note at the end)
 * con  - connection; follow_symlink decides whether O_NOFOLLOW is added
 * name - path to open
 * st   - receives the fstat() result
 *
 * The open flags are O_RDONLY plus O_BINARY, O_LARGEFILE, O_NOCTTY and
 * O_NONBLOCK. The return statements are not visible in this excerpt. */
694 int stat_cache_open_rdonly_fstat (server
*srv
, connection
*con
, buffer
*name
, struct stat
*st
) {
695 /*(Note: O_NOFOLLOW affects only the final path segment, the target file,
696 * not any intermediate symlinks along the path)*/
/* fallback definition; its guarding condition is not visible here */
701 #define O_LARGEFILE 0
712 const int oflags
= O_BINARY
| O_LARGEFILE
| O_NOCTTY
| O_NONBLOCK
713 | (con
->conf
.follow_symlink
? 0 : O_NOFOLLOW
);
714 const int fd
= open(name
->ptr
, O_RDONLY
| oflags
);
716 if (0 == fstat(fd
, st
)) {
722 UNUSED(srv
); /*(might log_error_write(srv, ...) in the future)*/
727 * remove stat() entries from the cache which haven't been stat()ed for
728 * more than 2 seconds
731 * walk through the stat-cache, collect the ids which are too old
732 * and remove them in a second loop
/* Collect the keys of outdated entries in the subtree rooted at t.
 *
 * srv  - server; srv->cur_ts is the reference time
 * t    - subtree to visit
 * keys - output array of keys; the caller sizes it to the node count
 * ndx  - in/out count of keys stored so far
 *
 * Both children are visited recursively. The key of the current node is
 * appended when its entry was last stat()ed more than 2 seconds ago. Nothing
 * is removed here, so the tree is not modified during the walk. */
735 static int stat_cache_tag_old_entries(server
*srv
, splay_tree
*t
, int *keys
, size_t *ndx
) {
736 stat_cache_entry
*sce
;
740 stat_cache_tag_old_entries(srv
, t
->left
, keys
, ndx
);
741 stat_cache_tag_old_entries(srv
, t
->right
, keys
, ndx
);
/* age threshold in seconds */
745 if (srv
->cur_ts
- sce
->stat_ts
> 2) {
746 keys
[(*ndx
)++] = t
->key
;
/* Remove outdated entries from the files tree of the stat cache.
 *
 * Returns 0 immediately when the tree is empty. Otherwise the work is done
 * in two passes: stat_cache_tag_old_entries() first collects the keys of
 * outdated entries, then each collected key is splayed to the root, its
 * entry is freed and the node is deleted. */
752 int stat_cache_trigger_cleanup(server
*srv
) {
754 size_t max_ndx
= 0, i
;
757 sc
= srv
->stat_cache
;
759 if (!sc
->files
) return 0;
/* room for one key per node, the most the tagging pass can produce */
761 keys
= calloc(1, sizeof(int) * sc
->files
->size
);
762 force_assert(NULL
!= keys
);
/* pass 1: max_ndx receives the number of keys collected */
764 stat_cache_tag_old_entries(srv
, sc
->files
, keys
, &max_ndx
);
/* pass 2: delete every collected key */
766 for (i
= 0; i
< max_ndx
; i
++) {
770 sc
->files
= splaytree_splay(sc
->files
, ndx
);
774 if (node
&& (node
->key
== ndx
)) {
775 #ifdef DEBUG_STAT_CACHE
777 int osize
= splaytree_size(sc
->files
);
778 stat_cache_entry
*sce
= node
->data
;
780 stat_cache_entry_free(node
->data
);
781 sc
->files
= splaytree_delete(sc
->files
, ndx
);
/* debug bookkeeping: drop the key from the list by overwriting it with the
 * last element and shrinking the list */
783 #ifdef DEBUG_STAT_CACHE
784 for (j
= 0; j
< ctrl
.used
; j
++) {
785 if (ctrl
.ptr
[j
] == ndx
) {
786 ctrl
.ptr
[j
] = ctrl
.ptr
[--ctrl
.used
];
791 force_assert(osize
- 1 == splaytree_size(sc
->files
));