[security] encode quoting chars in HTML and XML
[lighttpd.git] / src / stat_cache.c
blobcdf5a187dd16177485b5769ce6c05b8bf06da47b
1 #include "first.h"
3 #include "log.h"
4 #include "stat_cache.h"
5 #include "fdevent.h"
6 #include "etag.h"
8 #include <sys/types.h>
9 #include <sys/stat.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <errno.h>
14 #include <unistd.h>
15 #include <stdio.h>
16 #include <fcntl.h>
17 #include <assert.h>
19 #ifdef HAVE_ATTR_ATTRIBUTES_H
20 # include <attr/attributes.h>
21 #endif
23 #ifdef HAVE_SYS_EXTATTR_H
24 # include <sys/extattr.h>
25 #endif
27 #ifdef HAVE_FAM_H
28 # include <fam.h>
29 #endif
31 #include "sys-mmap.h"
33 /* NetBSD 1.3.x needs it */
34 #ifndef MAP_FAILED
35 # define MAP_FAILED -1
36 #endif
38 #ifndef O_LARGEFILE
39 # define O_LARGEFILE 0
40 #endif
42 #ifndef HAVE_LSTAT
43 # define lstat stat
44 #endif
46 #if 0
/* enables debug code for testing if all nodes in the stat-cache are accessible */
48 #define DEBUG_STAT_CACHE
49 #endif
/**
 * stat-cache
 *
 * we cache the stat() calls in our own storage
 * the directories are cached in FAM
 *
 * if we get a change-event from FAM, we increment the version in the FAM->dir mapping
 *
 * if the stat()-cache is queried we check if the version id for the directory is the
 * same and return immediately.
 *
 *
 * What we need:
 *
 * - for each stat-cache entry we need a fast indirect lookup on the directory name
 * - for each FAMRequest we have to find the version in the directory cache (index as userdata)
 *
 * stat <<-> directory <-> FAMRequest
 *
 * if file is deleted, directory is dirty, file is rechecked ...
 * if directory is deleted, directory mapping is removed
 *
 * */
75 #ifdef HAVE_FAM_H
76 typedef struct {
77 FAMRequest *req;
79 buffer *name;
81 int version;
82 } fam_dir_entry;
83 #endif
/* the directory name is too long to always compare on it
 * - we need a hash
 * - the hash-key is used as sorting criteria for a tree
 * - a splay-tree is used as we can use the caching effect of it
 */

/* we want to cleanup the stat-cache every few seconds, let's say 10
 *
 * - remove entries which are outdated since 30s
 * - remove entries which are fresh but haven't been used since 60s
 * - if we don't have a stat-cache entry for a directory, release it from the monitor
 */
98 #ifdef DEBUG_STAT_CACHE
/* debug-only shadow list of the hash keys inserted into the file tree */
typedef struct {
	int *ptr;    /* array of recorded keys */

	size_t used; /* number of keys currently stored in ptr */
	size_t size; /* number of slots allocated for ptr */
} fake_keys;

static fake_keys ctrl;
107 #endif
109 stat_cache *stat_cache_init(void) {
110 stat_cache *sc = NULL;
112 sc = calloc(1, sizeof(*sc));
113 force_assert(NULL != sc);
115 sc->dir_name = buffer_init();
116 sc->hash_key = buffer_init();
118 #ifdef HAVE_FAM_H
119 sc->fam_fcce_ndx = -1;
120 #endif
122 #ifdef DEBUG_STAT_CACHE
123 ctrl.size = 0;
124 #endif
126 return sc;
129 static stat_cache_entry * stat_cache_entry_init(void) {
130 stat_cache_entry *sce = NULL;
132 sce = calloc(1, sizeof(*sce));
133 force_assert(NULL != sce);
135 sce->name = buffer_init();
136 sce->etag = buffer_init();
137 sce->content_type = buffer_init();
139 return sce;
142 static void stat_cache_entry_free(void *data) {
143 stat_cache_entry *sce = data;
144 if (!sce) return;
146 buffer_free(sce->etag);
147 buffer_free(sce->name);
148 buffer_free(sce->content_type);
150 free(sce);
153 #ifdef HAVE_FAM_H
154 static fam_dir_entry * fam_dir_entry_init(void) {
155 fam_dir_entry *fam_dir = NULL;
157 fam_dir = calloc(1, sizeof(*fam_dir));
158 force_assert(NULL != fam_dir);
160 fam_dir->name = buffer_init();
162 return fam_dir;
165 static void fam_dir_entry_free(FAMConnection *fc, void *data) {
166 fam_dir_entry *fam_dir = data;
168 if (!fam_dir) return;
170 FAMCancelMonitor(fc, fam_dir->req);
172 buffer_free(fam_dir->name);
173 free(fam_dir->req);
175 free(fam_dir);
177 #endif
179 void stat_cache_free(stat_cache *sc) {
180 while (sc->files) {
181 int osize;
182 splay_tree *node = sc->files;
184 osize = sc->files->size;
186 stat_cache_entry_free(node->data);
187 sc->files = splaytree_delete(sc->files, node->key);
189 force_assert(osize - 1 == splaytree_size(sc->files));
192 buffer_free(sc->dir_name);
193 buffer_free(sc->hash_key);
195 #ifdef HAVE_FAM_H
196 while (sc->dirs) {
197 int osize;
198 splay_tree *node = sc->dirs;
200 osize = sc->dirs->size;
202 fam_dir_entry_free(&sc->fam, node->data);
203 sc->dirs = splaytree_delete(sc->dirs, node->key);
205 if (osize == 1) {
206 force_assert(NULL == sc->dirs);
207 } else {
208 force_assert(osize == (sc->dirs->size + 1));
212 if (-1 != sc->fam_fcce_ndx) {
213 /* fd events already gone */
214 sc->fam_fcce_ndx = -1;
216 FAMClose(&sc->fam);
218 #endif
219 free(sc);
222 #if defined(HAVE_XATTR)
223 static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
224 int attrlen;
225 int ret;
227 buffer_string_prepare_copy(buf, 1023);
228 attrlen = buf->size - 1;
229 if(0 == (ret = attr_get(name, xattrname, buf->ptr, &attrlen, 0))) {
230 buffer_commit(buf, attrlen);
232 return ret;
234 #elif defined(HAVE_EXTATTR)
235 static int stat_cache_attr_get(buffer *buf, char *name, char *xattrname) {
236 ssize_t attrlen;
238 buffer_string_prepare_copy(buf, 1023);
240 if (-1 != (attrlen = extattr_get_file(name, EXTATTR_NAMESPACE_USER, xattrname, buf->ptr, buf->size - 1))) {
241 buf->used = attrlen + 1;
242 buf->ptr[attrlen] = '\0';
243 return 0;
245 return -1;
247 #endif
249 /* the famous DJB hash function for strings */
250 static uint32_t hashme(buffer *str) {
251 uint32_t hash = 5381;
252 const char *s;
253 for (s = str->ptr; *s; s++) {
254 hash = ((hash << 5) + hash) + *s;
257 hash &= ~(((uint32_t)1) << 31); /* strip the highest bit */
259 return hash;
262 #ifdef HAVE_FAM_H
263 handler_t stat_cache_handle_fdevent(server *srv, void *_fce, int revent) {
264 size_t i;
265 stat_cache *sc = srv->stat_cache;
266 size_t events;
268 UNUSED(_fce);
269 /* */
271 if (revent & FDEVENT_IN) {
272 events = FAMPending(&sc->fam);
274 for (i = 0; i < events; i++) {
275 FAMEvent fe;
276 fam_dir_entry *fam_dir;
277 splay_tree *node;
278 int ndx, j;
280 FAMNextEvent(&sc->fam, &fe);
282 /* handle event */
284 switch(fe.code) {
285 case FAMChanged:
286 case FAMDeleted:
287 case FAMMoved:
288 /* if the filename is a directory remove the entry */
290 fam_dir = fe.userdata;
291 fam_dir->version++;
293 /* file/dir is still here */
294 if (fe.code == FAMChanged) break;
296 /* we have 2 versions, follow and no-follow-symlink */
298 for (j = 0; j < 2; j++) {
299 buffer_copy_string(sc->hash_key, fe.filename);
300 buffer_append_int(sc->hash_key, j);
302 ndx = hashme(sc->hash_key);
304 sc->dirs = splaytree_splay(sc->dirs, ndx);
305 node = sc->dirs;
307 if (node && (node->key == ndx)) {
308 int osize = splaytree_size(sc->dirs);
310 fam_dir_entry_free(&sc->fam, node->data);
311 sc->dirs = splaytree_delete(sc->dirs, ndx);
313 force_assert(osize - 1 == splaytree_size(sc->dirs));
316 break;
317 default:
318 break;
323 if (revent & FDEVENT_HUP) {
324 /* fam closed the connection */
325 fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(&sc->fam));
326 fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(&sc->fam));
328 FAMClose(&sc->fam);
331 return HANDLER_GO_ON;
334 static int buffer_copy_dirname(buffer *dst, buffer *file) {
335 size_t i;
337 if (buffer_string_is_empty(file)) return -1;
339 for (i = buffer_string_length(file); i > 0; i--) {
340 if (file->ptr[i] == '/') {
341 buffer_copy_string_len(dst, file->ptr, i);
342 return 0;
346 return -1;
348 #endif
350 #ifdef HAVE_LSTAT
351 static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
352 if (lstat(dname->ptr, lst) == 0) {
353 return S_ISLNK(lst->st_mode) ? 0 : 1;
355 else {
356 log_error_write(srv, __FILE__, __LINE__, "sbs",
357 "lstat failed for:",
358 dname, strerror(errno));
360 return -1;
362 #endif
364 /***
368 * returns:
369 * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
370 * - HANDLER_ERROR on stat() failed -> see errno for problem
373 handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) {
374 #ifdef HAVE_FAM_H
375 fam_dir_entry *fam_dir = NULL;
376 int dir_ndx = -1;
377 #endif
378 stat_cache_entry *sce = NULL;
379 stat_cache *sc;
380 struct stat st;
381 size_t k;
382 int fd;
383 struct stat lst;
384 #ifdef DEBUG_STAT_CACHE
385 size_t i;
386 #endif
388 int file_ndx;
390 *ret_sce = NULL;
393 * check if the directory for this file has changed
396 sc = srv->stat_cache;
398 buffer_copy_buffer(sc->hash_key, name);
399 buffer_append_int(sc->hash_key, con->conf.follow_symlink);
401 file_ndx = hashme(sc->hash_key);
402 sc->files = splaytree_splay(sc->files, file_ndx);
404 #ifdef DEBUG_STAT_CACHE
405 for (i = 0; i < ctrl.used; i++) {
406 if (ctrl.ptr[i] == file_ndx) break;
408 #endif
410 if (sc->files && (sc->files->key == file_ndx)) {
411 #ifdef DEBUG_STAT_CACHE
412 /* it was in the cache */
413 force_assert(i < ctrl.used);
414 #endif
416 /* we have seen this file already and
417 * don't stat() it again in the same second */
419 sce = sc->files->data;
421 /* check if the name is the same, we might have a collision */
423 if (buffer_is_equal(name, sce->name)) {
424 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) {
425 if (sce->stat_ts == srv->cur_ts && con->conf.follow_symlink) {
426 *ret_sce = sce;
427 return HANDLER_GO_ON;
430 } else {
431 /* collision, forget about the entry */
432 sce = NULL;
434 } else {
435 #ifdef DEBUG_STAT_CACHE
436 if (i != ctrl.used) {
437 log_error_write(srv, __FILE__, __LINE__, "xSB",
438 file_ndx, "was already inserted but not found in cache, ", name);
440 force_assert(i == ctrl.used);
441 #endif
444 #ifdef HAVE_FAM_H
445 /* dir-check */
446 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
447 if (0 != buffer_copy_dirname(sc->dir_name, name)) {
448 log_error_write(srv, __FILE__, __LINE__, "sb",
449 "no '/' found in filename:", name);
450 return HANDLER_ERROR;
453 buffer_copy_buffer(sc->hash_key, sc->dir_name);
454 buffer_append_int(sc->hash_key, con->conf.follow_symlink);
456 dir_ndx = hashme(sc->hash_key);
458 sc->dirs = splaytree_splay(sc->dirs, dir_ndx);
460 if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) {
461 fam_dir = sc->dirs->data;
463 /* check whether we got a collision */
464 if (buffer_is_equal(sc->dir_name, fam_dir->name)) {
465 /* test whether a found file cache entry is still ok */
466 if ((NULL != sce) && (fam_dir->version == sce->dir_version)) {
467 /* the stat()-cache entry is still ok */
469 *ret_sce = sce;
470 return HANDLER_GO_ON;
472 } else {
473 /* hash collision, forget about the entry */
474 fam_dir = NULL;
478 #endif
481 * *lol*
482 * - open() + fstat() on a named-pipe results in a (intended) hang.
483 * - stat() if regular file + open() to see if we can read from it is better
485 * */
486 if (-1 == stat(name->ptr, &st)) {
487 return HANDLER_ERROR;
491 if (S_ISREG(st.st_mode)) {
492 /* fix broken stat/open for symlinks to reg files with appended slash on freebsd,osx */
493 if (name->ptr[buffer_string_length(name) - 1] == '/') {
494 errno = ENOTDIR;
495 return HANDLER_ERROR;
498 /* try to open the file to check if we can read it */
499 if (-1 == (fd = open(name->ptr, O_RDONLY))) {
500 return HANDLER_ERROR;
502 close(fd);
505 if (NULL == sce) {
507 sce = stat_cache_entry_init();
508 buffer_copy_buffer(sce->name, name);
510 /* already splayed file_ndx */
511 if ((NULL != sc->files) && (sc->files->key == file_ndx)) {
512 /* hash collision: replace old entry */
513 stat_cache_entry_free(sc->files->data);
514 sc->files->data = sce;
515 } else {
516 int osize = splaytree_size(sc->files);
518 sc->files = splaytree_insert(sc->files, file_ndx, sce);
519 force_assert(osize + 1 == splaytree_size(sc->files));
521 #ifdef DEBUG_STAT_CACHE
522 if (ctrl.size == 0) {
523 ctrl.size = 16;
524 ctrl.used = 0;
525 ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
526 force_assert(NULL != ctrl.ptr);
527 } else if (ctrl.size == ctrl.used) {
528 ctrl.size += 16;
529 ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr));
530 force_assert(NULL != ctrl.ptr);
533 ctrl.ptr[ctrl.used++] = file_ndx;
534 #endif
536 force_assert(sc->files);
537 force_assert(sc->files->data == sce);
540 sce->st = st;
541 sce->stat_ts = srv->cur_ts;
543 /* catch the obvious symlinks
545 * this is not a secure check as we still have a race-condition between
546 * the stat() and the open. We can only solve this by
547 * 1. open() the file
548 * 2. fstat() the fd
550 * and keeping the file open for the rest of the time. But this can
551 * only be done at network level.
553 * per default it is not a symlink
554 * */
555 #ifdef HAVE_LSTAT
556 sce->is_symlink = 0;
558 /* we want to only check for symlinks if we should block symlinks.
560 if (!con->conf.follow_symlink) {
561 if (stat_cache_lstat(srv, name, &lst) == 0) {
562 #ifdef DEBUG_STAT_CACHE
563 log_error_write(srv, __FILE__, __LINE__, "sb",
564 "found symlink", name);
565 #endif
566 sce->is_symlink = 1;
570 * we assume "/" can not be symlink, so
571 * skip the symlink stuff if our path is /
573 else if (buffer_string_length(name) > 1) {
574 buffer *dname;
575 char *s_cur;
577 dname = buffer_init();
578 buffer_copy_buffer(dname, name);
580 while ((s_cur = strrchr(dname->ptr, '/'))) {
581 buffer_string_set_length(dname, s_cur - dname->ptr);
582 if (dname->ptr == s_cur) {
583 #ifdef DEBUG_STAT_CACHE
584 log_error_write(srv, __FILE__, __LINE__, "s", "reached /");
585 #endif
586 break;
588 #ifdef DEBUG_STAT_CACHE
589 log_error_write(srv, __FILE__, __LINE__, "sbs",
590 "checking if", dname, "is a symlink");
591 #endif
592 if (stat_cache_lstat(srv, dname, &lst) == 0) {
593 sce->is_symlink = 1;
594 #ifdef DEBUG_STAT_CACHE
595 log_error_write(srv, __FILE__, __LINE__, "sb",
596 "found symlink", dname);
597 #endif
598 break;
601 buffer_free(dname);
604 #endif
606 if (S_ISREG(st.st_mode)) {
607 /* determine mimetype */
608 buffer_reset(sce->content_type);
609 #if defined(HAVE_XATTR) || defined(HAVE_EXTATTR)
610 if (con->conf.use_xattr) {
611 stat_cache_attr_get(sce->content_type, name->ptr, srv->srvconf.xattr_name->ptr);
613 #endif
614 /* xattr did not set a content-type. ask the config */
615 if (buffer_string_is_empty(sce->content_type)) {
616 size_t namelen = buffer_string_length(name);
618 for (k = 0; k < con->conf.mimetypes->used; k++) {
619 data_string *ds = (data_string *)con->conf.mimetypes->data[k];
620 buffer *type = ds->key;
621 size_t typelen = buffer_string_length(type);
623 if (buffer_is_empty(type)) continue;
625 /* check if the right side is the same */
626 if (typelen > namelen) continue;
628 if (0 == strncasecmp(name->ptr + namelen - typelen, type->ptr, typelen)) {
629 buffer_copy_buffer(sce->content_type, ds->value);
630 break;
634 etag_create(sce->etag, &(sce->st), con->etag_flags);
635 } else if (S_ISDIR(st.st_mode)) {
636 etag_create(sce->etag, &(sce->st), con->etag_flags);
639 #ifdef HAVE_FAM_H
640 if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) {
641 /* is this directory already registered ? */
642 if (NULL == fam_dir) {
643 fam_dir = fam_dir_entry_init();
645 buffer_copy_buffer(fam_dir->name, sc->dir_name);
647 fam_dir->version = 1;
649 fam_dir->req = calloc(1, sizeof(FAMRequest));
650 force_assert(NULL != fam_dir);
652 if (0 != FAMMonitorDirectory(&sc->fam, fam_dir->name->ptr,
653 fam_dir->req, fam_dir)) {
655 log_error_write(srv, __FILE__, __LINE__, "sbsbs",
656 "monitoring dir failed:",
657 fam_dir->name,
658 "file:", name,
659 FamErrlist[FAMErrno]);
661 fam_dir_entry_free(&sc->fam, fam_dir);
662 fam_dir = NULL;
663 } else {
664 int osize = splaytree_size(sc->dirs);
666 /* already splayed dir_ndx */
667 if ((NULL != sc->dirs) && (sc->dirs->key == dir_ndx)) {
668 /* hash collision: replace old entry */
669 fam_dir_entry_free(&sc->fam, sc->dirs->data);
670 sc->dirs->data = fam_dir;
671 } else {
672 sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir);
673 force_assert(osize == (splaytree_size(sc->dirs) - 1));
676 force_assert(sc->dirs);
677 force_assert(sc->dirs->data == fam_dir);
681 /* bind the fam_fc to the stat() cache entry */
683 if (fam_dir) {
684 sce->dir_version = fam_dir->version;
687 #endif
689 *ret_sce = sce;
691 return HANDLER_GO_ON;
694 int stat_cache_open_rdonly_fstat (server *srv, connection *con, buffer *name, struct stat *st) {
695 /*(Note: O_NOFOLLOW affects only the final path segment, the target file,
696 * not any intermediate symlinks along the path)*/
697 #ifndef O_BINARY
698 #define O_BINARY 0
699 #endif
700 #ifndef O_LARGEFILE
701 #define O_LARGEFILE 0
702 #endif
703 #ifndef O_NOCTTY
704 #define O_NOCTTY 0
705 #endif
706 #ifndef O_NONBLOCK
707 #define O_NONBLOCK 0
708 #endif
709 #ifndef O_NOFOLLOW
710 #define O_NOFOLLOW 0
711 #endif
712 const int oflags = O_BINARY | O_LARGEFILE | O_NOCTTY | O_NONBLOCK
713 | (con->conf.follow_symlink ? 0 : O_NOFOLLOW);
714 const int fd = open(name->ptr, O_RDONLY | oflags);
715 if (fd >= 0) {
716 if (0 == fstat(fd, st)) {
717 return fd;
718 } else {
719 close(fd);
722 UNUSED(srv); /*(might log_error_write(srv, ...) in the future)*/
723 return -1;
727 * remove stat() from cache which havn't been stat()ed for
728 * more than 10 seconds
731 * walk though the stat-cache, collect the ids which are too old
732 * and remove them in a second loop
735 static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) {
736 stat_cache_entry *sce;
738 if (!t) return 0;
740 stat_cache_tag_old_entries(srv, t->left, keys, ndx);
741 stat_cache_tag_old_entries(srv, t->right, keys, ndx);
743 sce = t->data;
745 if (srv->cur_ts - sce->stat_ts > 2) {
746 keys[(*ndx)++] = t->key;
749 return 0;
752 int stat_cache_trigger_cleanup(server *srv) {
753 stat_cache *sc;
754 size_t max_ndx = 0, i;
755 int *keys;
757 sc = srv->stat_cache;
759 if (!sc->files) return 0;
761 keys = calloc(1, sizeof(int) * sc->files->size);
762 force_assert(NULL != keys);
764 stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx);
766 for (i = 0; i < max_ndx; i++) {
767 int ndx = keys[i];
768 splay_tree *node;
770 sc->files = splaytree_splay(sc->files, ndx);
772 node = sc->files;
774 if (node && (node->key == ndx)) {
775 #ifdef DEBUG_STAT_CACHE
776 size_t j;
777 int osize = splaytree_size(sc->files);
778 stat_cache_entry *sce = node->data;
779 #endif
780 stat_cache_entry_free(node->data);
781 sc->files = splaytree_delete(sc->files, ndx);
783 #ifdef DEBUG_STAT_CACHE
784 for (j = 0; j < ctrl.used; j++) {
785 if (ctrl.ptr[j] == ndx) {
786 ctrl.ptr[j] = ctrl.ptr[--ctrl.used];
787 break;
791 force_assert(osize - 1 == splaytree_size(sc->files));
792 #endif
796 free(keys);
798 return 0;