2 * FUSE: Filesystem in Userspace
3 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
5 * This program can be distributed under the terms of the GNU GPLv2.
6 * See the file COPYING.
11 * This file system mirrors the existing file system hierarchy of the
12 * system, starting at the root file system. This is implemented by
13 * just "passing through" all requests to the corresponding user-space
14 * libc functions. In contrast to passthrough.c and passthrough_fh.c,
15 * this implementation uses the low-level API. Its performance should
16 * be the least bad among the three, but many operations are not
17 * implemented. In particular, it is not possible to remove files (or
18 * directories) because the code necessary to defer actual removal
19 * until the file is not opened anymore would make the example much
22 * When writeback caching is enabled (-o writeback mount option), it
23 * is only possible to write to files for which the mounting user has
24 * read permissions. This is because the writeback cache requires the
25 * kernel to be able to issue read requests for all files (which the
26 * passthrough filesystem cannot satisfy if it can't read the file in
27 * the underlying filesystem).
31 * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
35 * \include passthrough_ll.c
38 #include "qemu/osdep.h"
39 #include "qemu/timer.h"
40 #include "fuse_virtio.h"
42 #include "fuse_lowlevel.h"
57 #include <sys/mount.h>
58 #include <sys/prctl.h>
59 #include <sys/resource.h>
60 #include <sys/syscall.h>
61 #include <sys/types.h>
63 #include <sys/xattr.h>
67 #include "passthrough_helpers.h"
72 struct lo_inode
*inode
;
80 /* Maps FUSE fh or ino values to internal objects */
82 struct lo_map_elem
*elems
;
96 uint64_t refcount
; /* protected by lo->mutex */
112 pthread_mutex_t mutex
;
123 int readdirplus_clear
;
124 struct lo_inode root
;
125 GHashTable
*inodes
; /* protected by lo->mutex */
126 struct lo_map ino_map
; /* protected by lo->mutex */
127 struct lo_map dirp_map
; /* protected by lo->mutex */
128 struct lo_map fd_map
; /* protected by lo->mutex */
130 /* An O_PATH file descriptor to /proc/self/fd/ */
134 static const struct fuse_opt lo_opts
[] = {
135 { "writeback", offsetof(struct lo_data
, writeback
), 1 },
136 { "no_writeback", offsetof(struct lo_data
, writeback
), 0 },
137 { "source=%s", offsetof(struct lo_data
, source
), 0 },
138 { "flock", offsetof(struct lo_data
, flock
), 1 },
139 { "no_flock", offsetof(struct lo_data
, flock
), 0 },
140 { "xattr", offsetof(struct lo_data
, xattr
), 1 },
141 { "no_xattr", offsetof(struct lo_data
, xattr
), 0 },
142 { "timeout=%lf", offsetof(struct lo_data
, timeout
), 0 },
143 { "timeout=", offsetof(struct lo_data
, timeout_set
), 1 },
144 { "cache=none", offsetof(struct lo_data
, cache
), CACHE_NONE
},
145 { "cache=auto", offsetof(struct lo_data
, cache
), CACHE_AUTO
},
146 { "cache=always", offsetof(struct lo_data
, cache
), CACHE_ALWAYS
},
147 { "norace", offsetof(struct lo_data
, norace
), 1 },
148 { "readdirplus", offsetof(struct lo_data
, readdirplus_set
), 1 },
149 { "no_readdirplus", offsetof(struct lo_data
, readdirplus_clear
), 1 },
152 static bool use_syslog
= false;
153 static int current_log_level
;
154 static void unref_inode_lolocked(struct lo_data
*lo
, struct lo_inode
*inode
,
158 pthread_mutex_t mutex
;
161 /* That we loaded cap-ng in the current thread from the saved */
162 static __thread
bool cap_loaded
= 0;
164 static struct lo_inode
*lo_find(struct lo_data
*lo
, struct stat
*st
);
166 static int is_dot_or_dotdot(const char *name
)
168 return name
[0] == '.' &&
169 (name
[1] == '\0' || (name
[1] == '.' && name
[2] == '\0'));
172 /* Is `path` a single path component that is not "." or ".."? */
173 static int is_safe_path_component(const char *path
)
175 if (strchr(path
, '/')) {
179 return !is_dot_or_dotdot(path
);
182 static struct lo_data
*lo_data(fuse_req_t req
)
184 return (struct lo_data
*)fuse_req_userdata(req
);
188 * Load capng's state from our saved state if the current thread
189 * hadn't previously been loaded.
190 * returns 0 on success
192 static int load_capng(void)
195 pthread_mutex_lock(&cap
.mutex
);
196 capng_restore_state(&cap
.saved
);
198 * restore_state free's the saved copy
201 cap
.saved
= capng_save_state();
203 fuse_log(FUSE_LOG_ERR
, "capng_save_state (thread)\n");
206 pthread_mutex_unlock(&cap
.mutex
);
209 * We want to use the loaded state for our pid,
212 capng_setpid(syscall(SYS_gettid
));
219 * Helpers for dropping and regaining effective capabilities. Returns 0
220 * on success, error otherwise
222 static int drop_effective_cap(const char *cap_name
, bool *cap_dropped
)
226 cap
= capng_name_to_capability(cap_name
);
229 fuse_log(FUSE_LOG_ERR
, "capng_name_to_capability(%s) failed:%s\n",
230 cap_name
, strerror(errno
));
236 fuse_log(FUSE_LOG_ERR
, "load_capng() failed\n");
240 /* We dont have this capability in effective set already. */
241 if (!capng_have_capability(CAPNG_EFFECTIVE
, cap
)) {
246 if (capng_update(CAPNG_DROP
, CAPNG_EFFECTIVE
, cap
)) {
248 fuse_log(FUSE_LOG_ERR
, "capng_update(DROP,) failed\n");
252 if (capng_apply(CAPNG_SELECT_CAPS
)) {
254 fuse_log(FUSE_LOG_ERR
, "drop:capng_apply() failed\n");
267 static int gain_effective_cap(const char *cap_name
)
272 cap
= capng_name_to_capability(cap_name
);
275 fuse_log(FUSE_LOG_ERR
, "capng_name_to_capability(%s) failed:%s\n",
276 cap_name
, strerror(errno
));
282 fuse_log(FUSE_LOG_ERR
, "load_capng() failed\n");
286 if (capng_update(CAPNG_ADD
, CAPNG_EFFECTIVE
, cap
)) {
288 fuse_log(FUSE_LOG_ERR
, "capng_update(ADD,) failed\n");
292 if (capng_apply(CAPNG_SELECT_CAPS
)) {
294 fuse_log(FUSE_LOG_ERR
, "gain:capng_apply() failed\n");
303 static void lo_map_init(struct lo_map
*map
)
310 static void lo_map_destroy(struct lo_map
*map
)
315 static int lo_map_grow(struct lo_map
*map
, size_t new_nelems
)
317 struct lo_map_elem
*new_elems
;
320 if (new_nelems
<= map
->nelems
) {
324 new_elems
= realloc(map
->elems
, sizeof(map
->elems
[0]) * new_nelems
);
329 for (i
= map
->nelems
; i
< new_nelems
; i
++) {
330 new_elems
[i
].freelist
= i
+ 1;
331 new_elems
[i
].in_use
= false;
333 new_elems
[new_nelems
- 1].freelist
= -1;
335 map
->elems
= new_elems
;
336 map
->freelist
= map
->nelems
;
337 map
->nelems
= new_nelems
;
341 static struct lo_map_elem
*lo_map_alloc_elem(struct lo_map
*map
)
343 struct lo_map_elem
*elem
;
345 if (map
->freelist
== -1 && !lo_map_grow(map
, map
->nelems
+ 256)) {
349 elem
= &map
->elems
[map
->freelist
];
350 map
->freelist
= elem
->freelist
;
357 static struct lo_map_elem
*lo_map_reserve(struct lo_map
*map
, size_t key
)
361 if (!lo_map_grow(map
, key
+ 1)) {
365 for (prev
= &map
->freelist
; *prev
!= -1;
366 prev
= &map
->elems
[*prev
].freelist
) {
368 struct lo_map_elem
*elem
= &map
->elems
[key
];
370 *prev
= elem
->freelist
;
378 static struct lo_map_elem
*lo_map_get(struct lo_map
*map
, size_t key
)
380 if (key
>= map
->nelems
) {
383 if (!map
->elems
[key
].in_use
) {
386 return &map
->elems
[key
];
389 static void lo_map_remove(struct lo_map
*map
, size_t key
)
391 struct lo_map_elem
*elem
;
393 if (key
>= map
->nelems
) {
397 elem
= &map
->elems
[key
];
402 elem
->in_use
= false;
404 elem
->freelist
= map
->freelist
;
408 /* Assumes lo->mutex is held */
409 static ssize_t
lo_add_fd_mapping(fuse_req_t req
, int fd
)
411 struct lo_map_elem
*elem
;
413 elem
= lo_map_alloc_elem(&lo_data(req
)->fd_map
);
419 return elem
- lo_data(req
)->fd_map
.elems
;
422 /* Assumes lo->mutex is held */
423 static ssize_t
lo_add_dirp_mapping(fuse_req_t req
, struct lo_dirp
*dirp
)
425 struct lo_map_elem
*elem
;
427 elem
= lo_map_alloc_elem(&lo_data(req
)->dirp_map
);
433 return elem
- lo_data(req
)->dirp_map
.elems
;
436 /* Assumes lo->mutex is held */
437 static ssize_t
lo_add_inode_mapping(fuse_req_t req
, struct lo_inode
*inode
)
439 struct lo_map_elem
*elem
;
441 elem
= lo_map_alloc_elem(&lo_data(req
)->ino_map
);
447 return elem
- lo_data(req
)->ino_map
.elems
;
450 static struct lo_inode
*lo_inode(fuse_req_t req
, fuse_ino_t ino
)
452 struct lo_data
*lo
= lo_data(req
);
453 struct lo_map_elem
*elem
;
455 pthread_mutex_lock(&lo
->mutex
);
456 elem
= lo_map_get(&lo
->ino_map
, ino
);
457 pthread_mutex_unlock(&lo
->mutex
);
466 static int lo_fd(fuse_req_t req
, fuse_ino_t ino
)
468 struct lo_inode
*inode
= lo_inode(req
, ino
);
469 return inode
? inode
->fd
: -1;
472 static void lo_init(void *userdata
, struct fuse_conn_info
*conn
)
474 struct lo_data
*lo
= (struct lo_data
*)userdata
;
476 if (conn
->capable
& FUSE_CAP_EXPORT_SUPPORT
) {
477 conn
->want
|= FUSE_CAP_EXPORT_SUPPORT
;
480 if (lo
->writeback
&& conn
->capable
& FUSE_CAP_WRITEBACK_CACHE
) {
481 fuse_log(FUSE_LOG_DEBUG
, "lo_init: activating writeback\n");
482 conn
->want
|= FUSE_CAP_WRITEBACK_CACHE
;
484 if (lo
->flock
&& conn
->capable
& FUSE_CAP_FLOCK_LOCKS
) {
485 fuse_log(FUSE_LOG_DEBUG
, "lo_init: activating flock locks\n");
486 conn
->want
|= FUSE_CAP_FLOCK_LOCKS
;
488 if ((lo
->cache
== CACHE_NONE
&& !lo
->readdirplus_set
) ||
489 lo
->readdirplus_clear
) {
490 fuse_log(FUSE_LOG_DEBUG
, "lo_init: disabling readdirplus\n");
491 conn
->want
&= ~FUSE_CAP_READDIRPLUS
;
495 static void lo_getattr(fuse_req_t req
, fuse_ino_t ino
,
496 struct fuse_file_info
*fi
)
500 struct lo_data
*lo
= lo_data(req
);
505 fstatat(lo_fd(req
, ino
), "", &buf
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
507 return (void)fuse_reply_err(req
, errno
);
510 fuse_reply_attr(req
, &buf
, lo
->timeout
);
513 static int lo_parent_and_name(struct lo_data
*lo
, struct lo_inode
*inode
,
514 char path
[PATH_MAX
], struct lo_inode
**parent
)
524 sprintf(procname
, "%i", inode
->fd
);
526 res
= readlinkat(lo
->proc_self_fd
, procname
, path
, PATH_MAX
);
528 fuse_log(FUSE_LOG_WARNING
, "%s: readlink failed: %m\n", __func__
);
532 if (res
>= PATH_MAX
) {
533 fuse_log(FUSE_LOG_WARNING
, "%s: readlink overflowed\n", __func__
);
538 last
= strrchr(path
, '/');
540 /* Shouldn't happen */
543 "%s: INTERNAL ERROR: bad path read from proc\n", __func__
);
548 pthread_mutex_lock(&lo
->mutex
);
550 pthread_mutex_unlock(&lo
->mutex
);
553 res
= fstatat(AT_FDCWD
, last
== path
? "/" : path
, &stat
, 0);
556 fuse_log(FUSE_LOG_WARNING
,
557 "%s: failed to stat parent: %m\n", __func__
);
561 p
= lo_find(lo
, &stat
);
564 fuse_log(FUSE_LOG_WARNING
,
565 "%s: failed to find parent\n", __func__
);
571 res
= fstatat(p
->fd
, last
, &stat
, AT_SYMLINK_NOFOLLOW
);
574 fuse_log(FUSE_LOG_WARNING
,
575 "%s: failed to stat last\n", __func__
);
579 if (stat
.st_dev
!= inode
->key
.dev
|| stat
.st_ino
!= inode
->key
.ino
) {
581 fuse_log(FUSE_LOG_WARNING
,
582 "%s: failed to match last\n", __func__
);
587 memmove(path
, last
, strlen(last
) + 1);
592 unref_inode_lolocked(lo
, p
, 1);
603 static int utimensat_empty(struct lo_data
*lo
, struct lo_inode
*inode
,
604 const struct timespec
*tv
)
607 struct lo_inode
*parent
;
610 if (inode
->is_symlink
) {
611 res
= utimensat(inode
->fd
, "", tv
, AT_EMPTY_PATH
);
612 if (res
== -1 && errno
== EINVAL
) {
613 /* Sorry, no race free way to set times on symlink. */
622 sprintf(path
, "%i", inode
->fd
);
624 return utimensat(lo
->proc_self_fd
, path
, tv
, 0);
627 res
= lo_parent_and_name(lo
, inode
, path
, &parent
);
629 res
= utimensat(parent
->fd
, path
, tv
, AT_SYMLINK_NOFOLLOW
);
630 unref_inode_lolocked(lo
, parent
, 1);
636 static int lo_fi_fd(fuse_req_t req
, struct fuse_file_info
*fi
)
638 struct lo_data
*lo
= lo_data(req
);
639 struct lo_map_elem
*elem
;
641 pthread_mutex_lock(&lo
->mutex
);
642 elem
= lo_map_get(&lo
->fd_map
, fi
->fh
);
643 pthread_mutex_unlock(&lo
->mutex
);
652 static void lo_setattr(fuse_req_t req
, fuse_ino_t ino
, struct stat
*attr
,
653 int valid
, struct fuse_file_info
*fi
)
657 struct lo_data
*lo
= lo_data(req
);
658 struct lo_inode
*inode
;
663 inode
= lo_inode(req
, ino
);
665 fuse_reply_err(req
, EBADF
);
671 /* If fi->fh is invalid we'll report EBADF later */
673 fd
= lo_fi_fd(req
, fi
);
676 if (valid
& FUSE_SET_ATTR_MODE
) {
678 res
= fchmod(fd
, attr
->st_mode
);
680 sprintf(procname
, "%i", ifd
);
681 res
= fchmodat(lo
->proc_self_fd
, procname
, attr
->st_mode
, 0);
687 if (valid
& (FUSE_SET_ATTR_UID
| FUSE_SET_ATTR_GID
)) {
688 uid_t uid
= (valid
& FUSE_SET_ATTR_UID
) ? attr
->st_uid
: (uid_t
)-1;
689 gid_t gid
= (valid
& FUSE_SET_ATTR_GID
) ? attr
->st_gid
: (gid_t
)-1;
691 res
= fchownat(ifd
, "", uid
, gid
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
696 if (valid
& FUSE_SET_ATTR_SIZE
) {
702 sprintf(procname
, "%i", ifd
);
703 truncfd
= openat(lo
->proc_self_fd
, procname
, O_RDWR
);
709 res
= ftruncate(truncfd
, attr
->st_size
);
719 if (valid
& (FUSE_SET_ATTR_ATIME
| FUSE_SET_ATTR_MTIME
)) {
720 struct timespec tv
[2];
724 tv
[0].tv_nsec
= UTIME_OMIT
;
725 tv
[1].tv_nsec
= UTIME_OMIT
;
727 if (valid
& FUSE_SET_ATTR_ATIME_NOW
) {
728 tv
[0].tv_nsec
= UTIME_NOW
;
729 } else if (valid
& FUSE_SET_ATTR_ATIME
) {
730 tv
[0] = attr
->st_atim
;
733 if (valid
& FUSE_SET_ATTR_MTIME_NOW
) {
734 tv
[1].tv_nsec
= UTIME_NOW
;
735 } else if (valid
& FUSE_SET_ATTR_MTIME
) {
736 tv
[1] = attr
->st_mtim
;
740 res
= futimens(fd
, tv
);
742 res
= utimensat_empty(lo
, inode
, tv
);
749 return lo_getattr(req
, ino
, fi
);
753 fuse_reply_err(req
, saverr
);
756 static struct lo_inode
*lo_find(struct lo_data
*lo
, struct stat
*st
)
759 struct lo_key key
= {
764 pthread_mutex_lock(&lo
->mutex
);
765 p
= g_hash_table_lookup(lo
->inodes
, &key
);
767 assert(p
->refcount
> 0);
770 pthread_mutex_unlock(&lo
->mutex
);
775 static int lo_do_lookup(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
776 struct fuse_entry_param
*e
)
781 struct lo_data
*lo
= lo_data(req
);
782 struct lo_inode
*inode
, *dir
= lo_inode(req
, parent
);
785 * name_to_handle_at() and open_by_handle_at() can reach here with fuse
786 * mount point in guest, but we don't have its inode info in the
793 memset(e
, 0, sizeof(*e
));
794 e
->attr_timeout
= lo
->timeout
;
795 e
->entry_timeout
= lo
->timeout
;
797 /* Do not allow escaping root directory */
798 if (dir
== &lo
->root
&& strcmp(name
, "..") == 0) {
802 newfd
= openat(dir
->fd
, name
, O_PATH
| O_NOFOLLOW
);
807 res
= fstatat(newfd
, "", &e
->attr
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
812 inode
= lo_find(lo
, &e
->attr
);
818 inode
= calloc(1, sizeof(struct lo_inode
));
823 inode
->is_symlink
= S_ISLNK(e
->attr
.st_mode
);
827 inode
->key
.ino
= e
->attr
.st_ino
;
828 inode
->key
.dev
= e
->attr
.st_dev
;
830 pthread_mutex_lock(&lo
->mutex
);
831 inode
->fuse_ino
= lo_add_inode_mapping(req
, inode
);
832 g_hash_table_insert(lo
->inodes
, &inode
->key
, inode
);
833 pthread_mutex_unlock(&lo
->mutex
);
835 e
->ino
= inode
->fuse_ino
;
837 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
838 name
, (unsigned long long)e
->ino
);
850 static void lo_lookup(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
852 struct fuse_entry_param e
;
855 fuse_log(FUSE_LOG_DEBUG
, "lo_lookup(parent=%" PRIu64
", name=%s)\n", parent
,
859 * Don't use is_safe_path_component(), allow "." and ".." for NFS export
862 if (strchr(name
, '/')) {
863 fuse_reply_err(req
, EINVAL
);
867 err
= lo_do_lookup(req
, parent
, name
, &e
);
869 fuse_reply_err(req
, err
);
871 fuse_reply_entry(req
, &e
);
876 * On some archs, setres*id is limited to 2^16 but they
877 * provide setres*id32 variants that allow 2^32.
878 * Others just let setres*id do 2^32 anyway.
880 #ifdef SYS_setresgid32
881 #define OURSYS_setresgid SYS_setresgid32
883 #define OURSYS_setresgid SYS_setresgid
886 #ifdef SYS_setresuid32
887 #define OURSYS_setresuid SYS_setresuid32
889 #define OURSYS_setresuid SYS_setresuid
893 * Change to uid/gid of caller so that file is created with
894 * ownership of caller.
895 * TODO: What about selinux context?
897 static int lo_change_cred(fuse_req_t req
, struct lo_cred
*old
)
901 old
->euid
= geteuid();
902 old
->egid
= getegid();
904 res
= syscall(OURSYS_setresgid
, -1, fuse_req_ctx(req
)->gid
, -1);
909 res
= syscall(OURSYS_setresuid
, -1, fuse_req_ctx(req
)->uid
, -1);
911 int errno_save
= errno
;
913 syscall(OURSYS_setresgid
, -1, old
->egid
, -1);
920 /* Regain Privileges */
921 static void lo_restore_cred(struct lo_cred
*old
)
925 res
= syscall(OURSYS_setresuid
, -1, old
->euid
, -1);
927 fuse_log(FUSE_LOG_ERR
, "seteuid(%u): %m\n", old
->euid
);
931 res
= syscall(OURSYS_setresgid
, -1, old
->egid
, -1);
933 fuse_log(FUSE_LOG_ERR
, "setegid(%u): %m\n", old
->egid
);
938 static void lo_mknod_symlink(fuse_req_t req
, fuse_ino_t parent
,
939 const char *name
, mode_t mode
, dev_t rdev
,
944 struct lo_inode
*dir
;
945 struct fuse_entry_param e
;
946 struct lo_cred old
= {};
948 if (!is_safe_path_component(name
)) {
949 fuse_reply_err(req
, EINVAL
);
953 dir
= lo_inode(req
, parent
);
955 fuse_reply_err(req
, EBADF
);
961 saverr
= lo_change_cred(req
, &old
);
966 res
= mknod_wrapper(dir
->fd
, name
, link
, mode
, rdev
);
970 lo_restore_cred(&old
);
976 saverr
= lo_do_lookup(req
, parent
, name
, &e
);
981 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
982 name
, (unsigned long long)e
.ino
);
984 fuse_reply_entry(req
, &e
);
988 fuse_reply_err(req
, saverr
);
991 static void lo_mknod(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
992 mode_t mode
, dev_t rdev
)
994 lo_mknod_symlink(req
, parent
, name
, mode
, rdev
, NULL
);
997 static void lo_mkdir(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1000 lo_mknod_symlink(req
, parent
, name
, S_IFDIR
| mode
, 0, NULL
);
1003 static void lo_symlink(fuse_req_t req
, const char *link
, fuse_ino_t parent
,
1006 lo_mknod_symlink(req
, parent
, name
, S_IFLNK
, 0, link
);
1009 static int linkat_empty_nofollow(struct lo_data
*lo
, struct lo_inode
*inode
,
1010 int dfd
, const char *name
)
1013 struct lo_inode
*parent
;
1014 char path
[PATH_MAX
];
1016 if (inode
->is_symlink
) {
1017 res
= linkat(inode
->fd
, "", dfd
, name
, AT_EMPTY_PATH
);
1018 if (res
== -1 && (errno
== ENOENT
|| errno
== EINVAL
)) {
1019 /* Sorry, no race free way to hard-link a symlink. */
1029 sprintf(path
, "%i", inode
->fd
);
1031 return linkat(lo
->proc_self_fd
, path
, dfd
, name
, AT_SYMLINK_FOLLOW
);
1034 res
= lo_parent_and_name(lo
, inode
, path
, &parent
);
1036 res
= linkat(parent
->fd
, path
, dfd
, name
, 0);
1037 unref_inode_lolocked(lo
, parent
, 1);
1043 static void lo_link(fuse_req_t req
, fuse_ino_t ino
, fuse_ino_t parent
,
1047 struct lo_data
*lo
= lo_data(req
);
1048 struct lo_inode
*inode
;
1049 struct fuse_entry_param e
;
1052 if (!is_safe_path_component(name
)) {
1053 fuse_reply_err(req
, EINVAL
);
1057 inode
= lo_inode(req
, ino
);
1059 fuse_reply_err(req
, EBADF
);
1063 memset(&e
, 0, sizeof(struct fuse_entry_param
));
1064 e
.attr_timeout
= lo
->timeout
;
1065 e
.entry_timeout
= lo
->timeout
;
1067 res
= linkat_empty_nofollow(lo
, inode
, lo_fd(req
, parent
), name
);
1072 res
= fstatat(inode
->fd
, "", &e
.attr
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
1077 pthread_mutex_lock(&lo
->mutex
);
1079 pthread_mutex_unlock(&lo
->mutex
);
1080 e
.ino
= inode
->fuse_ino
;
1082 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
1083 name
, (unsigned long long)e
.ino
);
1085 fuse_reply_entry(req
, &e
);
1090 fuse_reply_err(req
, saverr
);
1093 static void lo_rmdir(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
1096 if (!is_safe_path_component(name
)) {
1097 fuse_reply_err(req
, EINVAL
);
1101 res
= unlinkat(lo_fd(req
, parent
), name
, AT_REMOVEDIR
);
1103 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1106 static void lo_rename(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1107 fuse_ino_t newparent
, const char *newname
,
1112 if (!is_safe_path_component(name
) || !is_safe_path_component(newname
)) {
1113 fuse_reply_err(req
, EINVAL
);
1118 #ifndef SYS_renameat2
1119 fuse_reply_err(req
, EINVAL
);
1121 res
= syscall(SYS_renameat2
, lo_fd(req
, parent
), name
,
1122 lo_fd(req
, newparent
), newname
, flags
);
1123 if (res
== -1 && errno
== ENOSYS
) {
1124 fuse_reply_err(req
, EINVAL
);
1126 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1132 res
= renameat(lo_fd(req
, parent
), name
, lo_fd(req
, newparent
), newname
);
1134 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1137 static void lo_unlink(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
1141 if (!is_safe_path_component(name
)) {
1142 fuse_reply_err(req
, EINVAL
);
1146 res
= unlinkat(lo_fd(req
, parent
), name
, 0);
1148 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1151 static void unref_inode_lolocked(struct lo_data
*lo
, struct lo_inode
*inode
,
1158 pthread_mutex_lock(&lo
->mutex
);
1159 assert(inode
->refcount
>= n
);
1160 inode
->refcount
-= n
;
1161 if (!inode
->refcount
) {
1162 lo_map_remove(&lo
->ino_map
, inode
->fuse_ino
);
1163 g_hash_table_remove(lo
->inodes
, &inode
->key
);
1164 pthread_mutex_unlock(&lo
->mutex
);
1168 pthread_mutex_unlock(&lo
->mutex
);
1172 static int unref_all_inodes_cb(gpointer key
, gpointer value
, gpointer user_data
)
1174 struct lo_inode
*inode
= value
;
1175 struct lo_data
*lo
= user_data
;
1177 inode
->refcount
= 0;
1178 lo_map_remove(&lo
->ino_map
, inode
->fuse_ino
);
1184 static void unref_all_inodes(struct lo_data
*lo
)
1186 pthread_mutex_lock(&lo
->mutex
);
1187 g_hash_table_foreach_remove(lo
->inodes
, unref_all_inodes_cb
, lo
);
1188 pthread_mutex_unlock(&lo
->mutex
);
1191 static void lo_forget_one(fuse_req_t req
, fuse_ino_t ino
, uint64_t nlookup
)
1193 struct lo_data
*lo
= lo_data(req
);
1194 struct lo_inode
*inode
;
1196 inode
= lo_inode(req
, ino
);
1201 fuse_log(FUSE_LOG_DEBUG
, " forget %lli %lli -%lli\n",
1202 (unsigned long long)ino
, (unsigned long long)inode
->refcount
,
1203 (unsigned long long)nlookup
);
1205 unref_inode_lolocked(lo
, inode
, nlookup
);
1208 static void lo_forget(fuse_req_t req
, fuse_ino_t ino
, uint64_t nlookup
)
1210 lo_forget_one(req
, ino
, nlookup
);
1211 fuse_reply_none(req
);
1214 static void lo_forget_multi(fuse_req_t req
, size_t count
,
1215 struct fuse_forget_data
*forgets
)
1219 for (i
= 0; i
< count
; i
++) {
1220 lo_forget_one(req
, forgets
[i
].ino
, forgets
[i
].nlookup
);
1222 fuse_reply_none(req
);
1225 static void lo_readlink(fuse_req_t req
, fuse_ino_t ino
)
1227 char buf
[PATH_MAX
+ 1];
1230 res
= readlinkat(lo_fd(req
, ino
), "", buf
, sizeof(buf
));
1232 return (void)fuse_reply_err(req
, errno
);
1235 if (res
== sizeof(buf
)) {
1236 return (void)fuse_reply_err(req
, ENAMETOOLONG
);
1241 fuse_reply_readlink(req
, buf
);
1246 struct dirent
*entry
;
1250 static struct lo_dirp
*lo_dirp(fuse_req_t req
, struct fuse_file_info
*fi
)
1252 struct lo_data
*lo
= lo_data(req
);
1253 struct lo_map_elem
*elem
;
1255 pthread_mutex_lock(&lo
->mutex
);
1256 elem
= lo_map_get(&lo
->dirp_map
, fi
->fh
);
1257 pthread_mutex_unlock(&lo
->mutex
);
1265 static void lo_opendir(fuse_req_t req
, fuse_ino_t ino
,
1266 struct fuse_file_info
*fi
)
1269 struct lo_data
*lo
= lo_data(req
);
1274 d
= calloc(1, sizeof(struct lo_dirp
));
1279 fd
= openat(lo_fd(req
, ino
), ".", O_RDONLY
);
1284 d
->dp
= fdopendir(fd
);
1285 if (d
->dp
== NULL
) {
1292 pthread_mutex_lock(&lo
->mutex
);
1293 fh
= lo_add_dirp_mapping(req
, d
);
1294 pthread_mutex_unlock(&lo
->mutex
);
1300 if (lo
->cache
== CACHE_ALWAYS
) {
1303 fuse_reply_open(req
, fi
);
1318 fuse_reply_err(req
, error
);
1321 static void lo_do_readdir(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1322 off_t offset
, struct fuse_file_info
*fi
, int plus
)
1324 struct lo_data
*lo
= lo_data(req
);
1326 struct lo_inode
*dinode
;
1332 dinode
= lo_inode(req
, ino
);
1337 d
= lo_dirp(req
, fi
);
1343 buf
= calloc(1, size
);
1349 if (offset
!= d
->offset
) {
1350 seekdir(d
->dp
, offset
);
1361 d
->entry
= readdir(d
->dp
);
1363 if (errno
) { /* Error */
1366 } else { /* End of stream */
1371 nextoff
= d
->entry
->d_off
;
1372 name
= d
->entry
->d_name
;
1374 fuse_ino_t entry_ino
= 0;
1375 struct fuse_entry_param e
= (struct fuse_entry_param
){
1376 .attr
.st_ino
= d
->entry
->d_ino
,
1377 .attr
.st_mode
= d
->entry
->d_type
<< 12,
1380 /* Hide root's parent directory */
1381 if (dinode
== &lo
->root
&& strcmp(name
, "..") == 0) {
1382 e
.attr
.st_ino
= lo
->root
.key
.ino
;
1383 e
.attr
.st_mode
= DT_DIR
<< 12;
1387 if (!is_dot_or_dotdot(name
)) {
1388 err
= lo_do_lookup(req
, ino
, name
, &e
);
1395 entsize
= fuse_add_direntry_plus(req
, p
, rem
, name
, &e
, nextoff
);
1397 entsize
= fuse_add_direntry(req
, p
, rem
, name
, &e
.attr
, nextoff
);
1399 if (entsize
> rem
) {
1400 if (entry_ino
!= 0) {
1401 lo_forget_one(req
, entry_ino
, 1);
1410 d
->offset
= nextoff
;
1416 * If there's an error, we can only signal it if we haven't stored
1417 * any entries yet - otherwise we'd end up with wrong lookup
1418 * counts for the entries that are already in the buffer. So we
1419 * return what we've collected until that point.
1421 if (err
&& rem
== size
) {
1422 fuse_reply_err(req
, err
);
1424 fuse_reply_buf(req
, buf
, size
- rem
);
1429 static void lo_readdir(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1430 off_t offset
, struct fuse_file_info
*fi
)
1432 lo_do_readdir(req
, ino
, size
, offset
, fi
, 0);
1435 static void lo_readdirplus(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1436 off_t offset
, struct fuse_file_info
*fi
)
1438 lo_do_readdir(req
, ino
, size
, offset
, fi
, 1);
1441 static void lo_releasedir(fuse_req_t req
, fuse_ino_t ino
,
1442 struct fuse_file_info
*fi
)
1444 struct lo_data
*lo
= lo_data(req
);
1449 d
= lo_dirp(req
, fi
);
1451 fuse_reply_err(req
, EBADF
);
1455 pthread_mutex_lock(&lo
->mutex
);
1456 lo_map_remove(&lo
->dirp_map
, fi
->fh
);
1457 pthread_mutex_unlock(&lo
->mutex
);
1461 fuse_reply_err(req
, 0);
1464 static void lo_create(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1465 mode_t mode
, struct fuse_file_info
*fi
)
1468 struct lo_data
*lo
= lo_data(req
);
1469 struct fuse_entry_param e
;
1471 struct lo_cred old
= {};
1473 fuse_log(FUSE_LOG_DEBUG
, "lo_create(parent=%" PRIu64
", name=%s)\n", parent
,
1476 if (!is_safe_path_component(name
)) {
1477 fuse_reply_err(req
, EINVAL
);
1481 err
= lo_change_cred(req
, &old
);
1486 fd
= openat(lo_fd(req
, parent
), name
, (fi
->flags
| O_CREAT
) & ~O_NOFOLLOW
,
1488 err
= fd
== -1 ? errno
: 0;
1489 lo_restore_cred(&old
);
1494 pthread_mutex_lock(&lo
->mutex
);
1495 fh
= lo_add_fd_mapping(req
, fd
);
1496 pthread_mutex_unlock(&lo
->mutex
);
1499 fuse_reply_err(req
, ENOMEM
);
1504 err
= lo_do_lookup(req
, parent
, name
, &e
);
1506 if (lo
->cache
== CACHE_NONE
) {
1508 } else if (lo
->cache
== CACHE_ALWAYS
) {
1514 fuse_reply_err(req
, err
);
1516 fuse_reply_create(req
, &e
, fi
);
1520 static void lo_fsyncdir(fuse_req_t req
, fuse_ino_t ino
, int datasync
,
1521 struct fuse_file_info
*fi
)
1529 d
= lo_dirp(req
, fi
);
1531 fuse_reply_err(req
, EBADF
);
1537 res
= fdatasync(fd
);
1541 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1544 static void lo_open(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
)
1549 struct lo_data
*lo
= lo_data(req
);
1551 fuse_log(FUSE_LOG_DEBUG
, "lo_open(ino=%" PRIu64
", flags=%d)\n", ino
,
1555 * With writeback cache, kernel may send read requests even
1556 * when userspace opened write-only
1558 if (lo
->writeback
&& (fi
->flags
& O_ACCMODE
) == O_WRONLY
) {
1559 fi
->flags
&= ~O_ACCMODE
;
1560 fi
->flags
|= O_RDWR
;
1564 * With writeback cache, O_APPEND is handled by the kernel.
1565 * This breaks atomicity (since the file may change in the
1566 * underlying filesystem, so that the kernel's idea of the
1567 * end of the file isn't accurate anymore). In this example,
1568 * we just accept that. A more rigorous filesystem may want
1569 * to return an error here
1571 if (lo
->writeback
&& (fi
->flags
& O_APPEND
)) {
1572 fi
->flags
&= ~O_APPEND
;
1575 sprintf(buf
, "%i", lo_fd(req
, ino
));
1576 fd
= openat(lo
->proc_self_fd
, buf
, fi
->flags
& ~O_NOFOLLOW
);
1578 return (void)fuse_reply_err(req
, errno
);
1581 pthread_mutex_lock(&lo
->mutex
);
1582 fh
= lo_add_fd_mapping(req
, fd
);
1583 pthread_mutex_unlock(&lo
->mutex
);
1586 fuse_reply_err(req
, ENOMEM
);
1591 if (lo
->cache
== CACHE_NONE
) {
1593 } else if (lo
->cache
== CACHE_ALWAYS
) {
1596 fuse_reply_open(req
, fi
);
1599 static void lo_release(fuse_req_t req
, fuse_ino_t ino
,
1600 struct fuse_file_info
*fi
)
1602 struct lo_data
*lo
= lo_data(req
);
1607 fd
= lo_fi_fd(req
, fi
);
1609 pthread_mutex_lock(&lo
->mutex
);
1610 lo_map_remove(&lo
->fd_map
, fi
->fh
);
1611 pthread_mutex_unlock(&lo
->mutex
);
1614 fuse_reply_err(req
, 0);
1617 static void lo_flush(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
)
1621 res
= close(dup(lo_fi_fd(req
, fi
)));
1622 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1625 static void lo_fsync(fuse_req_t req
, fuse_ino_t ino
, int datasync
,
1626 struct fuse_file_info
*fi
)
1632 fuse_log(FUSE_LOG_DEBUG
, "lo_fsync(ino=%" PRIu64
", fi=0x%p)\n", ino
,
1636 struct lo_data
*lo
= lo_data(req
);
1638 res
= asprintf(&buf
, "%i", lo_fd(req
, ino
));
1640 return (void)fuse_reply_err(req
, errno
);
1643 fd
= openat(lo
->proc_self_fd
, buf
, O_RDWR
);
1646 return (void)fuse_reply_err(req
, errno
);
1649 fd
= lo_fi_fd(req
, fi
);
1653 res
= fdatasync(fd
);
1660 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1663 static void lo_read(fuse_req_t req
, fuse_ino_t ino
, size_t size
, off_t offset
,
1664 struct fuse_file_info
*fi
)
1666 struct fuse_bufvec buf
= FUSE_BUFVEC_INIT(size
);
1668 fuse_log(FUSE_LOG_DEBUG
,
1669 "lo_read(ino=%" PRIu64
", size=%zd, "
1671 ino
, size
, (unsigned long)offset
);
1673 buf
.buf
[0].flags
= FUSE_BUF_IS_FD
| FUSE_BUF_FD_SEEK
;
1674 buf
.buf
[0].fd
= lo_fi_fd(req
, fi
);
1675 buf
.buf
[0].pos
= offset
;
1677 fuse_reply_data(req
, &buf
);
1680 static void lo_write_buf(fuse_req_t req
, fuse_ino_t ino
,
1681 struct fuse_bufvec
*in_buf
, off_t off
,
1682 struct fuse_file_info
*fi
)
1686 struct fuse_bufvec out_buf
= FUSE_BUFVEC_INIT(fuse_buf_size(in_buf
));
1687 bool cap_fsetid_dropped
= false;
1689 out_buf
.buf
[0].flags
= FUSE_BUF_IS_FD
| FUSE_BUF_FD_SEEK
;
1690 out_buf
.buf
[0].fd
= lo_fi_fd(req
, fi
);
1691 out_buf
.buf
[0].pos
= off
;
1693 fuse_log(FUSE_LOG_DEBUG
,
1694 "lo_write_buf(ino=%" PRIu64
", size=%zd, off=%lu)\n", ino
,
1695 out_buf
.buf
[0].size
, (unsigned long)off
);
1698 * If kill_priv is set, drop CAP_FSETID which should lead to kernel
1699 * clearing setuid/setgid on file.
1701 if (fi
->kill_priv
) {
1702 res
= drop_effective_cap("FSETID", &cap_fsetid_dropped
);
1704 fuse_reply_err(req
, res
);
1709 res
= fuse_buf_copy(&out_buf
, in_buf
);
1711 fuse_reply_err(req
, -res
);
1713 fuse_reply_write(req
, (size_t)res
);
1716 if (cap_fsetid_dropped
) {
1717 res
= gain_effective_cap("FSETID");
1719 fuse_log(FUSE_LOG_ERR
, "Failed to gain CAP_FSETID\n");
1724 static void lo_statfs(fuse_req_t req
, fuse_ino_t ino
)
1727 struct statvfs stbuf
;
1729 res
= fstatvfs(lo_fd(req
, ino
), &stbuf
);
1731 fuse_reply_err(req
, errno
);
1733 fuse_reply_statfs(req
, &stbuf
);
1737 static void lo_fallocate(fuse_req_t req
, fuse_ino_t ino
, int mode
, off_t offset
,
1738 off_t length
, struct fuse_file_info
*fi
)
1740 int err
= EOPNOTSUPP
;
1743 #ifdef CONFIG_FALLOCATE
1744 err
= fallocate(lo_fi_fd(req
, fi
), mode
, offset
, length
);
1749 #elif defined(CONFIG_POSIX_FALLOCATE)
1751 fuse_reply_err(req
, EOPNOTSUPP
);
1755 err
= posix_fallocate(lo_fi_fd(req
, fi
), offset
, length
);
1758 fuse_reply_err(req
, err
);
1761 static void lo_flock(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
,
1767 res
= flock(lo_fi_fd(req
, fi
), op
);
1769 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1772 static void lo_getxattr(fuse_req_t req
, fuse_ino_t ino
, const char *name
,
1775 struct lo_data
*lo
= lo_data(req
);
1778 struct lo_inode
*inode
;
1783 inode
= lo_inode(req
, ino
);
1785 fuse_reply_err(req
, EBADF
);
1790 if (!lo_data(req
)->xattr
) {
1794 fuse_log(FUSE_LOG_DEBUG
, "lo_getxattr(ino=%" PRIu64
", name=%s size=%zd)\n",
1797 if (inode
->is_symlink
) {
1798 /* Sorry, no race free way to getxattr on symlink. */
1803 sprintf(procname
, "%i", inode
->fd
);
1804 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
1810 value
= malloc(size
);
1815 ret
= fgetxattr(fd
, name
, value
, size
);
1824 fuse_reply_buf(req
, value
, ret
);
1826 ret
= fgetxattr(fd
, name
, NULL
, 0);
1831 fuse_reply_xattr(req
, ret
);
1844 fuse_reply_err(req
, saverr
);
1848 static void lo_listxattr(fuse_req_t req
, fuse_ino_t ino
, size_t size
)
1850 struct lo_data
*lo
= lo_data(req
);
1853 struct lo_inode
*inode
;
1858 inode
= lo_inode(req
, ino
);
1860 fuse_reply_err(req
, EBADF
);
1865 if (!lo_data(req
)->xattr
) {
1869 fuse_log(FUSE_LOG_DEBUG
, "lo_listxattr(ino=%" PRIu64
", size=%zd)\n", ino
,
1872 if (inode
->is_symlink
) {
1873 /* Sorry, no race free way to listxattr on symlink. */
1878 sprintf(procname
, "%i", inode
->fd
);
1879 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
1885 value
= malloc(size
);
1890 ret
= flistxattr(fd
, value
, size
);
1899 fuse_reply_buf(req
, value
, ret
);
1901 ret
= flistxattr(fd
, NULL
, 0);
1906 fuse_reply_xattr(req
, ret
);
1919 fuse_reply_err(req
, saverr
);
1923 static void lo_setxattr(fuse_req_t req
, fuse_ino_t ino
, const char *name
,
1924 const char *value
, size_t size
, int flags
)
1927 struct lo_data
*lo
= lo_data(req
);
1928 struct lo_inode
*inode
;
1933 inode
= lo_inode(req
, ino
);
1935 fuse_reply_err(req
, EBADF
);
1940 if (!lo_data(req
)->xattr
) {
1944 fuse_log(FUSE_LOG_DEBUG
, "lo_setxattr(ino=%" PRIu64
1945 ", name=%s value=%s size=%zd)\n", ino
, name
, value
, size
);
1947 if (inode
->is_symlink
) {
1948 /* Sorry, no race free way to setxattr on symlink. */
1953 sprintf(procname
, "%i", inode
->fd
);
1954 fd
= openat(lo
->proc_self_fd
, procname
, O_RDWR
);
1960 ret
= fsetxattr(fd
, name
, value
, size
, flags
);
1961 saverr
= ret
== -1 ? errno
: 0;
1967 fuse_reply_err(req
, saverr
);
1970 static void lo_removexattr(fuse_req_t req
, fuse_ino_t ino
, const char *name
)
1973 struct lo_data
*lo
= lo_data(req
);
1974 struct lo_inode
*inode
;
1979 inode
= lo_inode(req
, ino
);
1981 fuse_reply_err(req
, EBADF
);
1986 if (!lo_data(req
)->xattr
) {
1990 fuse_log(FUSE_LOG_DEBUG
, "lo_removexattr(ino=%" PRIu64
", name=%s)\n", ino
,
1993 if (inode
->is_symlink
) {
1994 /* Sorry, no race free way to setxattr on symlink. */
1999 sprintf(procname
, "%i", inode
->fd
);
2000 fd
= openat(lo
->proc_self_fd
, procname
, O_RDWR
);
2006 ret
= fremovexattr(fd
, name
);
2007 saverr
= ret
== -1 ? errno
: 0;
2013 fuse_reply_err(req
, saverr
);
#ifdef HAVE_COPY_FILE_RANGE
/*
 * FUSE copy_file_range handler.
 *
 * Copies up to @len bytes from offset @off_in of the file behind @fi_in to
 * offset @off_out of the file behind @fi_out using copy_file_range().
 * Replies with the number of bytes copied, or with errno on failure.
 */
static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
                               off_t off_out, struct fuse_file_info *fi_out,
                               size_t len, int flags)
{
    int in_fd, out_fd;
    ssize_t res;

    in_fd = lo_fi_fd(req, fi_in);
    out_fd = lo_fi_fd(req, fi_out);

    /* off_t is not necessarily unsigned long, so cast to match %lu */
    fuse_log(FUSE_LOG_DEBUG,
             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
             "off=%lu, ino=%" PRIu64 "/fd=%d, "
             "off=%lu, size=%zd, flags=0x%x)\n",
             ino_in, in_fd, (unsigned long)off_in, ino_out, out_fd,
             (unsigned long)off_out, len, flags);

    res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
    if (res < 0) {
        /* fuse_reply_err() takes a positive errno value */
        fuse_reply_err(req, errno);
    } else {
        fuse_reply_write(req, res);
    }
}
#endif
2043 static void lo_lseek(fuse_req_t req
, fuse_ino_t ino
, off_t off
, int whence
,
2044 struct fuse_file_info
*fi
)
2049 res
= lseek(lo_fi_fd(req
, fi
), off
, whence
);
2051 fuse_reply_lseek(req
, res
);
2053 fuse_reply_err(req
, errno
);
/*
 * FUSE destroy handler: @userdata is the struct lo_data of the session; hand
 * it to unref_all_inodes().
 */
static void lo_destroy(void *userdata)
{
    struct lo_data *lo = userdata;

    unref_all_inodes(lo);
}
2063 static struct fuse_lowlevel_ops lo_oper
= {
2065 .lookup
= lo_lookup
,
2068 .symlink
= lo_symlink
,
2070 .unlink
= lo_unlink
,
2072 .rename
= lo_rename
,
2073 .forget
= lo_forget
,
2074 .forget_multi
= lo_forget_multi
,
2075 .getattr
= lo_getattr
,
2076 .setattr
= lo_setattr
,
2077 .readlink
= lo_readlink
,
2078 .opendir
= lo_opendir
,
2079 .readdir
= lo_readdir
,
2080 .readdirplus
= lo_readdirplus
,
2081 .releasedir
= lo_releasedir
,
2082 .fsyncdir
= lo_fsyncdir
,
2083 .create
= lo_create
,
2085 .release
= lo_release
,
2089 .write_buf
= lo_write_buf
,
2090 .statfs
= lo_statfs
,
2091 .fallocate
= lo_fallocate
,
2093 .getxattr
= lo_getxattr
,
2094 .listxattr
= lo_listxattr
,
2095 .setxattr
= lo_setxattr
,
2096 .removexattr
= lo_removexattr
,
2097 #ifdef HAVE_COPY_FILE_RANGE
2098 .copy_file_range
= lo_copy_file_range
,
2101 .destroy
= lo_destroy
,
2104 /* Print vhost-user.json backend program capabilities */
2105 static void print_capabilities(void)
2108 printf(" \"type\": \"fs\"\n");
2113 * Move to a new mount, net, and pid namespaces to isolate this process.
2115 static void setup_namespaces(struct lo_data
*lo
, struct fuse_session
*se
)
2120 * Create a new pid namespace for *child* processes. We'll have to
2121 * fork in order to enter the new pid namespace. A new mount namespace
2122 * is also needed so that we can remount /proc for the new pid
2125 * Our UNIX domain sockets have been created. Now we can move to
2126 * an empty network namespace to prevent TCP/IP and other network
2127 * activity in case this process is compromised.
2129 if (unshare(CLONE_NEWPID
| CLONE_NEWNS
| CLONE_NEWNET
) != 0) {
2130 fuse_log(FUSE_LOG_ERR
, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
2136 fuse_log(FUSE_LOG_ERR
, "fork() failed: %m\n");
2143 /* The parent waits for the child */
2145 waited
= waitpid(child
, &wstatus
, 0);
2146 } while (waited
< 0 && errno
== EINTR
&& !se
->exited
);
2148 /* We were terminated by a signal, see fuse_signals.c */
2153 if (WIFEXITED(wstatus
)) {
2154 exit(WEXITSTATUS(wstatus
));
2160 /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
2161 prctl(PR_SET_PDEATHSIG
, SIGTERM
);
2164 * If the mounts have shared propagation then we want to opt out so our
2165 * mount changes don't affect the parent mount namespace.
2167 if (mount(NULL
, "/", NULL
, MS_REC
| MS_SLAVE
, NULL
) < 0) {
2168 fuse_log(FUSE_LOG_ERR
, "mount(/, MS_REC|MS_SLAVE): %m\n");
2172 /* The child must remount /proc to use the new pid namespace */
2173 if (mount("proc", "/proc", "proc",
2174 MS_NODEV
| MS_NOEXEC
| MS_NOSUID
| MS_RELATIME
, NULL
) < 0) {
2175 fuse_log(FUSE_LOG_ERR
, "mount(/proc): %m\n");
2179 /* Now we can get our /proc/self/fd directory file descriptor */
2180 lo
->proc_self_fd
= open("/proc/self/fd", O_PATH
);
2181 if (lo
->proc_self_fd
== -1) {
2182 fuse_log(FUSE_LOG_ERR
, "open(/proc/self/fd, O_PATH): %m\n");
2188 * Capture the capability state, we'll need to restore this for individual
2189 * threads later; see load_capng.
2191 static void setup_capng(void)
2193 /* Note this accesses /proc so has to happen before the sandbox */
2194 if (capng_get_caps_process()) {
2195 fuse_log(FUSE_LOG_ERR
, "capng_get_caps_process\n");
2198 pthread_mutex_init(&cap
.mutex
, NULL
);
2199 pthread_mutex_lock(&cap
.mutex
);
2200 cap
.saved
= capng_save_state();
2202 fuse_log(FUSE_LOG_ERR
, "capng_save_state\n");
2205 pthread_mutex_unlock(&cap
.mutex
);
2208 static void cleanup_capng(void)
2212 pthread_mutex_destroy(&cap
.mutex
);
2217 * Make the source directory our root so symlinks cannot escape and no other
2218 * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
2220 static void setup_mounts(const char *source
)
2225 if (mount(source
, source
, NULL
, MS_BIND
, NULL
) < 0) {
2226 fuse_log(FUSE_LOG_ERR
, "mount(%s, %s, MS_BIND): %m\n", source
, source
);
2230 /* This magic is based on lxc's lxc_pivot_root() */
2231 oldroot
= open("/", O_DIRECTORY
| O_RDONLY
| O_CLOEXEC
);
2233 fuse_log(FUSE_LOG_ERR
, "open(/): %m\n");
2237 newroot
= open(source
, O_DIRECTORY
| O_RDONLY
| O_CLOEXEC
);
2239 fuse_log(FUSE_LOG_ERR
, "open(%s): %m\n", source
);
2243 if (fchdir(newroot
) < 0) {
2244 fuse_log(FUSE_LOG_ERR
, "fchdir(newroot): %m\n");
2248 if (syscall(__NR_pivot_root
, ".", ".") < 0) {
2249 fuse_log(FUSE_LOG_ERR
, "pivot_root(., .): %m\n");
2253 if (fchdir(oldroot
) < 0) {
2254 fuse_log(FUSE_LOG_ERR
, "fchdir(oldroot): %m\n");
2258 if (mount("", ".", "", MS_SLAVE
| MS_REC
, NULL
) < 0) {
2259 fuse_log(FUSE_LOG_ERR
, "mount(., MS_SLAVE | MS_REC): %m\n");
2263 if (umount2(".", MNT_DETACH
) < 0) {
2264 fuse_log(FUSE_LOG_ERR
, "umount2(., MNT_DETACH): %m\n");
2268 if (fchdir(newroot
) < 0) {
2269 fuse_log(FUSE_LOG_ERR
, "fchdir(newroot): %m\n");
2278 * Lock down this process to prevent access to other processes or files outside
2279 * source directory. This reduces the impact of arbitrary code execution bugs.
2281 static void setup_sandbox(struct lo_data
*lo
, struct fuse_session
*se
,
2284 setup_namespaces(lo
, se
);
2285 setup_mounts(lo
->source
);
2286 setup_seccomp(enable_syslog
);
2289 /* Raise the maximum number of open file descriptors */
2290 static void setup_nofile_rlimit(void)
2292 const rlim_t max_fds
= 1000000;
2295 if (getrlimit(RLIMIT_NOFILE
, &rlim
) < 0) {
2296 fuse_log(FUSE_LOG_ERR
, "getrlimit(RLIMIT_NOFILE): %m\n");
2300 if (rlim
.rlim_cur
>= max_fds
) {
2301 return; /* nothing to do */
2304 rlim
.rlim_cur
= max_fds
;
2305 rlim
.rlim_max
= max_fds
;
2307 if (setrlimit(RLIMIT_NOFILE
, &rlim
) < 0) {
2308 /* Ignore SELinux denials */
2309 if (errno
== EPERM
) {
2313 fuse_log(FUSE_LOG_ERR
, "setrlimit(RLIMIT_NOFILE): %m\n");
2318 static void log_func(enum fuse_log_level level
, const char *fmt
, va_list ap
)
2320 g_autofree
char *localfmt
= NULL
;
2322 if (current_log_level
< level
) {
2326 if (current_log_level
== FUSE_LOG_DEBUG
) {
2328 localfmt
= g_strdup_printf("[%" PRId64
"] [ID: %08ld] %s",
2329 get_clock(), syscall(__NR_gettid
), fmt
);
2331 localfmt
= g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid
),
2338 int priority
= LOG_ERR
;
2340 case FUSE_LOG_EMERG
:
2341 priority
= LOG_EMERG
;
2343 case FUSE_LOG_ALERT
:
2344 priority
= LOG_ALERT
;
2347 priority
= LOG_CRIT
;
2352 case FUSE_LOG_WARNING
:
2353 priority
= LOG_WARNING
;
2355 case FUSE_LOG_NOTICE
:
2356 priority
= LOG_NOTICE
;
2359 priority
= LOG_INFO
;
2361 case FUSE_LOG_DEBUG
:
2362 priority
= LOG_DEBUG
;
2365 vsyslog(priority
, fmt
, ap
);
2367 vfprintf(stderr
, fmt
, ap
);
2371 static void setup_root(struct lo_data
*lo
, struct lo_inode
*root
)
2376 fd
= open("/", O_PATH
);
2378 fuse_log(FUSE_LOG_ERR
, "open(%s, O_PATH): %m\n", lo
->source
);
2382 res
= fstatat(fd
, "", &stat
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
2384 fuse_log(FUSE_LOG_ERR
, "fstatat(%s): %m\n", lo
->source
);
2388 root
->is_symlink
= false;
2390 root
->key
.ino
= stat
.st_ino
;
2391 root
->key
.dev
= stat
.st_dev
;
2395 static guint
lo_key_hash(gconstpointer key
)
2397 const struct lo_key
*lkey
= key
;
2399 return (guint
)lkey
->ino
+ (guint
)lkey
->dev
;
2402 static gboolean
lo_key_equal(gconstpointer a
, gconstpointer b
)
2404 const struct lo_key
*la
= a
;
2405 const struct lo_key
*lb
= b
;
2407 return la
->ino
== lb
->ino
&& la
->dev
== lb
->dev
;
2410 static void fuse_lo_data_cleanup(struct lo_data
*lo
)
2413 g_hash_table_destroy(lo
->inodes
);
2415 lo_map_destroy(&lo
->fd_map
);
2416 lo_map_destroy(&lo
->dirp_map
);
2417 lo_map_destroy(&lo
->ino_map
);
2419 if (lo
->proc_self_fd
>= 0) {
2420 close(lo
->proc_self_fd
);
2423 if (lo
->root
.fd
>= 0) {
2430 int main(int argc
, char *argv
[])
2432 struct fuse_args args
= FUSE_ARGS_INIT(argc
, argv
);
2433 struct fuse_session
*se
;
2434 struct fuse_cmdline_opts opts
;
2435 struct lo_data lo
= {
2440 struct lo_map_elem
*root_elem
;
2443 /* Don't mask creation mode, kernel already did that */
2446 pthread_mutex_init(&lo
.mutex
, NULL
);
2447 lo
.inodes
= g_hash_table_new(lo_key_hash
, lo_key_equal
);
2449 lo
.root
.fuse_ino
= FUSE_ROOT_ID
;
2450 lo
.cache
= CACHE_AUTO
;
2453 * Set up the ino map like this:
2454 * [0] Reserved (will not be used)
2457 lo_map_init(&lo
.ino_map
);
2458 lo_map_reserve(&lo
.ino_map
, 0)->in_use
= false;
2459 root_elem
= lo_map_reserve(&lo
.ino_map
, lo
.root
.fuse_ino
);
2460 root_elem
->inode
= &lo
.root
;
2462 lo_map_init(&lo
.dirp_map
);
2463 lo_map_init(&lo
.fd_map
);
2465 if (fuse_parse_cmdline(&args
, &opts
) != 0) {
2468 fuse_set_log_func(log_func
);
2469 use_syslog
= opts
.syslog
;
2471 openlog("virtiofsd", LOG_PID
, LOG_DAEMON
);
2474 if (opts
.show_help
) {
2475 printf("usage: %s [options]\n\n", argv
[0]);
2476 fuse_cmdline_help();
2477 printf(" -o source=PATH shared directory tree\n");
2478 fuse_lowlevel_help();
2481 } else if (opts
.show_version
) {
2482 fuse_lowlevel_version();
2485 } else if (opts
.print_capabilities
) {
2486 print_capabilities();
2491 if (fuse_opt_parse(&args
, &lo
, lo_opts
, NULL
) == -1) {
2496 * log_level is 0 if not configured via cmd options (0 is LOG_EMERG,
2497 * and we don't use this log level).
2499 if (opts
.log_level
!= 0) {
2500 current_log_level
= opts
.log_level
;
2502 lo
.debug
= opts
.debug
;
2504 current_log_level
= FUSE_LOG_DEBUG
;
2510 res
= lstat(lo
.source
, &stat
);
2512 fuse_log(FUSE_LOG_ERR
, "failed to stat source (\"%s\"): %m\n",
2516 if (!S_ISDIR(stat
.st_mode
)) {
2517 fuse_log(FUSE_LOG_ERR
, "source is not a directory\n");
2521 lo
.source
= strdup("/");
2523 if (!lo
.timeout_set
) {
2534 lo
.timeout
= 86400.0;
2537 } else if (lo
.timeout
< 0) {
2538 fuse_log(FUSE_LOG_ERR
, "timeout is negative (%lf)\n", lo
.timeout
);
2542 se
= fuse_session_new(&args
, &lo_oper
, sizeof(lo_oper
), &lo
);
2547 if (fuse_set_signal_handlers(se
) != 0) {
2551 if (fuse_session_mount(se
) != 0) {
2555 fuse_daemonize(opts
.foreground
);
2557 setup_nofile_rlimit();
2559 /* Must be before sandbox since it wants /proc */
2562 setup_sandbox(&lo
, se
, opts
.syslog
);
2564 setup_root(&lo
, &lo
.root
);
2565 /* Block until ctrl+c or fusermount -u */
2566 ret
= virtio_loop(se
);
2568 fuse_session_unmount(se
);
2571 fuse_remove_signal_handlers(se
);
2573 fuse_session_destroy(se
);
2575 fuse_opt_free_args(&args
);
2577 fuse_lo_data_cleanup(&lo
);