2 * FUSE: Filesystem in Userspace
3 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
5 * This program can be distributed under the terms of the GNU GPLv2.
6 * See the file COPYING.
11 * This file system mirrors the existing file system hierarchy of the
12 * system, starting at the root file system. This is implemented by
13 * just "passing through" all requests to the corresponding user-space
14 * libc functions. In contrast to passthrough.c and passthrough_fh.c,
15 * this implementation uses the low-level API. Its performance should
16 * be the least bad among the three, but many operations are not
17 * implemented. In particular, it is not possible to remove files (or
18 * directories) because the code necessary to defer actual removal
19 * until the file is not opened anymore would make the example much
22 * When writeback caching is enabled (-o writeback mount option), it
23 * is only possible to write to files for which the mounting user has
24 * read permissions. This is because the writeback cache requires the
25 * kernel to be able to issue read requests for all files (which the
26 * passthrough filesystem cannot satisfy if it can't read the file in
27 * the underlying filesystem).
31 * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
35 * \include passthrough_ll.c
38 #include "qemu/osdep.h"
39 #include "qemu/timer.h"
40 #include "qemu-version.h"
41 #include "qemu-common.h"
42 #include "fuse_virtio.h"
44 #include "fuse_lowlevel.h"
45 #include "standard-headers/linux/fuse.h"
50 #include <sys/mount.h>
51 #include <sys/prctl.h>
52 #include <sys/resource.h>
53 #include <sys/syscall.h>
55 #include <sys/xattr.h>
58 #include "qemu/cutils.h"
59 #include "passthrough_helpers.h"
60 #include "passthrough_seccomp.h"
62 /* Keep track of inode posix locks for each owner. */
63 struct lo_inode_plock
{
65 int fd
; /* fd for OFD locks */
70 struct lo_inode
*inode
;
78 /* Maps FUSE fh or ino values to internal objects */
80 struct lo_map_elem
*elems
;
95 * Atomic reference count for this object. The nlookup field holds a
96 * reference and release it when nlookup reaches 0.
103 * This counter keeps the inode alive during the FUSE session.
104 * Incremented when the FUSE inode number is sent in a reply
105 * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is
106 * released by a FUSE_FORGET request.
108 * Note that this value is untrusted because the client can manipulate
109 * it arbitrarily using FUSE_FORGET requests.
111 * Protected by lo->mutex.
116 pthread_mutex_t plock_mutex
;
117 GHashTable
*posix_locks
; /* protected by lo_inode->plock_mutex */
138 typedef struct xattr_map_entry
{
145 pthread_mutex_t mutex
;
153 char *xattr_security_capability
;
160 int readdirplus_clear
;
162 int announce_submounts
;
164 struct lo_inode root
;
165 GHashTable
*inodes
; /* protected by lo->mutex */
166 struct lo_map ino_map
; /* protected by lo->mutex */
167 struct lo_map dirp_map
; /* protected by lo->mutex */
168 struct lo_map fd_map
; /* protected by lo->mutex */
169 XattrMapEntry
*xattr_map_list
;
170 size_t xattr_map_nentries
;
172 /* An O_PATH file descriptor to /proc/self/fd/ */
174 int user_killpriv_v2
, killpriv_v2
;
177 static const struct fuse_opt lo_opts
[] = {
178 { "sandbox=namespace",
179 offsetof(struct lo_data
, sandbox
),
182 offsetof(struct lo_data
, sandbox
),
184 { "writeback", offsetof(struct lo_data
, writeback
), 1 },
185 { "no_writeback", offsetof(struct lo_data
, writeback
), 0 },
186 { "source=%s", offsetof(struct lo_data
, source
), 0 },
187 { "flock", offsetof(struct lo_data
, flock
), 1 },
188 { "no_flock", offsetof(struct lo_data
, flock
), 0 },
189 { "posix_lock", offsetof(struct lo_data
, posix_lock
), 1 },
190 { "no_posix_lock", offsetof(struct lo_data
, posix_lock
), 0 },
191 { "xattr", offsetof(struct lo_data
, xattr
), 1 },
192 { "no_xattr", offsetof(struct lo_data
, xattr
), 0 },
193 { "xattrmap=%s", offsetof(struct lo_data
, xattrmap
), 0 },
194 { "modcaps=%s", offsetof(struct lo_data
, modcaps
), 0 },
195 { "timeout=%lf", offsetof(struct lo_data
, timeout
), 0 },
196 { "timeout=", offsetof(struct lo_data
, timeout_set
), 1 },
197 { "cache=none", offsetof(struct lo_data
, cache
), CACHE_NONE
},
198 { "cache=auto", offsetof(struct lo_data
, cache
), CACHE_AUTO
},
199 { "cache=always", offsetof(struct lo_data
, cache
), CACHE_ALWAYS
},
200 { "readdirplus", offsetof(struct lo_data
, readdirplus_set
), 1 },
201 { "no_readdirplus", offsetof(struct lo_data
, readdirplus_clear
), 1 },
202 { "allow_direct_io", offsetof(struct lo_data
, allow_direct_io
), 1 },
203 { "no_allow_direct_io", offsetof(struct lo_data
, allow_direct_io
), 0 },
204 { "announce_submounts", offsetof(struct lo_data
, announce_submounts
), 1 },
205 { "killpriv_v2", offsetof(struct lo_data
, user_killpriv_v2
), 1 },
206 { "no_killpriv_v2", offsetof(struct lo_data
, user_killpriv_v2
), 0 },
209 static bool use_syslog
= false;
210 static int current_log_level
;
211 static void unref_inode_lolocked(struct lo_data
*lo
, struct lo_inode
*inode
,
215 pthread_mutex_t mutex
;
218 /* Set once cap-ng state has been loaded in the current thread from the saved copy */
219 static __thread
bool cap_loaded
= 0;
221 static struct lo_inode
*lo_find(struct lo_data
*lo
, struct stat
*st
,
223 static int xattr_map_client(const struct lo_data
*lo
, const char *client_name
,
/*
 * Is `name` exactly "." or ".."?
 *
 * @name: NUL-terminated string; must not be NULL.
 *
 * Returns true only for the two special directory entries, false for
 * everything else (including the empty string and names that merely start
 * with dots, such as "..." or ".hidden").
 */
static bool is_dot_or_dotdot(const char *name)
{
    if (name[0] != '.') {
        return false;
    }
    if (name[1] == '\0') {
        return true; /* "." */
    }
    return name[1] == '.' && name[2] == '\0'; /* ".." */
}

/*
 * Is `path` a single path component that is not "." or ".."?
 *
 * @path: NUL-terminated string; must not be NULL.
 *
 * Returns false if `path` contains a '/' anywhere or is "." or "..".
 * The empty string is NOT rejected here; callers in this file test
 * is_empty() separately before calling this helper.
 */
static bool is_safe_path_component(const char *path)
{
    if (strchr(path, '/')) {
        return false;
    }

    return !is_dot_or_dotdot(path);
}
/*
 * Is `name` the empty string?
 *
 * @name: NUL-terminated string; must not be NULL.
 *
 * Returns true when the first character is the terminating NUL.
 */
static bool is_empty(const char *name)
{
    return *name == '\0';
}
247 static struct lo_data
*lo_data(fuse_req_t req
)
249 return (struct lo_data
*)fuse_req_userdata(req
);
253 * Load capng's state from our saved state if the current thread
254 * hadn't previously been loaded.
255 * returns 0 on success
257 static int load_capng(void)
260 pthread_mutex_lock(&cap
.mutex
);
261 capng_restore_state(&cap
.saved
);
263 * restore_state free's the saved copy
266 cap
.saved
= capng_save_state();
268 pthread_mutex_unlock(&cap
.mutex
);
269 fuse_log(FUSE_LOG_ERR
, "capng_save_state (thread)\n");
272 pthread_mutex_unlock(&cap
.mutex
);
275 * We want to use the loaded state for our pid,
278 capng_setpid(syscall(SYS_gettid
));
285 * Helpers for dropping and regaining effective capabilities. Returns 0
286 * on success, error otherwise
288 static int drop_effective_cap(const char *cap_name
, bool *cap_dropped
)
292 cap
= capng_name_to_capability(cap_name
);
295 fuse_log(FUSE_LOG_ERR
, "capng_name_to_capability(%s) failed:%s\n",
296 cap_name
, strerror(errno
));
302 fuse_log(FUSE_LOG_ERR
, "load_capng() failed\n");
306 /* We dont have this capability in effective set already. */
307 if (!capng_have_capability(CAPNG_EFFECTIVE
, cap
)) {
312 if (capng_update(CAPNG_DROP
, CAPNG_EFFECTIVE
, cap
)) {
314 fuse_log(FUSE_LOG_ERR
, "capng_update(DROP,) failed\n");
318 if (capng_apply(CAPNG_SELECT_CAPS
)) {
320 fuse_log(FUSE_LOG_ERR
, "drop:capng_apply() failed\n");
333 static int gain_effective_cap(const char *cap_name
)
338 cap
= capng_name_to_capability(cap_name
);
341 fuse_log(FUSE_LOG_ERR
, "capng_name_to_capability(%s) failed:%s\n",
342 cap_name
, strerror(errno
));
348 fuse_log(FUSE_LOG_ERR
, "load_capng() failed\n");
352 if (capng_update(CAPNG_ADD
, CAPNG_EFFECTIVE
, cap
)) {
354 fuse_log(FUSE_LOG_ERR
, "capng_update(ADD,) failed\n");
358 if (capng_apply(CAPNG_SELECT_CAPS
)) {
360 fuse_log(FUSE_LOG_ERR
, "gain:capng_apply() failed\n");
370 * The host kernel normally drops security.capability xattr's on
371 * any write, however if we're remapping xattr names we need to drop
372 * whatever the clients security.capability is actually stored as.
374 static int drop_security_capability(const struct lo_data
*lo
, int fd
)
376 if (!lo
->xattr_security_capability
) {
377 /* We didn't remap the name, let the host kernel do it */
380 if (!fremovexattr(fd
, lo
->xattr_security_capability
)) {
387 /* Attribute didn't exist, that's fine */
391 /* FS didn't support attribute anyway, also fine */
395 /* Hmm other error */
400 static void lo_map_init(struct lo_map
*map
)
407 static void lo_map_destroy(struct lo_map
*map
)
412 static int lo_map_grow(struct lo_map
*map
, size_t new_nelems
)
414 struct lo_map_elem
*new_elems
;
417 if (new_nelems
<= map
->nelems
) {
421 new_elems
= realloc(map
->elems
, sizeof(map
->elems
[0]) * new_nelems
);
426 for (i
= map
->nelems
; i
< new_nelems
; i
++) {
427 new_elems
[i
].freelist
= i
+ 1;
428 new_elems
[i
].in_use
= false;
430 new_elems
[new_nelems
- 1].freelist
= -1;
432 map
->elems
= new_elems
;
433 map
->freelist
= map
->nelems
;
434 map
->nelems
= new_nelems
;
438 static struct lo_map_elem
*lo_map_alloc_elem(struct lo_map
*map
)
440 struct lo_map_elem
*elem
;
442 if (map
->freelist
== -1 && !lo_map_grow(map
, map
->nelems
+ 256)) {
446 elem
= &map
->elems
[map
->freelist
];
447 map
->freelist
= elem
->freelist
;
454 static struct lo_map_elem
*lo_map_reserve(struct lo_map
*map
, size_t key
)
458 if (!lo_map_grow(map
, key
+ 1)) {
462 for (prev
= &map
->freelist
; *prev
!= -1;
463 prev
= &map
->elems
[*prev
].freelist
) {
465 struct lo_map_elem
*elem
= &map
->elems
[key
];
467 *prev
= elem
->freelist
;
475 static struct lo_map_elem
*lo_map_get(struct lo_map
*map
, size_t key
)
477 if (key
>= map
->nelems
) {
480 if (!map
->elems
[key
].in_use
) {
483 return &map
->elems
[key
];
486 static void lo_map_remove(struct lo_map
*map
, size_t key
)
488 struct lo_map_elem
*elem
;
490 if (key
>= map
->nelems
) {
494 elem
= &map
->elems
[key
];
499 elem
->in_use
= false;
501 elem
->freelist
= map
->freelist
;
505 /* Assumes lo->mutex is held */
506 static ssize_t
lo_add_fd_mapping(struct lo_data
*lo
, int fd
)
508 struct lo_map_elem
*elem
;
510 elem
= lo_map_alloc_elem(&lo
->fd_map
);
516 return elem
- lo
->fd_map
.elems
;
519 /* Assumes lo->mutex is held */
520 static ssize_t
lo_add_dirp_mapping(fuse_req_t req
, struct lo_dirp
*dirp
)
522 struct lo_map_elem
*elem
;
524 elem
= lo_map_alloc_elem(&lo_data(req
)->dirp_map
);
530 return elem
- lo_data(req
)->dirp_map
.elems
;
533 /* Assumes lo->mutex is held */
534 static ssize_t
lo_add_inode_mapping(fuse_req_t req
, struct lo_inode
*inode
)
536 struct lo_map_elem
*elem
;
538 elem
= lo_map_alloc_elem(&lo_data(req
)->ino_map
);
544 return elem
- lo_data(req
)->ino_map
.elems
;
547 static void lo_inode_put(struct lo_data
*lo
, struct lo_inode
**inodep
)
549 struct lo_inode
*inode
= *inodep
;
557 if (g_atomic_int_dec_and_test(&inode
->refcount
)) {
563 /* Caller must release refcount using lo_inode_put() */
564 static struct lo_inode
*lo_inode(fuse_req_t req
, fuse_ino_t ino
)
566 struct lo_data
*lo
= lo_data(req
);
567 struct lo_map_elem
*elem
;
569 pthread_mutex_lock(&lo
->mutex
);
570 elem
= lo_map_get(&lo
->ino_map
, ino
);
572 g_atomic_int_inc(&elem
->inode
->refcount
);
574 pthread_mutex_unlock(&lo
->mutex
);
584 * TODO Remove this helper and force callers to hold an inode refcount until
585 * they are done with the fd. This will be done in a later patch to make
588 static int lo_fd(fuse_req_t req
, fuse_ino_t ino
)
590 struct lo_inode
*inode
= lo_inode(req
, ino
);
598 lo_inode_put(lo_data(req
), &inode
);
603 * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
604 * regular file or a directory.
606 * Use this helper function instead of raw openat(2) to prevent security issues
607 * when a malicious client opens special files such as block device nodes.
608 * Symlink inodes are also rejected since symlinks must already have been
609 * traversed on the client side.
611 static int lo_inode_open(struct lo_data
*lo
, struct lo_inode
*inode
,
614 g_autofree
char *fd_str
= g_strdup_printf("%d", inode
->fd
);
617 if (!S_ISREG(inode
->filetype
) && !S_ISDIR(inode
->filetype
)) {
622 * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
623 * that the inode is not a special file but if an external process races
624 * with us then symlinks are traversed here. It is not possible to escape
625 * the shared directory since it is mounted as "/" though.
627 fd
= openat(lo
->proc_self_fd
, fd_str
, open_flags
& ~O_NOFOLLOW
);
634 static void lo_init(void *userdata
, struct fuse_conn_info
*conn
)
636 struct lo_data
*lo
= (struct lo_data
*)userdata
;
638 if (conn
->capable
& FUSE_CAP_EXPORT_SUPPORT
) {
639 conn
->want
|= FUSE_CAP_EXPORT_SUPPORT
;
642 if (lo
->writeback
&& conn
->capable
& FUSE_CAP_WRITEBACK_CACHE
) {
643 fuse_log(FUSE_LOG_DEBUG
, "lo_init: activating writeback\n");
644 conn
->want
|= FUSE_CAP_WRITEBACK_CACHE
;
646 if (conn
->capable
& FUSE_CAP_FLOCK_LOCKS
) {
648 fuse_log(FUSE_LOG_DEBUG
, "lo_init: activating flock locks\n");
649 conn
->want
|= FUSE_CAP_FLOCK_LOCKS
;
651 fuse_log(FUSE_LOG_DEBUG
, "lo_init: disabling flock locks\n");
652 conn
->want
&= ~FUSE_CAP_FLOCK_LOCKS
;
656 if (conn
->capable
& FUSE_CAP_POSIX_LOCKS
) {
657 if (lo
->posix_lock
) {
658 fuse_log(FUSE_LOG_DEBUG
, "lo_init: activating posix locks\n");
659 conn
->want
|= FUSE_CAP_POSIX_LOCKS
;
661 fuse_log(FUSE_LOG_DEBUG
, "lo_init: disabling posix locks\n");
662 conn
->want
&= ~FUSE_CAP_POSIX_LOCKS
;
666 if ((lo
->cache
== CACHE_NONE
&& !lo
->readdirplus_set
) ||
667 lo
->readdirplus_clear
) {
668 fuse_log(FUSE_LOG_DEBUG
, "lo_init: disabling readdirplus\n");
669 conn
->want
&= ~FUSE_CAP_READDIRPLUS
;
672 if (!(conn
->capable
& FUSE_CAP_SUBMOUNTS
) && lo
->announce_submounts
) {
673 fuse_log(FUSE_LOG_WARNING
, "lo_init: Cannot announce submounts, client "
674 "does not support it\n");
675 lo
->announce_submounts
= false;
678 if (lo
->user_killpriv_v2
== 1) {
680 * User explicitly asked for this option. Enable it unconditionally.
681 * If connection does not have this capability, it should fail
684 fuse_log(FUSE_LOG_DEBUG
, "lo_init: enabling killpriv_v2\n");
685 conn
->want
|= FUSE_CAP_HANDLE_KILLPRIV_V2
;
687 } else if (lo
->user_killpriv_v2
== -1 &&
688 conn
->capable
& FUSE_CAP_HANDLE_KILLPRIV_V2
) {
690 * User did not specify a value for killpriv_v2. By default enable it
691 * if connection offers this capability
693 fuse_log(FUSE_LOG_DEBUG
, "lo_init: enabling killpriv_v2\n");
694 conn
->want
|= FUSE_CAP_HANDLE_KILLPRIV_V2
;
698 * Either user specified to disable killpriv_v2, or connection does
699 * not offer this capability. Disable killpriv_v2 in both the cases
701 fuse_log(FUSE_LOG_DEBUG
, "lo_init: disabling killpriv_v2\n");
702 conn
->want
&= ~FUSE_CAP_HANDLE_KILLPRIV_V2
;
707 static void lo_getattr(fuse_req_t req
, fuse_ino_t ino
,
708 struct fuse_file_info
*fi
)
712 struct lo_data
*lo
= lo_data(req
);
717 fstatat(lo_fd(req
, ino
), "", &buf
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
719 return (void)fuse_reply_err(req
, errno
);
722 fuse_reply_attr(req
, &buf
, lo
->timeout
);
725 static int lo_fi_fd(fuse_req_t req
, struct fuse_file_info
*fi
)
727 struct lo_data
*lo
= lo_data(req
);
728 struct lo_map_elem
*elem
;
730 pthread_mutex_lock(&lo
->mutex
);
731 elem
= lo_map_get(&lo
->fd_map
, fi
->fh
);
732 pthread_mutex_unlock(&lo
->mutex
);
741 static void lo_setattr(fuse_req_t req
, fuse_ino_t ino
, struct stat
*attr
,
742 int valid
, struct fuse_file_info
*fi
)
746 struct lo_data
*lo
= lo_data(req
);
747 struct lo_inode
*inode
;
752 inode
= lo_inode(req
, ino
);
754 fuse_reply_err(req
, EBADF
);
760 /* If fi->fh is invalid we'll report EBADF later */
762 fd
= lo_fi_fd(req
, fi
);
765 if (valid
& FUSE_SET_ATTR_MODE
) {
767 res
= fchmod(fd
, attr
->st_mode
);
769 sprintf(procname
, "%i", ifd
);
770 res
= fchmodat(lo
->proc_self_fd
, procname
, attr
->st_mode
, 0);
777 if (valid
& (FUSE_SET_ATTR_UID
| FUSE_SET_ATTR_GID
)) {
778 uid_t uid
= (valid
& FUSE_SET_ATTR_UID
) ? attr
->st_uid
: (uid_t
)-1;
779 gid_t gid
= (valid
& FUSE_SET_ATTR_GID
) ? attr
->st_gid
: (gid_t
)-1;
781 saverr
= drop_security_capability(lo
, ifd
);
786 res
= fchownat(ifd
, "", uid
, gid
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
792 if (valid
& FUSE_SET_ATTR_SIZE
) {
795 bool cap_fsetid_dropped
= false;
797 kill_suidgid
= lo
->killpriv_v2
&& (valid
& FUSE_SET_ATTR_KILL_SUIDGID
);
801 truncfd
= lo_inode_open(lo
, inode
, O_RDWR
);
808 saverr
= drop_security_capability(lo
, truncfd
);
817 res
= drop_effective_cap("FSETID", &cap_fsetid_dropped
);
827 res
= ftruncate(truncfd
, attr
->st_size
);
828 saverr
= res
== -1 ? errno
: 0;
830 if (cap_fsetid_dropped
) {
831 if (gain_effective_cap("FSETID")) {
832 fuse_log(FUSE_LOG_ERR
, "Failed to gain CAP_FSETID\n");
842 if (valid
& (FUSE_SET_ATTR_ATIME
| FUSE_SET_ATTR_MTIME
)) {
843 struct timespec tv
[2];
847 tv
[0].tv_nsec
= UTIME_OMIT
;
848 tv
[1].tv_nsec
= UTIME_OMIT
;
850 if (valid
& FUSE_SET_ATTR_ATIME_NOW
) {
851 tv
[0].tv_nsec
= UTIME_NOW
;
852 } else if (valid
& FUSE_SET_ATTR_ATIME
) {
853 tv
[0] = attr
->st_atim
;
856 if (valid
& FUSE_SET_ATTR_MTIME_NOW
) {
857 tv
[1].tv_nsec
= UTIME_NOW
;
858 } else if (valid
& FUSE_SET_ATTR_MTIME
) {
859 tv
[1] = attr
->st_mtim
;
863 res
= futimens(fd
, tv
);
865 sprintf(procname
, "%i", inode
->fd
);
866 res
= utimensat(lo
->proc_self_fd
, procname
, tv
, 0);
873 lo_inode_put(lo
, &inode
);
875 return lo_getattr(req
, ino
, fi
);
878 lo_inode_put(lo
, &inode
);
879 fuse_reply_err(req
, saverr
);
882 static struct lo_inode
*lo_find(struct lo_data
*lo
, struct stat
*st
,
886 struct lo_key key
= {
892 pthread_mutex_lock(&lo
->mutex
);
893 p
= g_hash_table_lookup(lo
->inodes
, &key
);
895 assert(p
->nlookup
> 0);
897 g_atomic_int_inc(&p
->refcount
);
899 pthread_mutex_unlock(&lo
->mutex
);
904 /* value_destroy_func for posix_locks GHashTable */
905 static void posix_locks_value_destroy(gpointer data
)
907 struct lo_inode_plock
*plock
= data
;
910 * We had used open() for locks and had only one fd. So
911 * closing this fd should release all OFD locks.
917 static int do_statx(struct lo_data
*lo
, int dirfd
, const char *pathname
,
918 struct stat
*statbuf
, int flags
, uint64_t *mnt_id
)
922 #if defined(CONFIG_STATX) && defined(STATX_MNT_ID)
924 struct statx statxbuf
;
926 res
= statx(dirfd
, pathname
, flags
, STATX_BASIC_STATS
| STATX_MNT_ID
,
929 memset(statbuf
, 0, sizeof(*statbuf
));
930 statbuf
->st_dev
= makedev(statxbuf
.stx_dev_major
,
931 statxbuf
.stx_dev_minor
);
932 statbuf
->st_ino
= statxbuf
.stx_ino
;
933 statbuf
->st_mode
= statxbuf
.stx_mode
;
934 statbuf
->st_nlink
= statxbuf
.stx_nlink
;
935 statbuf
->st_uid
= statxbuf
.stx_uid
;
936 statbuf
->st_gid
= statxbuf
.stx_gid
;
937 statbuf
->st_rdev
= makedev(statxbuf
.stx_rdev_major
,
938 statxbuf
.stx_rdev_minor
);
939 statbuf
->st_size
= statxbuf
.stx_size
;
940 statbuf
->st_blksize
= statxbuf
.stx_blksize
;
941 statbuf
->st_blocks
= statxbuf
.stx_blocks
;
942 statbuf
->st_atim
.tv_sec
= statxbuf
.stx_atime
.tv_sec
;
943 statbuf
->st_atim
.tv_nsec
= statxbuf
.stx_atime
.tv_nsec
;
944 statbuf
->st_mtim
.tv_sec
= statxbuf
.stx_mtime
.tv_sec
;
945 statbuf
->st_mtim
.tv_nsec
= statxbuf
.stx_mtime
.tv_nsec
;
946 statbuf
->st_ctim
.tv_sec
= statxbuf
.stx_ctime
.tv_sec
;
947 statbuf
->st_ctim
.tv_nsec
= statxbuf
.stx_ctime
.tv_nsec
;
949 if (statxbuf
.stx_mask
& STATX_MNT_ID
) {
950 *mnt_id
= statxbuf
.stx_mnt_id
;
955 } else if (errno
!= ENOSYS
) {
958 lo
->use_statx
= false;
962 res
= fstatat(dirfd
, pathname
, statbuf
, flags
);
972 * Increments nlookup on the inode on success. unref_inode_lolocked() must be
973 * called eventually to decrement nlookup again. If inodep is non-NULL, the
974 * inode pointer is stored and the caller must call lo_inode_put().
976 static int lo_do_lookup(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
977 struct fuse_entry_param
*e
,
978 struct lo_inode
**inodep
)
984 struct lo_data
*lo
= lo_data(req
);
985 struct lo_inode
*inode
= NULL
;
986 struct lo_inode
*dir
= lo_inode(req
, parent
);
989 *inodep
= NULL
; /* in case there is an error */
993 * name_to_handle_at() and open_by_handle_at() can reach here with fuse
994 * mount point in guest, but we don't have its inode info in the
1001 memset(e
, 0, sizeof(*e
));
1002 e
->attr_timeout
= lo
->timeout
;
1003 e
->entry_timeout
= lo
->timeout
;
1005 /* Do not allow escaping root directory */
1006 if (dir
== &lo
->root
&& strcmp(name
, "..") == 0) {
1010 newfd
= openat(dir
->fd
, name
, O_PATH
| O_NOFOLLOW
);
1015 res
= do_statx(lo
, newfd
, "", &e
->attr
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
,
1021 if (S_ISDIR(e
->attr
.st_mode
) && lo
->announce_submounts
&&
1022 (e
->attr
.st_dev
!= dir
->key
.dev
|| mnt_id
!= dir
->key
.mnt_id
)) {
1023 e
->attr_flags
|= FUSE_ATTR_SUBMOUNT
;
1026 inode
= lo_find(lo
, &e
->attr
, mnt_id
);
1030 inode
= calloc(1, sizeof(struct lo_inode
));
1035 /* cache only filetype */
1036 inode
->filetype
= (e
->attr
.st_mode
& S_IFMT
);
1039 * One for the caller and one for nlookup (released in
1040 * unref_inode_lolocked())
1042 g_atomic_int_set(&inode
->refcount
, 2);
1046 inode
->key
.ino
= e
->attr
.st_ino
;
1047 inode
->key
.dev
= e
->attr
.st_dev
;
1048 inode
->key
.mnt_id
= mnt_id
;
1049 if (lo
->posix_lock
) {
1050 pthread_mutex_init(&inode
->plock_mutex
, NULL
);
1051 inode
->posix_locks
= g_hash_table_new_full(
1052 g_direct_hash
, g_direct_equal
, NULL
, posix_locks_value_destroy
);
1054 pthread_mutex_lock(&lo
->mutex
);
1055 inode
->fuse_ino
= lo_add_inode_mapping(req
, inode
);
1056 g_hash_table_insert(lo
->inodes
, &inode
->key
, inode
);
1057 pthread_mutex_unlock(&lo
->mutex
);
1059 e
->ino
= inode
->fuse_ino
;
1061 /* Transfer ownership of inode pointer to caller or drop it */
1065 lo_inode_put(lo
, &inode
);
1068 lo_inode_put(lo
, &dir
);
1070 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
1071 name
, (unsigned long long)e
->ino
);
1080 lo_inode_put(lo
, &inode
);
1081 lo_inode_put(lo
, &dir
);
1085 static void lo_lookup(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
1087 struct fuse_entry_param e
;
1090 fuse_log(FUSE_LOG_DEBUG
, "lo_lookup(parent=%" PRIu64
", name=%s)\n", parent
,
1093 if (is_empty(name
)) {
1094 fuse_reply_err(req
, ENOENT
);
1099 * Don't use is_safe_path_component(), allow "." and ".." for NFS export
1102 if (strchr(name
, '/')) {
1103 fuse_reply_err(req
, EINVAL
);
1107 err
= lo_do_lookup(req
, parent
, name
, &e
, NULL
);
1109 fuse_reply_err(req
, err
);
1111 fuse_reply_entry(req
, &e
);
1116 * On some archs, setres*id is limited to 2^16 but they
1117 * provide setres*id32 variants that allow 2^32.
1118 * Others just let setres*id do 2^32 anyway.
1120 #ifdef SYS_setresgid32
1121 #define OURSYS_setresgid SYS_setresgid32
1123 #define OURSYS_setresgid SYS_setresgid
1126 #ifdef SYS_setresuid32
1127 #define OURSYS_setresuid SYS_setresuid32
1129 #define OURSYS_setresuid SYS_setresuid
1133 * Change to uid/gid of caller so that file is created with
1134 * ownership of caller.
1135 * TODO: What about selinux context?
1137 static int lo_change_cred(fuse_req_t req
, struct lo_cred
*old
)
1141 old
->euid
= geteuid();
1142 old
->egid
= getegid();
1144 res
= syscall(OURSYS_setresgid
, -1, fuse_req_ctx(req
)->gid
, -1);
1149 res
= syscall(OURSYS_setresuid
, -1, fuse_req_ctx(req
)->uid
, -1);
1151 int errno_save
= errno
;
1153 syscall(OURSYS_setresgid
, -1, old
->egid
, -1);
1160 /* Regain Privileges */
1161 static void lo_restore_cred(struct lo_cred
*old
)
1165 res
= syscall(OURSYS_setresuid
, -1, old
->euid
, -1);
1167 fuse_log(FUSE_LOG_ERR
, "seteuid(%u): %m\n", old
->euid
);
1171 res
= syscall(OURSYS_setresgid
, -1, old
->egid
, -1);
1173 fuse_log(FUSE_LOG_ERR
, "setegid(%u): %m\n", old
->egid
);
1178 static void lo_mknod_symlink(fuse_req_t req
, fuse_ino_t parent
,
1179 const char *name
, mode_t mode
, dev_t rdev
,
1184 struct lo_data
*lo
= lo_data(req
);
1185 struct lo_inode
*dir
;
1186 struct fuse_entry_param e
;
1187 struct lo_cred old
= {};
1189 if (is_empty(name
)) {
1190 fuse_reply_err(req
, ENOENT
);
1194 if (!is_safe_path_component(name
)) {
1195 fuse_reply_err(req
, EINVAL
);
1199 dir
= lo_inode(req
, parent
);
1201 fuse_reply_err(req
, EBADF
);
1205 saverr
= lo_change_cred(req
, &old
);
1210 res
= mknod_wrapper(dir
->fd
, name
, link
, mode
, rdev
);
1214 lo_restore_cred(&old
);
1220 saverr
= lo_do_lookup(req
, parent
, name
, &e
, NULL
);
1225 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
1226 name
, (unsigned long long)e
.ino
);
1228 fuse_reply_entry(req
, &e
);
1229 lo_inode_put(lo
, &dir
);
1233 lo_inode_put(lo
, &dir
);
1234 fuse_reply_err(req
, saverr
);
1237 static void lo_mknod(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1238 mode_t mode
, dev_t rdev
)
1240 lo_mknod_symlink(req
, parent
, name
, mode
, rdev
, NULL
);
1243 static void lo_mkdir(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1246 lo_mknod_symlink(req
, parent
, name
, S_IFDIR
| mode
, 0, NULL
);
1249 static void lo_symlink(fuse_req_t req
, const char *link
, fuse_ino_t parent
,
1252 lo_mknod_symlink(req
, parent
, name
, S_IFLNK
, 0, link
);
1255 static void lo_link(fuse_req_t req
, fuse_ino_t ino
, fuse_ino_t parent
,
1259 struct lo_data
*lo
= lo_data(req
);
1260 struct lo_inode
*parent_inode
;
1261 struct lo_inode
*inode
;
1262 struct fuse_entry_param e
;
1266 if (is_empty(name
)) {
1267 fuse_reply_err(req
, ENOENT
);
1271 if (!is_safe_path_component(name
)) {
1272 fuse_reply_err(req
, EINVAL
);
1276 parent_inode
= lo_inode(req
, parent
);
1277 inode
= lo_inode(req
, ino
);
1278 if (!parent_inode
|| !inode
) {
1283 memset(&e
, 0, sizeof(struct fuse_entry_param
));
1284 e
.attr_timeout
= lo
->timeout
;
1285 e
.entry_timeout
= lo
->timeout
;
1287 sprintf(procname
, "%i", inode
->fd
);
1288 res
= linkat(lo
->proc_self_fd
, procname
, parent_inode
->fd
, name
,
1294 res
= fstatat(inode
->fd
, "", &e
.attr
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
);
1299 pthread_mutex_lock(&lo
->mutex
);
1301 pthread_mutex_unlock(&lo
->mutex
);
1302 e
.ino
= inode
->fuse_ino
;
1304 fuse_log(FUSE_LOG_DEBUG
, " %lli/%s -> %lli\n", (unsigned long long)parent
,
1305 name
, (unsigned long long)e
.ino
);
1307 fuse_reply_entry(req
, &e
);
1308 lo_inode_put(lo
, &parent_inode
);
1309 lo_inode_put(lo
, &inode
);
1314 lo_inode_put(lo
, &parent_inode
);
1315 lo_inode_put(lo
, &inode
);
1316 fuse_reply_err(req
, saverr
);
1319 /* Increments nlookup and caller must release refcount using lo_inode_put() */
1320 static struct lo_inode
*lookup_name(fuse_req_t req
, fuse_ino_t parent
,
1326 struct lo_data
*lo
= lo_data(req
);
1327 struct lo_inode
*dir
= lo_inode(req
, parent
);
1333 res
= do_statx(lo
, dir
->fd
, name
, &attr
, AT_SYMLINK_NOFOLLOW
, &mnt_id
);
1334 lo_inode_put(lo
, &dir
);
1339 return lo_find(lo
, &attr
, mnt_id
);
1342 static void lo_rmdir(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
1345 struct lo_inode
*inode
;
1346 struct lo_data
*lo
= lo_data(req
);
1348 if (is_empty(name
)) {
1349 fuse_reply_err(req
, ENOENT
);
1353 if (!is_safe_path_component(name
)) {
1354 fuse_reply_err(req
, EINVAL
);
1358 inode
= lookup_name(req
, parent
, name
);
1360 fuse_reply_err(req
, EIO
);
1364 res
= unlinkat(lo_fd(req
, parent
), name
, AT_REMOVEDIR
);
1366 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1367 unref_inode_lolocked(lo
, inode
, 1);
1368 lo_inode_put(lo
, &inode
);
1371 static void lo_rename(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1372 fuse_ino_t newparent
, const char *newname
,
1376 struct lo_inode
*parent_inode
;
1377 struct lo_inode
*newparent_inode
;
1378 struct lo_inode
*oldinode
= NULL
;
1379 struct lo_inode
*newinode
= NULL
;
1380 struct lo_data
*lo
= lo_data(req
);
1382 if (is_empty(name
) || is_empty(newname
)) {
1383 fuse_reply_err(req
, ENOENT
);
1387 if (!is_safe_path_component(name
) || !is_safe_path_component(newname
)) {
1388 fuse_reply_err(req
, EINVAL
);
1392 parent_inode
= lo_inode(req
, parent
);
1393 newparent_inode
= lo_inode(req
, newparent
);
1394 if (!parent_inode
|| !newparent_inode
) {
1395 fuse_reply_err(req
, EBADF
);
1399 oldinode
= lookup_name(req
, parent
, name
);
1400 newinode
= lookup_name(req
, newparent
, newname
);
1403 fuse_reply_err(req
, EIO
);
1408 #ifndef SYS_renameat2
1409 fuse_reply_err(req
, EINVAL
);
1411 res
= syscall(SYS_renameat2
, parent_inode
->fd
, name
,
1412 newparent_inode
->fd
, newname
, flags
);
1413 if (res
== -1 && errno
== ENOSYS
) {
1414 fuse_reply_err(req
, EINVAL
);
1416 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1422 res
= renameat(parent_inode
->fd
, name
, newparent_inode
->fd
, newname
);
1424 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1426 unref_inode_lolocked(lo
, oldinode
, 1);
1427 unref_inode_lolocked(lo
, newinode
, 1);
1428 lo_inode_put(lo
, &oldinode
);
1429 lo_inode_put(lo
, &newinode
);
1430 lo_inode_put(lo
, &parent_inode
);
1431 lo_inode_put(lo
, &newparent_inode
);
1434 static void lo_unlink(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
1437 struct lo_inode
*inode
;
1438 struct lo_data
*lo
= lo_data(req
);
1440 if (is_empty(name
)) {
1441 fuse_reply_err(req
, ENOENT
);
1445 if (!is_safe_path_component(name
)) {
1446 fuse_reply_err(req
, EINVAL
);
1450 inode
= lookup_name(req
, parent
, name
);
1452 fuse_reply_err(req
, EIO
);
1456 res
= unlinkat(lo_fd(req
, parent
), name
, 0);
1458 fuse_reply_err(req
, res
== -1 ? errno
: 0);
1459 unref_inode_lolocked(lo
, inode
, 1);
1460 lo_inode_put(lo
, &inode
);
1463 /* To be called with lo->mutex held */
1464 static void unref_inode(struct lo_data
*lo
, struct lo_inode
*inode
, uint64_t n
)
1470 assert(inode
->nlookup
>= n
);
1471 inode
->nlookup
-= n
;
1472 if (!inode
->nlookup
) {
1473 lo_map_remove(&lo
->ino_map
, inode
->fuse_ino
);
1474 g_hash_table_remove(lo
->inodes
, &inode
->key
);
1475 if (lo
->posix_lock
) {
1476 if (g_hash_table_size(inode
->posix_locks
)) {
1477 fuse_log(FUSE_LOG_WARNING
, "Hash table is not empty\n");
1479 g_hash_table_destroy(inode
->posix_locks
);
1480 pthread_mutex_destroy(&inode
->plock_mutex
);
1482 /* Drop our refcount from lo_do_lookup() */
1483 lo_inode_put(lo
, &inode
);
1487 static void unref_inode_lolocked(struct lo_data
*lo
, struct lo_inode
*inode
,
1494 pthread_mutex_lock(&lo
->mutex
);
1495 unref_inode(lo
, inode
, n
);
1496 pthread_mutex_unlock(&lo
->mutex
);
1499 static void lo_forget_one(fuse_req_t req
, fuse_ino_t ino
, uint64_t nlookup
)
1501 struct lo_data
*lo
= lo_data(req
);
1502 struct lo_inode
*inode
;
1504 inode
= lo_inode(req
, ino
);
1509 fuse_log(FUSE_LOG_DEBUG
, " forget %lli %lli -%lli\n",
1510 (unsigned long long)ino
, (unsigned long long)inode
->nlookup
,
1511 (unsigned long long)nlookup
);
1513 unref_inode_lolocked(lo
, inode
, nlookup
);
1514 lo_inode_put(lo
, &inode
);
1517 static void lo_forget(fuse_req_t req
, fuse_ino_t ino
, uint64_t nlookup
)
1519 lo_forget_one(req
, ino
, nlookup
);
1520 fuse_reply_none(req
);
1523 static void lo_forget_multi(fuse_req_t req
, size_t count
,
1524 struct fuse_forget_data
*forgets
)
1528 for (i
= 0; i
< count
; i
++) {
1529 lo_forget_one(req
, forgets
[i
].ino
, forgets
[i
].nlookup
);
1531 fuse_reply_none(req
);
1534 static void lo_readlink(fuse_req_t req
, fuse_ino_t ino
)
1536 char buf
[PATH_MAX
+ 1];
1539 res
= readlinkat(lo_fd(req
, ino
), "", buf
, sizeof(buf
));
1541 return (void)fuse_reply_err(req
, errno
);
1544 if (res
== sizeof(buf
)) {
1545 return (void)fuse_reply_err(req
, ENAMETOOLONG
);
1550 fuse_reply_readlink(req
, buf
);
1556 struct dirent
*entry
;
1560 static void lo_dirp_put(struct lo_dirp
**dp
)
1562 struct lo_dirp
*d
= *dp
;
1569 if (g_atomic_int_dec_and_test(&d
->refcount
)) {
1575 /* Call lo_dirp_put() on the return value when no longer needed */
1576 static struct lo_dirp
*lo_dirp(fuse_req_t req
, struct fuse_file_info
*fi
)
1578 struct lo_data
*lo
= lo_data(req
);
1579 struct lo_map_elem
*elem
;
1581 pthread_mutex_lock(&lo
->mutex
);
1582 elem
= lo_map_get(&lo
->dirp_map
, fi
->fh
);
1584 g_atomic_int_inc(&elem
->dirp
->refcount
);
1586 pthread_mutex_unlock(&lo
->mutex
);
/*
 * Handler installed as .opendir in lo_oper.
 *
 * Allocates a zeroed struct lo_dirp, opens "." relative to the fd returned
 * by lo_fd(req, ino) and wraps that fd with fdopendir().  The new object
 * starts with refcount 1 and is registered through lo_add_dirp_mapping()
 * under lo->mutex.  With CACHE_ALWAYS the kernel is told it may cache
 * readdir results (fi->cache_readdir).  Success is answered with
 * fuse_reply_open(); the trailing lines belong to the error path, which
 * answers with fuse_reply_err().
 */
1594 static void lo_opendir(fuse_req_t req
, fuse_ino_t ino
,
1595 struct fuse_file_info
*fi
)
1598 struct lo_data
*lo
= lo_data(req
);
1603 d
= calloc(1, sizeof(struct lo_dirp
));
1608 fd
= openat(lo_fd(req
, ino
), ".", O_RDONLY
);
1613 d
->dp
= fdopendir(fd
);
1614 if (d
->dp
== NULL
) {
1621 g_atomic_int_set(&d
->refcount
, 1); /* paired with lo_releasedir() */
1622 pthread_mutex_lock(&lo
->mutex
);
1623 fh
= lo_add_dirp_mapping(req
, d
);
1624 pthread_mutex_unlock(&lo
->mutex
);
1630 if (lo
->cache
== CACHE_ALWAYS
) {
1631 fi
->cache_readdir
= 1;
1633 fuse_reply_open(req
, fi
);
1642 } else if (fd
!= -1) {
1647 fuse_reply_err(req
, error
);
/*
 * Shared implementation of lo_readdir() (plus == 0) and lo_readdirplus()
 * (plus == 1).
 *
 * Takes references on the directory inode (lo_inode()) and on the open
 * directory stream (lo_dirp()), allocates a zeroed reply buffer of size
 * bytes, repositions the stream with seekdir() when the requested offset
 * differs from the cached d->offset, and then packs entries returned by
 * readdir() into the buffer with fuse_add_direntry_plus() or
 * fuse_add_direntry().  The reply is the filled part of the buffer
 * (size - rem), or an error when nothing was stored.
 */
1650 static void lo_do_readdir(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1651 off_t offset
, struct fuse_file_info
*fi
, int plus
)
1653 struct lo_data
*lo
= lo_data(req
);
1654 struct lo_dirp
*d
= NULL
;
1655 struct lo_inode
*dinode
;
1661 dinode
= lo_inode(req
, ino
);
1666 d
= lo_dirp(req
, fi
);
1672 buf
= calloc(1, size
);
1678 if (offset
!= d
->offset
) {
1679 seekdir(d
->dp
, offset
);
1690 d
->entry
= readdir(d
->dp
);
1692 if (errno
) { /* Error */
1695 } else { /* End of stream */
1700 nextoff
= d
->entry
->d_off
;
1701 name
= d
->entry
->d_name
;
1703 fuse_ino_t entry_ino
= 0;
/* Default attributes come straight from the dirent (d_type shifted into the file-type bits of st_mode) */
1704 struct fuse_entry_param e
= (struct fuse_entry_param
){
1705 .attr
.st_ino
= d
->entry
->d_ino
,
1706 .attr
.st_mode
= d
->entry
->d_type
<< 12,
1709 /* Hide root's parent directory */
1710 if (dinode
== &lo
->root
&& strcmp(name
, "..") == 0) {
1711 e
.attr
.st_ino
= lo
->root
.key
.ino
;
1712 e
.attr
.st_mode
= DT_DIR
<< 12;
/* "." and ".." are never passed to lo_do_lookup() */
1716 if (!is_dot_or_dotdot(name
)) {
1717 err
= lo_do_lookup(req
, ino
, name
, &e
, NULL
);
1724 entsize
= fuse_add_direntry_plus(req
, p
, rem
, name
, &e
, nextoff
);
1726 entsize
= fuse_add_direntry(req
, p
, rem
, name
, &e
.attr
, nextoff
);
/* Entry does not fit in the remaining space: give back the lookup taken for it, if any */
1728 if (entsize
> rem
) {
1729 if (entry_ino
!= 0) {
1730 lo_forget_one(req
, entry_ino
, 1);
1739 d
->offset
= nextoff
;
1745 lo_inode_put(lo
, &dinode
);
1748 * If there's an error, we can only signal it if we haven't stored
1749 * any entries yet - otherwise we'd end up with wrong lookup
1750 * counts for the entries that are already in the buffer. So we
1751 * return what we've collected until that point.
1753 if (err
&& rem
== size
) {
1754 fuse_reply_err(req
, err
);
1756 fuse_reply_buf(req
, buf
, size
- rem
);
/* Handler installed as .readdir in lo_oper: lo_do_readdir() with plus == 0. */
1761 static void lo_readdir(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1762 off_t offset
, struct fuse_file_info
*fi
)
1764 lo_do_readdir(req
, ino
, size
, offset
, fi
, 0);
/* Handler installed as .readdirplus in lo_oper: lo_do_readdir() with plus == 1. */
1767 static void lo_readdirplus(fuse_req_t req
, fuse_ino_t ino
, size_t size
,
1768 off_t offset
, struct fuse_file_info
*fi
)
1770 lo_do_readdir(req
, ino
, size
, offset
, fi
, 1);
/*
 * Handler installed as .releasedir in lo_oper.
 *
 * Under lo->mutex, looks up fi->fh in lo->dirp_map; one path unlocks and
 * answers EBADF, the other removes the mapping before unlocking.  The
 * reference that lo_opendir() set up is then dropped with lo_dirp_put() and
 * the request is answered with 0.
 */
1773 static void lo_releasedir(fuse_req_t req
, fuse_ino_t ino
,
1774 struct fuse_file_info
*fi
)
1776 struct lo_data
*lo
= lo_data(req
);
1777 struct lo_map_elem
*elem
;
1782 pthread_mutex_lock(&lo
->mutex
);
1783 elem
= lo_map_get(&lo
->dirp_map
, fi
->fh
);
1785 pthread_mutex_unlock(&lo
->mutex
);
1786 fuse_reply_err(req
, EBADF
);
1791 lo_map_remove(&lo
->dirp_map
, fi
->fh
);
1792 pthread_mutex_unlock(&lo
->mutex
);
1794 lo_dirp_put(&d
); /* paired with lo_opendir() */
1796 fuse_reply_err(req
, 0);
/*
 * Rewrite fi->flags in place before a file is opened on the host:
 *  - writeback set and access mode O_WRONLY: access mode becomes O_RDWR;
 *  - writeback set and O_APPEND present: O_APPEND is cleared;
 *  - allow_direct_io clear: O_DIRECT is cleared.
 * Used by both lo_do_open() and lo_create().
 */
1799 static void update_open_flags(int writeback
, int allow_direct_io
,
1800 struct fuse_file_info
*fi
)
1803 * With writeback cache, kernel may send read requests even
1804 * when userspace opened write-only
1806 if (writeback
&& (fi
->flags
& O_ACCMODE
) == O_WRONLY
) {
1807 fi
->flags
&= ~O_ACCMODE
;
1808 fi
->flags
|= O_RDWR
;
1812 * With writeback cache, O_APPEND is handled by the kernel.
1813 * This breaks atomicity (since the file may change in the
1814 * underlying filesystem, so that the kernel's idea of the
1815 * end of the file isn't accurate anymore). In this example,
1816 * we just accept that. A more rigorous filesystem may want
1817 * to return an error here
1819 if (writeback
&& (fi
->flags
& O_APPEND
)) {
1820 fi
->flags
&= ~O_APPEND
;
1824 * O_DIRECT in guest should not necessarily mean bypassing page
1825 * cache on host as well. Therefore, we discard it by default
1826 * ('-o no_allow_direct_io'). If somebody needs that behavior,
1827 * the '-o allow_direct_io' option should be set.
1829 if (!allow_direct_io
) {
1830 fi
->flags
&= ~O_DIRECT
;
1835 * Open a regular file, set up an fd mapping, and fill out the struct
1836 * fuse_file_info for it. If existing_fd is not negative, use that fd instead of
1837 * opening a new one. Takes ownership of existing_fd.
1839 * Returns 0 on success or a positive errno.
/*
 * Common open path used by lo_open() (existing_fd == -1) and lo_create()
 * (existing_fd is the fd just created).
 *
 * fi->flags is first normalised by update_open_flags().  The visible code
 * can drop the effective FSETID capability before lo_inode_open() and
 * regain it afterwards, calls drop_security_capability() on the fd when
 * O_TRUNC is requested, registers the fd through lo_add_fd_mapping() under
 * lo->mutex, and finally selects caching behaviour from lo->cache.
 */
1841 static int lo_do_open(struct lo_data
*lo
, struct lo_inode
*inode
,
1842 int existing_fd
, struct fuse_file_info
*fi
)
1845 int fd
= existing_fd
;
1847 bool cap_fsetid_dropped
= false;
/* Only honour the kernel's kill_priv request when killpriv_v2 is enabled */
1848 bool kill_suidgid
= lo
->killpriv_v2
&& fi
->kill_priv
;
1850 update_open_flags(lo
->writeback
, lo
->allow_direct_io
, fi
);
1854 err
= drop_effective_cap("FSETID", &cap_fsetid_dropped
);
1860 fd
= lo_inode_open(lo
, inode
, fi
->flags
);
1862 if (cap_fsetid_dropped
) {
1863 if (gain_effective_cap("FSETID")) {
1864 fuse_log(FUSE_LOG_ERR
, "Failed to gain CAP_FSETID\n");
1870 if (fi
->flags
& (O_TRUNC
)) {
1871 int err
= drop_security_capability(lo
, fd
);
1879 pthread_mutex_lock(&lo
->mutex
);
1880 fh
= lo_add_fd_mapping(lo
, fd
);
1881 pthread_mutex_unlock(&lo
->mutex
);
1888 if (lo
->cache
== CACHE_NONE
) {
1890 } else if (lo
->cache
== CACHE_ALWAYS
) {
/*
 * Handler installed as .create in lo_oper.
 *
 * Rejects names that fail is_safe_path_component() with EINVAL and unknown
 * parents with EBADF.  The file is created with the caller's credentials
 * (lo_change_cred() / lo_restore_cred()) using openat() with
 * O_CREAT | O_EXCL added to the normalised fi->flags.  EEXIST is tolerated
 * when the client did not pass O_EXCL itself.  The entry is then resolved
 * with lo_do_lookup() and opened through lo_do_open(), which takes
 * ownership of fd.  The reply is fuse_reply_create() on success and
 * fuse_reply_err() otherwise.
 */
1896 static void lo_create(fuse_req_t req
, fuse_ino_t parent
, const char *name
,
1897 mode_t mode
, struct fuse_file_info
*fi
)
1900 struct lo_data
*lo
= lo_data(req
);
1901 struct lo_inode
*parent_inode
;
1902 struct lo_inode
*inode
= NULL
;
1903 struct fuse_entry_param e
;
1905 struct lo_cred old
= {};
1907 fuse_log(FUSE_LOG_DEBUG
, "lo_create(parent=%" PRIu64
", name=%s)"
1908 " kill_priv=%d\n", parent
, name
, fi
->kill_priv
);
1910 if (!is_safe_path_component(name
)) {
1911 fuse_reply_err(req
, EINVAL
);
1915 parent_inode
= lo_inode(req
, parent
);
1916 if (!parent_inode
) {
1917 fuse_reply_err(req
, EBADF
);
1921 err
= lo_change_cred(req
, &old
);
1926 update_open_flags(lo
->writeback
, lo
->allow_direct_io
, fi
);
1928 /* Try to create a new file but don't open existing files */
1929 fd
= openat(parent_inode
->fd
, name
, fi
->flags
| O_CREAT
| O_EXCL
, mode
);
/* Capture errno right away, before lo_restore_cred() runs */
1930 err
= fd
== -1 ? errno
: 0;
1932 lo_restore_cred(&old
);
1934 /* Ignore the error if file exists and O_EXCL was not given */
1935 if (err
&& (err
!= EEXIST
|| (fi
->flags
& O_EXCL
))) {
1939 err
= lo_do_lookup(req
, parent
, name
, &e
, &inode
);
1944 err
= lo_do_open(lo
, inode
, fd
, fi
);
1945 fd
= -1; /* lo_do_open() takes ownership of fd */
1947 /* Undo lo_do_lookup() nlookup ref */
1948 unref_inode_lolocked(lo
, inode
, 1);
1952 lo_inode_put(lo
, &inode
);
1953 lo_inode_put(lo
, &parent_inode
);
1960 fuse_reply_err(req
, err
);
1962 fuse_reply_create(req
, &e
, fi
);
1966 /* Should be called with inode->plock_mutex held */
/*
 * Find the lo_inode_plock registered for lock_owner in inode->posix_locks,
 * creating one when needed.  Creation allocates the structure with
 * malloc(), opens a separate O_RDWR fd on the inode through lo_inode_open()
 * for OFD locks, records lock_owner and inserts the entry into the hash
 * table keyed by lock_owner.  Failures are reported through *err.
 */
1967 static struct lo_inode_plock
*lookup_create_plock_ctx(struct lo_data
*lo
,
1968 struct lo_inode
*inode
,
1969 uint64_t lock_owner
,
1970 pid_t pid
, int *err
)
1972 struct lo_inode_plock
*plock
;
1976 g_hash_table_lookup(inode
->posix_locks
, GUINT_TO_POINTER(lock_owner
));
1982 plock
= malloc(sizeof(struct lo_inode_plock
));
1988 /* Open another instance of file which can be used for ofd locks. */
1989 /* TODO: What if file is not writable? */
1990 fd
= lo_inode_open(lo
, inode
, O_RDWR
);
1997 plock
->lock_owner
= lock_owner
;
1999 g_hash_table_insert(inode
->posix_locks
, GUINT_TO_POINTER(plock
->lock_owner
),
/*
 * POSIX lock query handler.
 *
 * Answers ENOSYS when lo->posix_lock is off and EBADF when the inode cannot
 * be found.  With inode->plock_mutex held, the per-owner lock context is
 * obtained from lookup_create_plock_ctx() and the query is issued as
 * fcntl(F_OFD_GETLK) on that context's fd.  The reply is fuse_reply_err()
 * with saverr on failure and fuse_reply_lock() otherwise.
 */
2004 static void lo_getlk(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
,
2007 struct lo_data
*lo
= lo_data(req
);
2008 struct lo_inode
*inode
;
2009 struct lo_inode_plock
*plock
;
2010 int ret
, saverr
= 0;
2012 fuse_log(FUSE_LOG_DEBUG
,
2013 "lo_getlk(ino=%" PRIu64
", flags=%d)"
2014 " owner=0x%lx, l_type=%d l_start=0x%lx"
2016 ino
, fi
->flags
, fi
->lock_owner
, lock
->l_type
, lock
->l_start
,
2019 if (!lo
->posix_lock
) {
2020 fuse_reply_err(req
, ENOSYS
);
2024 inode
= lo_inode(req
, ino
);
2026 fuse_reply_err(req
, EBADF
);
2030 pthread_mutex_lock(&inode
->plock_mutex
);
2032 lookup_create_plock_ctx(lo
, inode
, fi
->lock_owner
, lock
->l_pid
, &ret
);
2038 ret
= fcntl(plock
->fd
, F_OFD_GETLK
, lock
);
2044 pthread_mutex_unlock(&inode
->plock_mutex
);
2045 lo_inode_put(lo
, &inode
);
2048 fuse_reply_err(req
, saverr
);
2050 fuse_reply_lock(req
, lock
);
/*
 * POSIX lock set/clear handler.
 *
 * Answers ENOSYS when lo->posix_lock is off, EOPNOTSUPP on a path whose
 * condition is not visible here (NOTE(review): presumably blocking
 * requests, i.e. sleep != 0 -- confirm), and EBADF when the inode cannot be
 * found.  With inode->plock_mutex held, the per-owner lock context is
 * obtained from lookup_create_plock_ctx() and the lock is applied with the
 * non-blocking fcntl(F_OFD_SETLK).  The request is always completed with
 * fuse_reply_err(req, saverr).
 */
2054 static void lo_setlk(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
,
2055 struct flock
*lock
, int sleep
)
2057 struct lo_data
*lo
= lo_data(req
);
2058 struct lo_inode
*inode
;
2059 struct lo_inode_plock
*plock
;
2060 int ret
, saverr
= 0;
2062 fuse_log(FUSE_LOG_DEBUG
,
2063 "lo_setlk(ino=%" PRIu64
", flags=%d)"
2064 " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d"
2065 " l_start=0x%lx l_len=0x%lx\n",
2066 ino
, fi
->flags
, lock
->l_type
, lock
->l_pid
, fi
->lock_owner
, sleep
,
2067 lock
->l_whence
, lock
->l_start
, lock
->l_len
);
2069 if (!lo
->posix_lock
) {
2070 fuse_reply_err(req
, ENOSYS
);
2075 fuse_reply_err(req
, EOPNOTSUPP
);
2079 inode
= lo_inode(req
, ino
);
2081 fuse_reply_err(req
, EBADF
);
2085 pthread_mutex_lock(&inode
->plock_mutex
);
2087 lookup_create_plock_ctx(lo
, inode
, fi
->lock_owner
, lock
->l_pid
, &ret
);
2094 /* TODO: Is it alright to modify flock? */
2096 ret
= fcntl(plock
->fd
, F_OFD_SETLK
, lock
);
2102 pthread_mutex_unlock(&inode
->plock_mutex
);
2103 lo_inode_put(lo
, &inode
);
2105 fuse_reply_err(req
, saverr
);
/*
 * Handler installed as .fsyncdir in lo_oper.
 *
 * Resolves the open directory with lo_dirp() (EBADF when that fails) and
 * flushes it; the visible call is fdatasync(fd).  NOTE(review): the
 * datasync argument presumably selects between fdatasync() and fsync() --
 * the other branch is not visible here.  The reply carries errno when the
 * sync call returned -1 and 0 otherwise.
 */
2108 static void lo_fsyncdir(fuse_req_t req
, fuse_ino_t ino
, int datasync
,
2109 struct fuse_file_info
*fi
)
2117 d
= lo_dirp(req
, fi
);
2119 fuse_reply_err(req
, EBADF
);
2125 res
= fdatasync(fd
);
2132 fuse_reply_err(req
, res
== -1 ? errno
: 0);
/*
 * Open handler: resolves the inode (EBADF when unknown), delegates to
 * lo_do_open() with no pre-existing fd (-1), releases the inode reference
 * and answers with fuse_reply_err() on failure or fuse_reply_open() on
 * success.
 */
2135 static void lo_open(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
)
2137 struct lo_data
*lo
= lo_data(req
);
2138 struct lo_inode
*inode
= lo_inode(req
, ino
);
2141 fuse_log(FUSE_LOG_DEBUG
, "lo_open(ino=%" PRIu64
", flags=%d, kill_priv=%d)"
2142 "\n", ino
, fi
->flags
, fi
->kill_priv
);
2145 fuse_reply_err(req
, EBADF
);
2149 err
= lo_do_open(lo
, inode
, -1, fi
);
2150 lo_inode_put(lo
, &inode
);
2152 fuse_reply_err(req
, err
);
2154 fuse_reply_open(req
, fi
);
/*
 * Handler installed as .release in lo_oper: under lo->mutex, looks up
 * fi->fh in lo->fd_map and removes the mapping, then answers the request
 * with 0.
 */
2158 static void lo_release(fuse_req_t req
, fuse_ino_t ino
,
2159 struct fuse_file_info
*fi
)
2161 struct lo_data
*lo
= lo_data(req
);
2162 struct lo_map_elem
*elem
;
2167 pthread_mutex_lock(&lo
->mutex
);
2168 elem
= lo_map_get(&lo
->fd_map
, fi
->fh
);
2172 lo_map_remove(&lo
->fd_map
, fi
->fh
);
2174 pthread_mutex_unlock(&lo
->mutex
);
2177 fuse_reply_err(req
, 0);
/*
 * Flush handler.
 *
 * Answers EBADF for an unknown inode or for anything that is not a regular
 * file.  When POSIX locks are enabled, the entry for fi->lock_owner is
 * removed from inode->posix_locks under inode->plock_mutex.  The flush
 * itself is performed by closing a dup() of the file's fd, so the original
 * fd stays open; errno from that close is passed back to the client.
 */
2180 static void lo_flush(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
)
2184 struct lo_inode
*inode
;
2185 struct lo_data
*lo
= lo_data(req
);
2187 inode
= lo_inode(req
, ino
);
2189 fuse_reply_err(req
, EBADF
);
2193 if (!S_ISREG(inode
->filetype
)) {
2194 lo_inode_put(lo
, &inode
);
2195 fuse_reply_err(req
, EBADF
);
2199 /* An fd is going away. Cleanup associated posix locks */
2200 if (lo
->posix_lock
) {
2201 pthread_mutex_lock(&inode
->plock_mutex
);
2202 g_hash_table_remove(inode
->posix_locks
,
2203 GUINT_TO_POINTER(fi
->lock_owner
));
2204 pthread_mutex_unlock(&inode
->plock_mutex
);
2206 res
= close(dup(lo_fi_fd(req
, fi
)));
2207 lo_inode_put(lo
, &inode
);
2208 fuse_reply_err(req
, res
== -1 ? errno
: 0);
/*
 * Fsync handler.
 *
 * Answers EBADF when the inode is unknown.  The fd to sync is either a
 * fresh O_RDWR fd from lo_inode_open() or the fd behind fi (lo_fi_fd());
 * NOTE(review): the condition choosing between them is not visible here,
 * presumably whether fi is NULL -- confirm.  The sync is fdatasync() or
 * fsync(), and res holds errno on failure or 0, which becomes the reply.
 */
2211 static void lo_fsync(fuse_req_t req
, fuse_ino_t ino
, int datasync
,
2212 struct fuse_file_info
*fi
)
2214 struct lo_inode
*inode
= lo_inode(req
, ino
);
2215 struct lo_data
*lo
= lo_data(req
);
2219 fuse_log(FUSE_LOG_DEBUG
, "lo_fsync(ino=%" PRIu64
", fi=0x%p)\n", ino
,
2223 fuse_reply_err(req
, EBADF
);
2228 fd
= lo_inode_open(lo
, inode
, O_RDWR
);
2234 fd
= lo_fi_fd(req
, fi
);
2238 res
= fdatasync(fd
) == -1 ? errno
: 0;
2240 res
= fsync(fd
) == -1 ? errno
: 0;
2246 lo_inode_put(lo
, &inode
);
2247 fuse_reply_err(req
, res
);
/*
 * Read handler: describes the data as a single fd-backed fuse_bufvec
 * element (size bytes from the file's fd at position offset, flags
 * FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK) and hands it to fuse_reply_data().
 */
2250 static void lo_read(fuse_req_t req
, fuse_ino_t ino
, size_t size
, off_t offset
,
2251 struct fuse_file_info
*fi
)
2253 struct fuse_bufvec buf
= FUSE_BUFVEC_INIT(size
);
2255 fuse_log(FUSE_LOG_DEBUG
,
2256 "lo_read(ino=%" PRIu64
", size=%zd, "
2258 ino
, size
, (unsigned long)offset
);
2260 buf
.buf
[0].flags
= FUSE_BUF_IS_FD
| FUSE_BUF_FD_SEEK
;
2261 buf
.buf
[0].fd
= lo_fi_fd(req
, fi
);
2262 buf
.buf
[0].pos
= offset
;
2264 fuse_reply_data(req
, &buf
);
/*
 * Handler installed as .write_buf in lo_oper.
 *
 * The destination is an fd-backed fuse_bufvec element covering
 * fuse_buf_size(in_buf) bytes of the file's fd at position off.  Before
 * copying, drop_security_capability() is called on that fd, and when
 * fi->kill_priv is set the effective FSETID capability is dropped for the
 * duration of the write.  fuse_buf_copy() performs the write; a failure is
 * reported as -res, success as the number of bytes written.  FSETID is
 * regained afterwards if it was dropped.
 */
2267 static void lo_write_buf(fuse_req_t req
, fuse_ino_t ino
,
2268 struct fuse_bufvec
*in_buf
, off_t off
,
2269 struct fuse_file_info
*fi
)
2273 struct fuse_bufvec out_buf
= FUSE_BUFVEC_INIT(fuse_buf_size(in_buf
));
2274 bool cap_fsetid_dropped
= false;
2276 out_buf
.buf
[0].flags
= FUSE_BUF_IS_FD
| FUSE_BUF_FD_SEEK
;
2277 out_buf
.buf
[0].fd
= lo_fi_fd(req
, fi
);
2278 out_buf
.buf
[0].pos
= off
;
2280 fuse_log(FUSE_LOG_DEBUG
,
2281 "lo_write_buf(ino=%" PRIu64
", size=%zd, off=%lu kill_priv=%d)\n",
2282 ino
, out_buf
.buf
[0].size
, (unsigned long)off
, fi
->kill_priv
);
2284 res
= drop_security_capability(lo_data(req
), out_buf
.buf
[0].fd
);
2286 fuse_reply_err(req
, res
);
2291 * If kill_priv is set, drop CAP_FSETID which should lead to kernel
2292 * clearing setuid/setgid on file. Note, for WRITE, we need to do
2293 * this even if killpriv_v2 is not enabled. fuse direct write path
2296 if (fi
->kill_priv
) {
2297 res
= drop_effective_cap("FSETID", &cap_fsetid_dropped
);
2299 fuse_reply_err(req
, res
);
2304 res
= fuse_buf_copy(&out_buf
, in_buf
);
2306 fuse_reply_err(req
, -res
);
2308 fuse_reply_write(req
, (size_t)res
);
2311 if (cap_fsetid_dropped
) {
2312 res
= gain_effective_cap("FSETID");
2314 fuse_log(FUSE_LOG_ERR
, "Failed to gain CAP_FSETID\n");
/*
 * Handler installed as .statfs in lo_oper: runs fstatvfs() on the fd
 * returned by lo_fd(req, ino) and answers with errno on failure or with the
 * filled statvfs structure on success.
 */
2319 static void lo_statfs(fuse_req_t req
, fuse_ino_t ino
)
2322 struct statvfs stbuf
;
2324 res
= fstatvfs(lo_fd(req
, ino
), &stbuf
);
2326 fuse_reply_err(req
, errno
);
2328 fuse_reply_statfs(req
, &stbuf
);
/*
 * Handler installed as .fallocate in lo_oper.
 *
 * err starts as EOPNOTSUPP, which is what the client gets when neither
 * build option below is defined.  With CONFIG_FALLOCATE the request is
 * passed to fallocate() including mode; with CONFIG_POSIX_FALLOCATE only
 * posix_fallocate() is available, which takes no mode argument, and one
 * path in that branch answers EOPNOTSUPP directly.
 */
2332 static void lo_fallocate(fuse_req_t req
, fuse_ino_t ino
, int mode
, off_t offset
,
2333 off_t length
, struct fuse_file_info
*fi
)
2335 int err
= EOPNOTSUPP
;
2338 #ifdef CONFIG_FALLOCATE
2339 err
= fallocate(lo_fi_fd(req
, fi
), mode
, offset
, length
);
2344 #elif defined(CONFIG_POSIX_FALLOCATE)
2346 fuse_reply_err(req
, EOPNOTSUPP
);
2350 err
= posix_fallocate(lo_fi_fd(req
, fi
), offset
, length
);
2353 fuse_reply_err(req
, err
);
/*
 * BSD lock handler: applies flock() with op to the fd behind fi and answers
 * with errno when it returns -1, otherwise 0.
 */
2356 static void lo_flock(fuse_req_t req
, fuse_ino_t ino
, struct fuse_file_info
*fi
,
2362 res
= flock(lo_fi_fd(req
, fi
), op
);
2364 fuse_reply_err(req
, res
== -1 ? errno
: 0);
/*
 * Bit flags stored in XattrMapEntry.flags.  Bits 0-2 select the action of a
 * rule (OK / BAD / PREFIX); bits 16-17 select which direction the rule
 * applies to (CLIENT, SERVER, or both via ALL).
 */
2369 * Exit; process attribute unmodified if matched.
2370 * An empty key applies to all.
2372 #define XATTR_MAP_FLAG_OK (1 << 0)
2374 * The attribute is unwanted;
2375 * EPERM on write, hidden on read.
2377 #define XATTR_MAP_FLAG_BAD (1 << 1)
2379 * For attr that start with 'key' prepend 'prepend'
2380 * 'key' may be empty to prepend for all attrs
2381 * key is defined from set/remove point of view.
2382 * Automatically reversed on read
2384 #define XATTR_MAP_FLAG_PREFIX (1 << 2)
2387 /* Apply rule to get/set/remove */
2388 #define XATTR_MAP_FLAG_CLIENT (1 << 16)
2389 /* Apply rule to list */
2390 #define XATTR_MAP_FLAG_SERVER (1 << 17)
2391 /* Apply rule to all */
2392 #define XATTR_MAP_FLAG_ALL (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT)
/*
 * Append *new_entry to lo->xattr_map_list.
 *
 * The array is grown by one element with g_realloc_n(), the entry is copied
 * by struct assignment (so the key/prepend pointers inside it are shared,
 * not duplicated), xattr_map_nentries is incremented, and the possibly
 * moved array pointer is stored back into lo.
 */
2394 static void add_xattrmap_entry(struct lo_data
*lo
,
2395 const XattrMapEntry
*new_entry
)
2397 XattrMapEntry
*res
= g_realloc_n(lo
->xattr_map_list
,
2398 lo
->xattr_map_nentries
+ 1,
2399 sizeof(XattrMapEntry
));
2400 res
[lo
->xattr_map_nentries
++] = *new_entry
;
2402 lo
->xattr_map_list
= res
;
/*
 * Release the xattr map held by lo: walks the xattr_map_nentries entries,
 * freeing per-entry strings with g_free(), then resets xattr_map_list to
 * NULL.
 *
 * NOTE(review): xattr_map_nentries is reset to -1 rather than 0.  The field
 * is printed with %zu elsewhere in this file, so if it is a size_t this
 * stores SIZE_MAX -- confirm that no loop over the entries can run after
 * this point.
 */
2405 static void free_xattrmap(struct lo_data
*lo
)
2407 XattrMapEntry
*map
= lo
->xattr_map_list
;
2414 for (i
= 0; i
< lo
->xattr_map_nentries
; i
++) {
2416 g_free(map
[i
].prepend
);
2420 lo
->xattr_map_list
= NULL
;
2421 lo
->xattr_map_nentries
= -1;
2425 * Handle the 'map' type, which is sugar for a set of commands
2426 * for the common case of prefixing a subset or everything,
2427 * and allowing anything not prefixed through.
2428 * It must be the last entry in the stream, although there
2429 * can be other entries before it.
2433 * key may be empty, in which case all entries are prefixed.
/*
 * Expand a 'map' rule into the equivalent list of basic rules.
 *
 * rule points just past the 'map' keyword and sep is the separator
 * character of the enclosing rule.  The key and prefix fields are each
 * terminated by sep and are copied out with g_strndup(); anything but end
 * of string after them is an error.  Every error is logged with
 * FUSE_LOG_ERR.
 *
 * Entries added (each with its own g_strdup() copies of the strings):
 *   1. PREFIX | ALL  key -> prefix
 *   then, for the "prefix all" case:
 *   2. BAD | ALL     everything else
 *   or, for the "prefix matching" case:
 *   2. BAD | SERVER  host attributes starting with key
 *   3. BAD | CLIENT  client attributes starting with prefix
 *   4. OK  | ALL     everything else
 */
2435 static void parse_xattrmap_map(struct lo_data
*lo
,
2436 const char *rule
, char sep
)
2441 XattrMapEntry tmp_entry
;
2444 fuse_log(FUSE_LOG_ERR
,
2445 "%s: Expecting '%c' after 'map' keyword, found '%c'\n",
2446 __func__
, sep
, *rule
);
2452 /* At start of 'key' field */
2453 tmp
= strchr(rule
, sep
);
2455 fuse_log(FUSE_LOG_ERR
,
2456 "%s: Missing '%c' at end of key field in map rule\n",
2461 key
= g_strndup(rule
, tmp
- rule
);
2464 /* At start of prefix field */
2465 tmp
= strchr(rule
, sep
);
2467 fuse_log(FUSE_LOG_ERR
,
2468 "%s: Missing '%c' at end of prefix field in map rule\n",
2473 prefix
= g_strndup(rule
, tmp
- rule
);
2477 * This should be the end of the string, we don't allow
2478 * any more commands after 'map'.
2481 fuse_log(FUSE_LOG_ERR
,
2482 "%s: Expecting end of command after map, found '%c'\n",
2487 /* 1st: Prefix matches/everything */
2488 tmp_entry
.flags
= XATTR_MAP_FLAG_PREFIX
| XATTR_MAP_FLAG_ALL
;
2489 tmp_entry
.key
= g_strdup(key
);
2490 tmp_entry
.prepend
= g_strdup(prefix
);
2491 add_xattrmap_entry(lo
, &tmp_entry
);
2494 /* Prefix all case */
2496 /* 2nd: Hide any non-prefixed entries on the host */
2497 tmp_entry
.flags
= XATTR_MAP_FLAG_BAD
| XATTR_MAP_FLAG_ALL
;
2498 tmp_entry
.key
= g_strdup("");
2499 tmp_entry
.prepend
= g_strdup("");
2500 add_xattrmap_entry(lo
, &tmp_entry
);
2502 /* Prefix matching case */
2504 /* 2nd: Hide non-prefixed but matching entries on the host */
2505 tmp_entry
.flags
= XATTR_MAP_FLAG_BAD
| XATTR_MAP_FLAG_SERVER
;
2506 tmp_entry
.key
= g_strdup(""); /* Not used */
2507 tmp_entry
.prepend
= g_strdup(key
);
2508 add_xattrmap_entry(lo
, &tmp_entry
);
2510 /* 3rd: Stop the client accessing prefixed attributes directly */
2511 tmp_entry
.flags
= XATTR_MAP_FLAG_BAD
| XATTR_MAP_FLAG_CLIENT
;
2512 tmp_entry
.key
= g_strdup(prefix
);
2513 tmp_entry
.prepend
= g_strdup(""); /* Not used */
2514 add_xattrmap_entry(lo
, &tmp_entry
);
2516 /* 4th: Everything else is OK */
2517 tmp_entry
.flags
= XATTR_MAP_FLAG_OK
| XATTR_MAP_FLAG_ALL
;
2518 tmp_entry
.key
= g_strdup("");
2519 tmp_entry
.prepend
= g_strdup("");
2520 add_xattrmap_entry(lo
, &tmp_entry
);
/*
 * Parse the rule string in lo->xattrmap into lo->xattr_map_list.
 *
 * Each rule has the shape
 *     <sep>type<sep>scope<sep>key<sep>prepend<sep>
 * where <sep> is the first non-space character of the rule, type is one of
 * 'prefix', 'ok', 'bad' or 'map', and scope is one of 'client', 'server' or
 * 'all'.  A 'map' rule is handed to parse_xattrmap_map().  Every syntax
 * error is reported with FUSE_LOG_ERR, as is an empty map.
 *
 * After parsing, the mapping of "security.capability" is resolved once with
 * xattr_map_client() and cached in lo->xattr_security_capability; a 1-1
 * mapping is stored as NULL.
 */
2527 static void parse_xattrmap(struct lo_data
*lo
)
2529 const char *map
= lo
->xattrmap
;
2533 lo
->xattr_map_nentries
= 0;
2535 XattrMapEntry tmp_entry
;
2538 if (isspace(*map
)) {
2542 /* The separator is the first non-space of the rule */
2548 tmp_entry
.flags
= 0;
2549 /* Start of 'type' */
2550 if (strstart(map
, "prefix", &map
)) {
2551 tmp_entry
.flags
|= XATTR_MAP_FLAG_PREFIX
;
2552 } else if (strstart(map
, "ok", &map
)) {
2553 tmp_entry
.flags
|= XATTR_MAP_FLAG_OK
;
2554 } else if (strstart(map
, "bad", &map
)) {
2555 tmp_entry
.flags
|= XATTR_MAP_FLAG_BAD
;
2556 } else if (strstart(map
, "map", &map
)) {
2558 * map is sugar that adds a number of rules, and must be
2561 parse_xattrmap_map(lo
, map
, sep
);
2564 fuse_log(FUSE_LOG_ERR
,
2565 "%s: Unexpected type;"
2566 " Expecting 'prefix', 'ok', 'bad' or 'map' in rule %zu\n",
2567 __func__
, lo
->xattr_map_nentries
);
2571 if (*map
++ != sep
) {
2572 fuse_log(FUSE_LOG_ERR
,
2573 "%s: Missing '%c' at end of type field of rule %zu\n",
2574 __func__
, sep
, lo
->xattr_map_nentries
);
2578 /* Start of 'scope' */
2579 if (strstart(map
, "client", &map
)) {
2580 tmp_entry
.flags
|= XATTR_MAP_FLAG_CLIENT
;
2581 } else if (strstart(map
, "server", &map
)) {
2582 tmp_entry
.flags
|= XATTR_MAP_FLAG_SERVER
;
2583 } else if (strstart(map
, "all", &map
)) {
2584 tmp_entry
.flags
|= XATTR_MAP_FLAG_ALL
;
2586 fuse_log(FUSE_LOG_ERR
,
2587 "%s: Unexpected scope;"
2588 " Expecting 'client', 'server', or 'all', in rule %zu\n",
2589 __func__
, lo
->xattr_map_nentries
);
/*
 * map has already been advanced past the character that was compared, so
 * the offending character is map[-1]; *map would be the following byte
 * (and one past the NUL when the string ended here).
 */
2593 if (*map
++ != sep
) {
2594 fuse_log(FUSE_LOG_ERR
,
2595 "%s: Expecting '%c' found '%c'"
2596 " after scope in rule %zu\n",
2597 __func__
, sep
, map[-1]
, lo
->xattr_map_nentries
);
2601 /* At start of 'key' field */
2602 tmp
= strchr(map
, sep
);
2604 fuse_log(FUSE_LOG_ERR
,
2605 "%s: Missing '%c' at end of key field of rule %zu\n",
2606 __func__
, sep
, lo
->xattr_map_nentries
);
2609 tmp_entry
.key
= g_strndup(map
, tmp
- map
);
2612 /* At start of 'prepend' field */
2613 tmp
= strchr(map
, sep
);
2615 fuse_log(FUSE_LOG_ERR
,
2616 "%s: Missing '%c' at end of prepend field of rule %zu\n",
2617 __func__
, sep
, lo
->xattr_map_nentries
);
2620 tmp_entry
.prepend
= g_strndup(map
, tmp
- map
);
2623 add_xattrmap_entry(lo
, &tmp_entry
);
2624 /* End of rule - go around again for another rule */
2627 if (!lo
->xattr_map_nentries
) {
2628 fuse_log(FUSE_LOG_ERR
, "Empty xattr map\n");
2632 ret
= xattr_map_client(lo
, "security.capability",
2633 &lo
->xattr_security_capability
);
2635 fuse_log(FUSE_LOG_ERR
, "Failed to map security.capability: %s\n",
2639 if (!lo
->xattr_security_capability
||
2640 !strcmp(lo
->xattr_security_capability
, "security.capability")) {
2641 /* 1-1 mapping, don't need to do anything */
2642 free(lo
->xattr_security_capability
);
2643 lo
->xattr_security_capability
= NULL
;
2648 * For use with getxattr/setxattr/removexattr, where the client
2649 * gives us a name and we may need to choose a different one.
2650 * Allocates a buffer for the result placing it in *out_name.
2651 * If there's no change then *out_name is not set.
2652 * Returns 0 on success
2653 * Can return -EPERM to indicate we block a given attribute
2654 * (in which case out_name is not allocated)
2655 * Can return -ENOMEM to indicate out_name couldn't be allocated.
/*
 * Walks the map in order and acts on the first entry that has the CLIENT
 * scope flag and whose key is a prefix of client_name (strstart()).  An
 * empty key therefore matches every name.  For a PREFIX entry the result is
 * a newly allocated "<prepend><client_name>" string obtained with
 * g_try_malloc() and formatted with sprintf().
 */
2657 static int xattr_map_client(const struct lo_data
*lo
, const char *client_name
,
2661 for (i
= 0; i
< lo
->xattr_map_nentries
; i
++) {
2662 const XattrMapEntry
*cur_entry
= lo
->xattr_map_list
+ i
;
2664 if ((cur_entry
->flags
& XATTR_MAP_FLAG_CLIENT
) &&
2665 (strstart(client_name
, cur_entry
->key
, NULL
))) {
2666 if (cur_entry
->flags
& XATTR_MAP_FLAG_BAD
) {
2669 if (cur_entry
->flags
& XATTR_MAP_FLAG_OK
) {
2670 /* Unmodified name */
2673 if (cur_entry
->flags
& XATTR_MAP_FLAG_PREFIX
) {
/* Room for both strings plus the terminating NUL */
2674 *out_name
= g_try_malloc(strlen(client_name
) +
2675 strlen(cur_entry
->prepend
) + 1);
2679 sprintf(*out_name
, "%s%s", cur_entry
->prepend
, client_name
);
2689 * For use with listxattr where the server fs gives us a name and we may need
2690 * to sanitize this for the client.
2691 * Returns a pointer to the result in *out_name
2692 * This is always the original string or the current string with some prefix
2693 * removed; no reallocation is done.
2694 * Returns 0 on success
2695 * Can return -ENODATA to indicate the name should be dropped from the list.
/*
 * Walks the map in order and acts on the first entry that has the SERVER
 * scope flag and whose prepend string is a prefix of server_name
 * (strstart(), which also yields the position just past that prefix in
 * end).  For an OK entry *out_name is the unmodified server_name.
 */
2697 static int xattr_map_server(const struct lo_data
*lo
, const char *server_name
,
2698 const char **out_name
)
2703 for (i
= 0; i
< lo
->xattr_map_nentries
; i
++) {
2704 const XattrMapEntry
*cur_entry
= lo
->xattr_map_list
+ i
;
2706 if ((cur_entry
->flags
& XATTR_MAP_FLAG_SERVER
) &&
2707 (strstart(server_name
, cur_entry
->prepend
, &end
))) {
2708 if (cur_entry
->flags
& XATTR_MAP_FLAG_BAD
) {
2711 if (cur_entry
->flags
& XATTR_MAP_FLAG_OK
) {
2712 *out_name
= server_name
;
2715 if (cur_entry
->flags
& XATTR_MAP_FLAG_PREFIX
) {
/*
 * Handler installed as .getxattr in lo_oper.
 *
 * in_name is first translated with xattr_map_client(); a mapping error is
 * returned to the client (with a dedicated branch for -EPERM).  The inode
 * is then resolved (EBADF when unknown) and the attribute is read into a
 * malloc()ed buffer of size bytes.
 *
 * Regular files and directories are reopened O_RDONLY through
 * lo->proc_self_fd using the inode fd number as the name and queried with
 * fgetxattr().  Other file types are queried by name with getxattr() after
 * changing the working directory to lo->proc_self_fd, and the working
 * directory is switched back to lo->root.fd afterwards.
 *
 * NOTE(review): both fchdir() calls are made inside assert(); they would
 * not be executed in a build that defines NDEBUG.
 */
2726 static void lo_getxattr(fuse_req_t req
, fuse_ino_t ino
, const char *in_name
,
2729 struct lo_data
*lo
= lo_data(req
);
2734 struct lo_inode
*inode
;
2742 ret
= xattr_map_client(lo
, in_name
, &mapped_name
);
2744 if (ret
== -EPERM
) {
2747 fuse_reply_err(req
, -ret
);
2755 inode
= lo_inode(req
, ino
);
2757 fuse_reply_err(req
, EBADF
);
2758 g_free(mapped_name
);
2763 if (!lo_data(req
)->xattr
) {
2767 fuse_log(FUSE_LOG_DEBUG
, "lo_getxattr(ino=%" PRIu64
", name=%s size=%zd)\n",
2771 value
= malloc(size
);
2777 sprintf(procname
, "%i", inode
->fd
);
2779 * It is not safe to open() non-regular/non-dir files in file server
2780 * unless O_PATH is used, so use that method for regular files/dir
2781 * only (as it seems giving less performance overhead).
2782 * Otherwise, call fchdir() to avoid open().
2784 if (S_ISREG(inode
->filetype
) || S_ISDIR(inode
->filetype
)) {
2785 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
2789 ret
= fgetxattr(fd
, name
, value
, size
);
2791 /* fchdir should not fail here */
2792 assert(fchdir(lo
->proc_self_fd
) == 0);
2793 ret
= getxattr(procname
, name
, value
, size
);
2794 assert(fchdir(lo
->root
.fd
) == 0);
2805 fuse_reply_buf(req
, value
, ret
);
2807 fuse_reply_xattr(req
, ret
);
2816 lo_inode_put(lo
, &inode
);
2822 fuse_reply_err(req
, saverr
);
2823 g_free(mapped_name
);
/*
 * Handler installed as .listxattr in lo_oper.
 *
 * Resolves the inode (EBADF when unknown) and reads the host attribute list
 * into a malloc()ed buffer of size bytes, using flistxattr() on a reopened
 * fd for regular files and directories and listxattr() by name (after
 * fchdir() to lo->proc_self_fd) for everything else.
 *
 * When an xattr map is configured, the NUL-separated names are rewritten in
 * place: each name is passed through xattr_map_server() and the surviving
 * names are compacted towards the start of the buffer with memmove().
 *
 * NOTE(review): both fchdir() calls are made inside assert(); they would
 * not be executed in a build that defines NDEBUG.
 */
2827 static void lo_listxattr(fuse_req_t req
, fuse_ino_t ino
, size_t size
)
2829 struct lo_data
*lo
= lo_data(req
);
2832 struct lo_inode
*inode
;
2837 inode
= lo_inode(req
, ino
);
2839 fuse_reply_err(req
, EBADF
);
2844 if (!lo_data(req
)->xattr
) {
2848 fuse_log(FUSE_LOG_DEBUG
, "lo_listxattr(ino=%" PRIu64
", size=%zd)\n", ino
,
2852 value
= malloc(size
);
2858 sprintf(procname
, "%i", inode
->fd
);
2859 if (S_ISREG(inode
->filetype
) || S_ISDIR(inode
->filetype
)) {
2860 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
2864 ret
= flistxattr(fd
, value
, size
);
2866 /* fchdir should not fail here */
2867 assert(fchdir(lo
->proc_self_fd
) == 0);
2868 ret
= listxattr(procname
, value
, size
);
2869 assert(fchdir(lo
->root
.fd
) == 0);
2881 if (lo
->xattr_map_list
) {
2883 * Map the names back, some attributes might be dropped,
2884 * some shortened, but not increased, so we shouldn't
2887 size_t out_index
, in_index
;
2890 while (in_index
< ret
) {
2891 const char *map_out
;
2892 char *in_ptr
= value
+ in_index
;
2893 /* Length of current attribute name */
2894 size_t in_len
= strlen(value
+ in_index
) + 1;
2896 int mapret
= xattr_map_server(lo
, in_ptr
, &map_out
);
2897 if (mapret
!= -ENODATA
&& mapret
!= 0) {
2898 /* Shouldn't happen */
2903 /* Either unchanged, or truncated */
2905 if (map_out
!= in_ptr
) {
2906 /* +1 copies the NIL */
2907 out_len
= strlen(map_out
) + 1;
2913 * Move result along, may still be needed for an unchanged
2914 * entry if a previous entry was changed.
2916 memmove(value
+ out_index
, map_out
, out_len
);
2918 out_index
+= out_len
;
2927 fuse_reply_buf(req
, value
, ret
);
2930 * xattrmap only ever shortens the result,
2931 * so we don't need to do anything clever with the
2932 * allocation length here.
2934 fuse_reply_xattr(req
, ret
);
2943 lo_inode_put(lo
, &inode
);
2949 fuse_reply_err(req
, saverr
);
/*
 * Handler installed as .setxattr in lo_oper.
 *
 * in_name is first translated with xattr_map_client(); a mapping error is
 * returned to the client.  The inode is then resolved (EBADF when unknown).
 * Regular files and directories are reopened O_RDONLY through
 * lo->proc_self_fd and updated with fsetxattr(); other file types are
 * updated by name with setxattr() after fchdir() to lo->proc_self_fd, with
 * the working directory switched back to lo->root.fd afterwards.  The reply
 * is errno when the call returned -1 and 0 otherwise.
 *
 * NOTE(review): both fchdir() calls are made inside assert(); they would
 * not be executed in a build that defines NDEBUG.
 */
2953 static void lo_setxattr(fuse_req_t req
, fuse_ino_t ino
, const char *in_name
,
2954 const char *value
, size_t size
, int flags
)
2959 struct lo_data
*lo
= lo_data(req
);
2960 struct lo_inode
*inode
;
2968 ret
= xattr_map_client(lo
, in_name
, &mapped_name
);
2970 fuse_reply_err(req
, -ret
);
2978 inode
= lo_inode(req
, ino
);
2980 fuse_reply_err(req
, EBADF
);
2981 g_free(mapped_name
);
2986 if (!lo_data(req
)->xattr
) {
/*
 * value is a buffer of size bytes, not a C string, so the amount printed is
 * bounded with a precision instead of relying on a terminating NUL.
 */
2990 fuse_log(FUSE_LOG_DEBUG
, "lo_setxattr(ino=%" PRIu64
2991 ", name=%s value=%.*s size=%zd)\n", ino
, name
, (int)size
, value
, size
);
2993 sprintf(procname
, "%i", inode
->fd
);
2994 if (S_ISREG(inode
->filetype
) || S_ISDIR(inode
->filetype
)) {
2995 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
3000 ret
= fsetxattr(fd
, name
, value
, size
, flags
);
3002 /* fchdir should not fail here */
3003 assert(fchdir(lo
->proc_self_fd
) == 0);
3004 ret
= setxattr(procname
, name
, value
, size
, flags
);
3005 assert(fchdir(lo
->root
.fd
) == 0);
3008 saverr
= ret
== -1 ? errno
: 0;
3015 lo_inode_put(lo
, &inode
);
3016 g_free(mapped_name
);
3017 fuse_reply_err(req
, saverr
);
/*
 * Handler installed as .removexattr in lo_oper.
 *
 * in_name is first translated with xattr_map_client(); a mapping error is
 * returned to the client.  The inode is then resolved (EBADF when unknown).
 * Regular files and directories are reopened O_RDONLY through
 * lo->proc_self_fd and updated with fremovexattr(); other file types are
 * updated by name with removexattr() after fchdir() to lo->proc_self_fd,
 * with the working directory switched back to lo->root.fd afterwards.  The
 * reply is errno when the call returned -1 and 0 otherwise.
 *
 * NOTE(review): both fchdir() calls are made inside assert(); they would
 * not be executed in a build that defines NDEBUG.
 */
3020 static void lo_removexattr(fuse_req_t req
, fuse_ino_t ino
, const char *in_name
)
3025 struct lo_data
*lo
= lo_data(req
);
3026 struct lo_inode
*inode
;
3034 ret
= xattr_map_client(lo
, in_name
, &mapped_name
);
3036 fuse_reply_err(req
, -ret
);
3044 inode
= lo_inode(req
, ino
);
3046 fuse_reply_err(req
, EBADF
);
3047 g_free(mapped_name
);
3052 if (!lo_data(req
)->xattr
) {
3056 fuse_log(FUSE_LOG_DEBUG
, "lo_removexattr(ino=%" PRIu64
", name=%s)\n", ino
,
3059 sprintf(procname
, "%i", inode
->fd
);
3060 if (S_ISREG(inode
->filetype
) || S_ISDIR(inode
->filetype
)) {
3061 fd
= openat(lo
->proc_self_fd
, procname
, O_RDONLY
);
3066 ret
= fremovexattr(fd
, name
);
3068 /* fchdir should not fail here */
3069 assert(fchdir(lo
->proc_self_fd
) == 0);
3070 ret
= removexattr(procname
, name
);
3071 assert(fchdir(lo
->root
.fd
) == 0);
3074 saverr
= ret
== -1 ? errno
: 0;
3081 lo_inode_put(lo
, &inode
);
3082 g_free(mapped_name
);
3083 fuse_reply_err(req
, saverr
);
3086 #ifdef HAVE_COPY_FILE_RANGE
/*
 * Handler installed as .copy_file_range in lo_oper (only built when
 * HAVE_COPY_FILE_RANGE is defined).
 *
 * Resolves both file handles to host fds with lo_fi_fd() and forwards the
 * request to copy_file_range(), passing the offsets by address.  The reply
 * is errno on failure or the number of bytes copied via fuse_reply_write().
 */
3087 static void lo_copy_file_range(fuse_req_t req
, fuse_ino_t ino_in
, off_t off_in
,
3088 struct fuse_file_info
*fi_in
, fuse_ino_t ino_out
,
3089 off_t off_out
, struct fuse_file_info
*fi_out
,
3090 size_t len
, int flags
)
3095 in_fd
= lo_fi_fd(req
, fi_in
);
3096 out_fd
= lo_fi_fd(req
, fi_out
);
3098 fuse_log(FUSE_LOG_DEBUG
,
3099 "lo_copy_file_range(ino=%" PRIu64
"/fd=%d, "
3100 "off=%lu, ino=%" PRIu64
"/fd=%d, "
3101 "off=%lu, size=%zd, flags=0x%x)\n",
3102 ino_in
, in_fd
, off_in
, ino_out
, out_fd
, off_out
, len
, flags
);
3104 res
= copy_file_range(in_fd
, &off_in
, out_fd
, &off_out
, len
, flags
);
3106 fuse_reply_err(req
, errno
);
3108 fuse_reply_write(req
, res
);
/*
 * Seek handler: calls lseek() on the fd behind fi and answers with the
 * resulting offset via fuse_reply_lseek(), or with errno via
 * fuse_reply_err().
 */
3113 static void lo_lseek(fuse_req_t req
, fuse_ino_t ino
, off_t off
, int whence
,
3114 struct fuse_file_info
*fi
)
3119 res
= lseek(lo_fi_fd(req
, fi
), off
, whence
);
3121 fuse_reply_lseek(req
, res
);
3123 fuse_reply_err(req
, errno
);
/*
 * Handler installed as .destroy in lo_oper.
 *
 * With lo->mutex held, repeatedly takes the first entry of lo->inodes (the
 * iterator is re-initialised before each g_hash_table_iter_next() call) and
 * drops all of its lookup references with unref_inode(); the
 * iter_next() == FALSE branch is taken once the table is empty.
 */
3127 static void lo_destroy(void *userdata
)
3129 struct lo_data
*lo
= (struct lo_data
*)userdata
;
3131 pthread_mutex_lock(&lo
->mutex
);
3133 GHashTableIter iter
;
3134 gpointer key
, value
;
3136 g_hash_table_iter_init(&iter
, lo
->inodes
);
3137 if (!g_hash_table_iter_next(&iter
, &key
, &value
)) {
3141 struct lo_inode
*inode
= value
;
3142 unref_inode(lo
, inode
, inode
->nlookup
);
3144 pthread_mutex_unlock(&lo
->mutex
);
/*
 * Table of low-level FUSE operation handlers implemented in this file.
 * copy_file_range is only wired up when HAVE_COPY_FILE_RANGE is defined.
 */
3147 static struct fuse_lowlevel_ops lo_oper
= {
3149 .lookup
= lo_lookup
,
3152 .symlink
= lo_symlink
,
3154 .unlink
= lo_unlink
,
3156 .rename
= lo_rename
,
3157 .forget
= lo_forget
,
3158 .forget_multi
= lo_forget_multi
,
3159 .getattr
= lo_getattr
,
3160 .setattr
= lo_setattr
,
3161 .readlink
= lo_readlink
,
3162 .opendir
= lo_opendir
,
3163 .readdir
= lo_readdir
,
3164 .readdirplus
= lo_readdirplus
,
3165 .releasedir
= lo_releasedir
,
3166 .fsyncdir
= lo_fsyncdir
,
3167 .create
= lo_create
,
3171 .release
= lo_release
,
3175 .write_buf
= lo_write_buf
,
3176 .statfs
= lo_statfs
,
3177 .fallocate
= lo_fallocate
,
3179 .getxattr
= lo_getxattr
,
3180 .listxattr
= lo_listxattr
,
3181 .setxattr
= lo_setxattr
,
3182 .removexattr
= lo_removexattr
,
3183 #ifdef HAVE_COPY_FILE_RANGE
3184 .copy_file_range
= lo_copy_file_range
,
3187 .destroy
= lo_destroy
,
3190 /* Print vhost-user.json backend program capabilities */
/* Writes the capability description to stdout with printf(); the visible line emits the "type": "fs" member. */
3191 static void print_capabilities(void)
3194 printf(" \"type\": \"fs\"\n");
3199 * Drop all Linux capabilities because the wait parent process only needs to
3200 * sit in waitpid(2) and terminate.
/*
 * Points libcap-ng at the calling thread (its tid from SYS_gettid), clears
 * the CAPNG_SELECT_BOTH capability sets and applies the now-empty state.
 */
3202 static void setup_wait_parent_capabilities(void)
3204 capng_setpid(syscall(SYS_gettid
));
3205 capng_clear(CAPNG_SELECT_BOTH
);
3206 capng_apply(CAPNG_SELECT_BOTH
);
3210 * Move to new mount, net, and pid namespaces to isolate this process.
/*
 * Isolate the daemon in fresh pid, mount and network namespaces.
 *
 * After unshare() the process forks.  The parent drops its capabilities
 * (setup_wait_parent_capabilities()), waits for the child with waitpid()
 * -- retrying on EINTR until se->exited is set -- and exits with the
 * child's exit status.  The child asks to receive SIGTERM when the parent
 * dies, makes all mounts MS_SLAVE so later mount changes do not propagate
 * back, mounts a new /proc for the new pid namespace, bind-mounts
 * /proc/self/fd over /proc, and stores an O_PATH fd for it in
 * lo->proc_self_fd.  Every failure is logged with FUSE_LOG_ERR.
 */
3212 static void setup_namespaces(struct lo_data
*lo
, struct fuse_session
*se
)
3217 * Create a new pid namespace for *child* processes. We'll have to
3218 * fork in order to enter the new pid namespace. A new mount namespace
3219 * is also needed so that we can remount /proc for the new pid
3222 * Our UNIX domain sockets have been created. Now we can move to
3223 * an empty network namespace to prevent TCP/IP and other network
3224 * activity in case this process is compromised.
3226 if (unshare(CLONE_NEWPID
| CLONE_NEWNS
| CLONE_NEWNET
) != 0) {
/* The message names every flag that was actually passed to unshare() */
3227 fuse_log(FUSE_LOG_ERR
, "unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET): %m\n");
3233 fuse_log(FUSE_LOG_ERR
, "fork() failed: %m\n");
3240 setup_wait_parent_capabilities();
3242 /* The parent waits for the child */
3244 waited
= waitpid(child
, &wstatus
, 0);
3245 } while (waited
< 0 && errno
== EINTR
&& !se
->exited
);
3247 /* We were terminated by a signal, see fuse_signals.c */
3252 if (WIFEXITED(wstatus
)) {
3253 exit(WEXITSTATUS(wstatus
));
3259 /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
3260 prctl(PR_SET_PDEATHSIG
, SIGTERM
);
3263 * If the mounts have shared propagation then we want to opt out so our
3264 * mount changes don't affect the parent mount namespace.
3266 if (mount(NULL
, "/", NULL
, MS_REC
| MS_SLAVE
, NULL
) < 0) {
3267 fuse_log(FUSE_LOG_ERR
, "mount(/, MS_REC|MS_SLAVE): %m\n");
3271 /* The child must remount /proc to use the new pid namespace */
3272 if (mount("proc", "/proc", "proc",
3273 MS_NODEV
| MS_NOEXEC
| MS_NOSUID
| MS_RELATIME
, NULL
) < 0) {
3274 fuse_log(FUSE_LOG_ERR
, "mount(/proc): %m\n");
3279 * We only need /proc/self/fd. Prevent ".." from accessing parent
3280 * directories of /proc/self/fd by bind-mounting it over /proc. Since / was
3281 * previously remounted with MS_REC | MS_SLAVE this mount change only
3282 * affects our process.
3284 if (mount("/proc/self/fd", "/proc", NULL
, MS_BIND
, NULL
) < 0) {
3285 fuse_log(FUSE_LOG_ERR
, "mount(/proc/self/fd, MS_BIND): %m\n");
3289 /* Get the /proc (actually /proc/self/fd, see above) file descriptor */
3290 lo
->proc_self_fd
= open("/proc", O_PATH
);
3291 if (lo
->proc_self_fd
== -1) {
3292 fuse_log(FUSE_LOG_ERR
, "open(/proc, O_PATH): %m\n");
/*
 * setup_capng(): snapshot the process capability state into cap.saved.
 *
 * capng_get_caps_process() is called first; per the existing note it
 * reads /proc, so this function has to run before the sandbox is set up.
 * cap.mutex is then initialised, and the result of capng_save_state() is
 * stored in cap.saved while that mutex is held.  Both failure cases are
 * reported through fuse_log(FUSE_LOG_ERR, ...).
 * NOTE(review): handling after the error logs is not visible here.
 */
3298 * Capture the capability state, we'll need to restore this for individual
3299 * threads later; see load_capng.
3301 static void setup_capng(void)
3303 /* Note this accesses /proc so has to happen before the sandbox */
3304 if (capng_get_caps_process()) {
3305 fuse_log(FUSE_LOG_ERR
, "capng_get_caps_process\n");
3308 pthread_mutex_init(&cap
.mutex
, NULL
);
3309 pthread_mutex_lock(&cap
.mutex
);
3310 cap
.saved
= capng_save_state();
3312 fuse_log(FUSE_LOG_ERR
, "capng_save_state\n");
3315 pthread_mutex_unlock(&cap
.mutex
);
/*
 * cleanup_capng(): tear down the capability bookkeeping.  The statement
 * visible here destroys cap.mutex with pthread_mutex_destroy().
 * NOTE(review): release of cap.saved is not visible in this view; confirm.
 */
3318 static void cleanup_capng(void)
3322 pthread_mutex_destroy(&cap
.mutex
);
/*
 * setup_mounts(source): make `source` the root of this mount namespace.
 *
 * Visible sequence (each step relies on the working directory set by the
 * previous fchdir(), so the order must not change):
 *  1. bind-mount source onto itself with MS_BIND | MS_REC;
 *  2. open "/" as oldroot and source as newroot, both with
 *     O_DIRECTORY | O_RDONLY | O_CLOEXEC;
 *  3. fchdir(newroot), then pivot_root(".", ".") via
 *     syscall(__NR_pivot_root);
 *  4. fchdir(oldroot), mark "." MS_SLAVE | MS_REC, and detach it with
 *     umount2(".", MNT_DETACH);
 *  5. fchdir(newroot) again.
 *
 * Every failing call is reported through fuse_log(FUSE_LOG_ERR, ...).
 * NOTE(review): handling after each error log, and the closing of
 * oldroot/newroot, are not visible here.
 */
3327 * Make the source directory our root so symlinks cannot escape and no other
3328 * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
3330 static void setup_mounts(const char *source
)
3335 if (mount(source
, source
, NULL
, MS_BIND
| MS_REC
, NULL
) < 0) {
3336 fuse_log(FUSE_LOG_ERR
, "mount(%s, %s, MS_BIND): %m\n", source
, source
);
3340 /* This magic is based on lxc's lxc_pivot_root() */
3341 oldroot
= open("/", O_DIRECTORY
| O_RDONLY
| O_CLOEXEC
);
3343 fuse_log(FUSE_LOG_ERR
, "open(/): %m\n");
3347 newroot
= open(source
, O_DIRECTORY
| O_RDONLY
| O_CLOEXEC
);
3349 fuse_log(FUSE_LOG_ERR
, "open(%s): %m\n", source
);
3353 if (fchdir(newroot
) < 0) {
3354 fuse_log(FUSE_LOG_ERR
, "fchdir(newroot): %m\n");
3358 if (syscall(__NR_pivot_root
, ".", ".") < 0) {
3359 fuse_log(FUSE_LOG_ERR
, "pivot_root(., .): %m\n");
3363 if (fchdir(oldroot
) < 0) {
3364 fuse_log(FUSE_LOG_ERR
, "fchdir(oldroot): %m\n");
3368 if (mount("", ".", "", MS_SLAVE
| MS_REC
, NULL
) < 0) {
3369 fuse_log(FUSE_LOG_ERR
, "mount(., MS_SLAVE | MS_REC): %m\n");
3373 if (umount2(".", MNT_DETACH
) < 0) {
3374 fuse_log(FUSE_LOG_ERR
, "umount2(., MNT_DETACH): %m\n");
3378 if (fchdir(newroot
) < 0) {
3379 fuse_log(FUSE_LOG_ERR
, "fchdir(newroot): %m\n");
/*
 * setup_capabilities(modcaps_in): cut the calling thread's capabilities
 * down to an allowlist, then apply the user's modifications.
 *
 * With cap.mutex held, the visible code:
 *  - restores the snapshot in cap.saved (capng_restore_state), targets the
 *    calling thread (capng_setpid with the gettid result) and clears both
 *    capability selections;
 *  - adds the allowlist to the permitted and effective sets with
 *    capng_updatev(CAPNG_ADD, ...);
 *  - walks modcaps, a colon-separated list whose entries begin with '+'
 *    or '-' followed by a capability name; the name is resolved with
 *    capng_name_to_capability() and applied with capng_update().  '-'
 *    selects CAPNG_DROP; the '+' action is presumably CAPNG_ADD but is
 *    not visible here -- TODO confirm;
 *  - applies the result with capng_apply(CAPNG_SELECT_BOTH) and stores a
 *    fresh capng_save_state() snapshot back into cap.saved.
 *
 * modcaps_in may be NULL; per the existing note the string is freed by
 * this function before it finishes.  All failures are reported through
 * fuse_log(FUSE_LOG_ERR, ...).
 */
3388 * Only keep capabilities in allowlist that are needed for file system operation
3389 * The (possibly NULL) modcaps_in string passed in is free'd before exit.
3391 static void setup_capabilities(char *modcaps_in
)
3393 char *modcaps
= modcaps_in
;
3394 pthread_mutex_lock(&cap
.mutex
);
3395 capng_restore_state(&cap
.saved
);
3398 * Add to allowlist file system-related capabilities that are needed for a
3399 * file server to act like root. Drop everything else like networking and
3400 * sysadmin capabilities.
3403 * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl
3404 * and we don't support that.
3405 * 2. CAP_MAC_OVERRIDE is not included because it only seems to be
3406 * used by the Smack LSM. Omit it until there is demand for it.
3408 capng_setpid(syscall(SYS_gettid
));
3409 capng_clear(CAPNG_SELECT_BOTH
);
3410 if (capng_updatev(CAPNG_ADD
, CAPNG_PERMITTED
| CAPNG_EFFECTIVE
,
3420 fuse_log(FUSE_LOG_ERR
, "%s: capng_updatev failed\n", __func__
);
3425 * The modcaps option is a colon separated list of caps,
3426 * each preceded by either + or -.
3432 char *next
= strchr(modcaps
, ':');
3438 switch (modcaps
[0]) {
3444 action
= CAPNG_DROP
;
3448 fuse_log(FUSE_LOG_ERR
,
3449 "%s: Expecting '+'/'-' in modcaps but found '%c'\n",
3450 __func__
, modcaps
[0]);
3453 cap
= capng_name_to_capability(modcaps
+ 1);
3455 fuse_log(FUSE_LOG_ERR
, "%s: Unknown capability '%s'\n", __func__
,
3459 if (capng_update(action
, CAPNG_PERMITTED
| CAPNG_EFFECTIVE
, cap
)) {
3460 fuse_log(FUSE_LOG_ERR
, "%s: capng_update failed for '%s'\n",
3469 if (capng_apply(CAPNG_SELECT_BOTH
)) {
3470 fuse_log(FUSE_LOG_ERR
, "%s: capng_apply failed\n", __func__
);
3474 cap
.saved
= capng_save_state();
3476 fuse_log(FUSE_LOG_ERR
, "%s: capng_save_state failed\n", __func__
);
3479 pthread_mutex_unlock(&cap
.mutex
);
/*
 * setup_chroot(lo): chroot-based sandbox.
 *
 * "/proc/self/fd" is opened with O_PATH into lo->proc_self_fd before the
 * root changes; the process then calls chroot(lo->source) and chdir("/")
 * to move into the new root.  Each failing call is reported through
 * fuse_log(FUSE_LOG_ERR, ...).
 * NOTE(review): the open presumably has to precede chroot() because /proc
 * is not reachable afterwards -- confirm.  Handling after the error logs
 * is not visible here.
 */
3483 * Use chroot as a weaker sandbox for environments where the process is
3484 * launched without CAP_SYS_ADMIN.
3486 static void setup_chroot(struct lo_data
*lo
)
3488 lo
->proc_self_fd
= open("/proc/self/fd", O_PATH
);
3489 if (lo
->proc_self_fd
== -1) {
3490 fuse_log(FUSE_LOG_ERR
, "open(\"/proc/self/fd\", O_PATH): %m\n");
3495 * Make the shared directory the file system root so that FUSE_OPEN
3496 * (lo_open()) cannot escape the shared directory by opening a symlink.
3498 * The chroot(2) syscall is later disabled by seccomp and the
3499 * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot
3502 * However, it's still possible to escape the chroot via lo->proc_self_fd
3503 * but that requires first gaining control of the process.
3505 if (chroot(lo
->source
) != 0) {
3506 fuse_log(FUSE_LOG_ERR
, "chroot(\"%s\"): %m\n", lo
->source
);
3510 /* Move into the chroot */
3511 if (chdir("/") != 0) {
3512 fuse_log(FUSE_LOG_ERR
, "chdir(\"/\"): %m\n");
/*
 * setup_sandbox(lo, se, ...): entry point that applies the sandboxing
 * layers.
 *
 * When lo->sandbox is SANDBOX_NAMESPACE it runs setup_namespaces(lo, se)
 * followed by setup_mounts(lo->source).  It then installs the seccomp
 * filter via setup_seccomp(enable_syslog) and calls setup_capabilities()
 * with a g_strdup() copy of lo->modcaps.
 * NOTE(review): the remaining parameter declaration and the branch taken
 * for other sandbox modes are not visible here.
 */
3518 * Lock down this process to prevent access to other processes or files outside
3519 * source directory. This reduces the impact of arbitrary code execution bugs.
3521 static void setup_sandbox(struct lo_data
*lo
, struct fuse_session
*se
,
3524 if (lo
->sandbox
== SANDBOX_NAMESPACE
) {
3525 setup_namespaces(lo
, se
);
3526 setup_mounts(lo
->source
);
3531 setup_seccomp(enable_syslog
);
3532 setup_capabilities(g_strdup(lo
->modcaps
));
/*
 * setup_nofile_rlimit(rlimit_nofile): request rlimit_nofile as both the
 * soft (rlim_cur) and hard (rlim_max) RLIMIT_NOFILE value.
 *
 * A value of 0 means "leave the limit alone" and returns immediately.
 * When setrlimit() fails, errno == EPERM is singled out (per the existing
 * note, SELinux denials are ignored); the visible error log reports the
 * failure through fuse_log(FUSE_LOG_ERR, ...).
 * NOTE(review): the statements inside the EPERM branch and after the error
 * log are not visible here.
 */
3535 /* Set the maximum number of open file descriptors */
3536 static void setup_nofile_rlimit(unsigned long rlimit_nofile
)
3538 struct rlimit rlim
= {
3539 .rlim_cur
= rlimit_nofile
,
3540 .rlim_max
= rlimit_nofile
,
3543 if (rlimit_nofile
== 0) {
3544 return; /* nothing to do */
3547 if (setrlimit(RLIMIT_NOFILE
, &rlim
) < 0) {
3548 /* Ignore SELinux denials */
3549 if (errno
== EPERM
) {
3553 fuse_log(FUSE_LOG_ERR
, "setrlimit(RLIMIT_NOFILE): %m\n");
/*
 * log_func(level, fmt, ap): logging callback taking a fuse log level, a
 * printf-style format and its va_list.
 *
 * - current_log_level is compared against level first (filtering).
 * - When current_log_level is FUSE_LOG_DEBUG a decorated format string is
 *   built in localfmt (g_autofree, so it is released automatically).  Every
 *   variant carries the thread id as "[ID: %08ld]"; the variants differ in
 *   the timestamp:
 *     * none;
 *     * broken-down local time "%Y-%m-%d %H:%M:%S", then hundredths of a
 *       second (tv_nsec / 10,000,000) and the "%z" zone offset, used when
 *       clock_gettime(), localtime_r() and both strftime() calls succeed;
 *     * a flat get_clock() value as fallback.
 *   sec_fmt and zone_fmt are sized from sample strings so the formatted
 *   text fits exactly, including the terminating NUL.
 * - The fuse level is mapped onto a syslog priority (default LOG_ERR) for
 *   vsyslog(); the other visible output path is vfprintf(stderr, ...).
 *
 * NOTE(review): the condition choosing between the timestamp variants, the
 * point where localfmt replaces fmt, and the syslog-versus-stderr switch are
 * not visible here -- confirm against the full file.
 */
3558 static void log_func(enum fuse_log_level level
, const char *fmt
, va_list ap
)
3560 g_autofree
char *localfmt
= NULL
;
3563 char sec_fmt
[sizeof "2020-12-07 18:17:54"];
3564 char zone_fmt
[sizeof "+0100"];
3566 if (current_log_level
< level
) {
3570 if (current_log_level
== FUSE_LOG_DEBUG
) {
3572 /* no timestamp needed */
3573 localfmt
= g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid
),
3576 /* try formatting a broken-down timestamp */
3577 if (clock_gettime(CLOCK_REALTIME
, &ts
) != -1 &&
3578 localtime_r(&ts
.tv_sec
, &tm
) != NULL
&&
3579 strftime(sec_fmt
, sizeof sec_fmt
, "%Y-%m-%d %H:%M:%S",
3581 strftime(zone_fmt
, sizeof zone_fmt
, "%z", &tm
) != 0) {
3582 localfmt
= g_strdup_printf("[%s.%02ld%s] [ID: %08ld] %s",
3584 ts
.tv_nsec
/ (10L * 1000 * 1000),
3585 zone_fmt
, syscall(__NR_gettid
),
3588 /* fall back to a flat timestamp */
3589 localfmt
= g_strdup_printf("[%" PRId64
"] [ID: %08ld] %s",
3590 get_clock(), syscall(__NR_gettid
),
3598 int priority
= LOG_ERR
;
3600 case FUSE_LOG_EMERG
:
3601 priority
= LOG_EMERG
;
3603 case FUSE_LOG_ALERT
:
3604 priority
= LOG_ALERT
;
3607 priority
= LOG_CRIT
;
3612 case FUSE_LOG_WARNING
:
3613 priority
= LOG_WARNING
;
3615 case FUSE_LOG_NOTICE
:
3616 priority
= LOG_NOTICE
;
3619 priority
= LOG_INFO
;
3621 case FUSE_LOG_DEBUG
:
3622 priority
= LOG_DEBUG
;
3625 vsyslog(priority
, fmt
, ap
);
3627 vfprintf(stderr
, fmt
, ap
);
/*
 * setup_root(lo, root): initialise the inode object for the root of the
 * shared tree.
 *
 * Opens "/" with O_PATH and stats the resulting descriptor through
 * do_statx() using AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW.  It then fills in
 * root->filetype (S_IFDIR) and the lookup key (st_ino, st_dev, mnt_id), and
 * atomically sets root->refcount to 2.  When lo->posix_lock is enabled it
 * also initialises root->plock_mutex and creates root->posix_locks as a
 * direct-hashed table whose values are released with
 * posix_locks_value_destroy.
 *
 * NOTE(review): the error messages print lo->source although the path
 * opened is "/", and the stat failure text says "fstatat" while the call
 * made is do_statx().  The assignment of the opened descriptor and the
 * handling after each error log are not visible here.
 */
3631 static void setup_root(struct lo_data
*lo
, struct lo_inode
*root
)
3637 fd
= open("/", O_PATH
);
3639 fuse_log(FUSE_LOG_ERR
, "open(%s, O_PATH): %m\n", lo
->source
);
3643 res
= do_statx(lo
, fd
, "", &stat
, AT_EMPTY_PATH
| AT_SYMLINK_NOFOLLOW
,
3646 fuse_log(FUSE_LOG_ERR
, "fstatat(%s): %m\n", lo
->source
);
3650 root
->filetype
= S_IFDIR
;
3652 root
->key
.ino
= stat
.st_ino
;
3653 root
->key
.dev
= stat
.st_dev
;
3654 root
->key
.mnt_id
= mnt_id
;
3656 g_atomic_int_set(&root
->refcount
, 2);
3657 if (lo
->posix_lock
) {
3658 pthread_mutex_init(&root
->plock_mutex
, NULL
);
3659 root
->posix_locks
= g_hash_table_new_full(
3660 g_direct_hash
, g_direct_equal
, NULL
, posix_locks_value_destroy
);
3664 static guint
lo_key_hash(gconstpointer key
)
3666 const struct lo_key
*lkey
= key
;
3668 return (guint
)lkey
->ino
+ (guint
)lkey
->dev
+ (guint
)lkey
->mnt_id
;
3671 static gboolean
lo_key_equal(gconstpointer a
, gconstpointer b
)
3673 const struct lo_key
*la
= a
;
3674 const struct lo_key
*lb
= b
;
3676 return la
->ino
== lb
->ino
&& la
->dev
== lb
->dev
&& la
->mnt_id
== lb
->mnt_id
;
/*
 * fuse_lo_data_cleanup(lo): release the resources owned by *lo.
 *
 * Visible actions: destroy the lo->inodes hash table; destroy
 * lo->root.posix_locks when it is non-NULL; destroy the fd, dirp and ino
 * maps with lo_map_destroy(); close lo->proc_self_fd when it is >= 0; and
 * free lo->xattr_security_capability.
 * NOTE(review): lo->root.fd is tested for >= 0 but the statement in that
 * branch, and any further frees, are not visible here.
 */
3679 static void fuse_lo_data_cleanup(struct lo_data
*lo
)
3682 g_hash_table_destroy(lo
->inodes
);
3685 if (lo
->root
.posix_locks
) {
3686 g_hash_table_destroy(lo
->root
.posix_locks
);
3688 lo_map_destroy(&lo
->fd_map
);
3689 lo_map_destroy(&lo
->dirp_map
);
3690 lo_map_destroy(&lo
->ino_map
);
3692 if (lo
->proc_self_fd
>= 0) {
3693 close(lo
->proc_self_fd
);
3696 if (lo
->root
.fd
>= 0) {
3702 free(lo
->xattr_security_capability
);
3706 static void qemu_version(void)
3708 printf("virtiofsd version " QEMU_FULL_VERSION
"\n" QEMU_COPYRIGHT
"\n");
3711 int main(int argc
, char *argv
[])
3713 struct fuse_args args
= FUSE_ARGS_INIT(argc
, argv
);
3714 struct fuse_session
*se
;
3715 struct fuse_cmdline_opts opts
;
3716 struct lo_data lo
= {
3717 .sandbox
= SANDBOX_NAMESPACE
,
3721 .allow_direct_io
= 0,
3723 .user_killpriv_v2
= -1,
3725 struct lo_map_elem
*root_elem
;
3726 struct lo_map_elem
*reserve_elem
;
3729 /* Initialize time conversion information for localtime_r(). */
3732 /* Don't mask creation mode, kernel already did that */
3735 qemu_init_exec_dir(argv
[0]);
3737 pthread_mutex_init(&lo
.mutex
, NULL
);
3738 lo
.inodes
= g_hash_table_new(lo_key_hash
, lo_key_equal
);
3740 lo
.root
.fuse_ino
= FUSE_ROOT_ID
;
3741 lo
.cache
= CACHE_AUTO
;
3744 * Set up the ino map like this:
3745 * [0] Reserved (will not be used)
3748 lo_map_init(&lo
.ino_map
);
3749 reserve_elem
= lo_map_reserve(&lo
.ino_map
, 0);
3750 if (!reserve_elem
) {
3751 fuse_log(FUSE_LOG_ERR
, "failed to alloc reserve_elem.\n");
3754 reserve_elem
->in_use
= false;
3755 root_elem
= lo_map_reserve(&lo
.ino_map
, lo
.root
.fuse_ino
);
3757 fuse_log(FUSE_LOG_ERR
, "failed to alloc root_elem.\n");
3760 root_elem
->inode
= &lo
.root
;
3762 lo_map_init(&lo
.dirp_map
);
3763 lo_map_init(&lo
.fd_map
);
3765 if (fuse_parse_cmdline(&args
, &opts
) != 0) {
3768 fuse_set_log_func(log_func
);
3769 use_syslog
= opts
.syslog
;
3771 openlog("virtiofsd", LOG_PID
, LOG_DAEMON
);
3774 if (opts
.show_help
) {
3775 printf("usage: %s [options]\n\n", argv
[0]);
3776 fuse_cmdline_help();
3777 printf(" -o source=PATH shared directory tree\n");
3778 fuse_lowlevel_help();
3781 } else if (opts
.show_version
) {
3783 fuse_lowlevel_version();
3786 } else if (opts
.print_capabilities
) {
3787 print_capabilities();
3792 if (fuse_opt_parse(&args
, &lo
, lo_opts
, NULL
) == -1) {
3796 if (opts
.log_level
!= 0) {
3797 current_log_level
= opts
.log_level
;
3799 /* default log level is INFO */
3800 current_log_level
= FUSE_LOG_INFO
;
3802 lo
.debug
= opts
.debug
;
3804 current_log_level
= FUSE_LOG_DEBUG
;
3810 res
= lstat(lo
.source
, &stat
);
3812 fuse_log(FUSE_LOG_ERR
, "failed to stat source (\"%s\"): %m\n",
3816 if (!S_ISDIR(stat
.st_mode
)) {
3817 fuse_log(FUSE_LOG_ERR
, "source is not a directory\n");
3821 lo
.source
= strdup("/");
3823 fuse_log(FUSE_LOG_ERR
, "failed to strdup source\n");
3829 parse_xattrmap(&lo
);
3832 if (!lo
.timeout_set
) {
3843 lo
.timeout
= 86400.0;
3846 } else if (lo
.timeout
< 0) {
3847 fuse_log(FUSE_LOG_ERR
, "timeout is negative (%lf)\n", lo
.timeout
);
3851 lo
.use_statx
= true;
3853 se
= fuse_session_new(&args
, &lo_oper
, sizeof(lo_oper
), &lo
);
3858 if (fuse_set_signal_handlers(se
) != 0) {
3862 if (fuse_session_mount(se
) != 0) {
3866 fuse_daemonize(opts
.foreground
);
3868 setup_nofile_rlimit(opts
.rlimit_nofile
);
3870 /* Must be before sandbox since it wants /proc */
3873 setup_sandbox(&lo
, se
, opts
.syslog
);
3875 setup_root(&lo
, &lo
.root
);
3876 /* Block until ctrl+c or fusermount -u */
3877 ret
= virtio_loop(se
);
3879 fuse_session_unmount(se
);
3882 fuse_remove_signal_handlers(se
);
3884 fuse_session_destroy(se
);
3886 fuse_opt_free_args(&args
);
3888 fuse_lo_data_cleanup(&lo
);