2 * virtio-fs glue for FUSE
3 * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
6 * Dave Gilbert <dgilbert@redhat.com>
8 * Implements the glue between libfuse and libvhost-user
10 * This program can be distributed under the terms of the GNU LGPLv2.
11 * See the file COPYING.LIB
14 #include "qemu/osdep.h"
16 #include "fuse_virtio.h"
18 #include "standard-headers/linux/fuse.h"
19 #include "fuse_misc.h"
28 #include <sys/eventfd.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
34 #include "contrib/libvhost-user/libvhost-user.h"
39 struct fv_VuDev
*virtio_dev
;
41 /* Our queue index, corresponds to array position */
45 /* The element for the command currently being processed */
50 * We pass the dev element into libvhost-user
51 * and then use it to get back to the outer
52 * container for other data.
56 struct fuse_session
*se
;
59 * The following pair of fields are only accessed in the main
63 struct fv_QueueInfo
**qi
;
67 struct virtio_fs_config
{
72 /* Callback from libvhost-user */
73 static uint64_t fv_get_features(VuDev
*dev
)
75 return 1ULL << VIRTIO_F_VERSION_1
;
78 /* Callback from libvhost-user */
79 static void fv_set_features(VuDev
*dev
, uint64_t features
)
84 * Callback from libvhost-user if there's a new fd we're supposed to listen
85 * to, typically a queue kick?
87 static void fv_set_watch(VuDev
*dev
, int fd
, int condition
, vu_watch_cb cb
,
90 fuse_log(FUSE_LOG_WARNING
, "%s: TODO! fd=%d\n", __func__
, fd
);
94 * Callback from libvhost-user if we're no longer supposed to listen on an fd
96 static void fv_remove_watch(VuDev
*dev
, int fd
)
98 fuse_log(FUSE_LOG_WARNING
, "%s: TODO! fd=%d\n", __func__
, fd
);
101 /* Callback from libvhost-user to panic */
102 static void fv_panic(VuDev
*dev
, const char *err
)
104 fuse_log(FUSE_LOG_ERR
, "%s: libvhost-user: %s\n", __func__
, err
);
105 /* TODO: Allow reconnects?? */
110 * Copy from an iovec into a fuse_buf (memory only)
111 * Caller must ensure there is space
113 static void copy_from_iov(struct fuse_buf
*buf
, size_t out_num
,
114 const struct iovec
*out_sg
)
116 void *dest
= buf
->mem
;
119 size_t onelen
= out_sg
->iov_len
;
120 memcpy(dest
, out_sg
->iov_base
, onelen
);
/*
 * Copy from one iov to another, the given number of bytes
 * The caller must have checked sizes.
 *
 * src_iov/src_count: source scatter/gather list and its element count
 * dst_iov/dst_count: destination scatter/gather list and its element count
 * to_copy:           total number of bytes to transfer
 *
 * Source and destination elements need not line up: one source element
 * may be spread over several destination elements and vice versa.
 * Running out of elements on either side before 'to_copy' bytes have
 * been moved trips an assert.
 */
static void copy_iov(struct iovec *src_iov, int src_count,
                     struct iovec *dst_iov, int dst_count, size_t to_copy)
{
    /* How far into the current destination element we have written. */
    size_t dst_offset = 0;

    /* Outer loop copies 'src' elements */
    while (to_copy) {
        assert(src_count);
        size_t src_len = src_iov[0].iov_len;
        size_t src_offset = 0;

        /* Never take more from this element than is still wanted. */
        if (src_len > to_copy) {
            src_len = to_copy;
        }
        /* Inner loop copies contents of one 'src' to maybe multiple dst. */
        while (src_len) {
            assert(dst_count);
            size_t dst_len = dst_iov[0].iov_len - dst_offset;
            if (dst_len > src_len) {
                dst_len = src_len;
            }

            /* Cast to 'char *': 'void *' arithmetic is a GNU extension. */
            memcpy((char *)dst_iov[0].iov_base + dst_offset,
                   (const char *)src_iov[0].iov_base + src_offset, dst_len);
            src_len -= dst_len;
            to_copy -= dst_len;
            src_offset += dst_len;
            dst_offset += dst_len;

            assert(dst_offset <= dst_iov[0].iov_len);
            if (dst_offset == dst_iov[0].iov_len) {
                /* Current destination element is full; move to the next. */
                dst_offset = 0;
                dst_iov++;
                dst_count--;
            }
        }
        src_iov++;
        src_count--;
    }
}
172 * Called back by ll whenever it wants to send a reply/message back
173 * The 1st element of the iov starts with the fuse_out_header
174 * 'unique'==0 means it's a notify message.
176 int virtio_send_msg(struct fuse_session
*se
, struct fuse_chan
*ch
,
177 struct iovec
*iov
, int count
)
179 VuVirtqElement
*elem
;
183 assert(iov
[0].iov_len
>= sizeof(struct fuse_out_header
));
185 struct fuse_out_header
*out
= iov
[0].iov_base
;
186 /* TODO: Endianness! */
188 size_t tosend_len
= iov_size(iov
, count
);
190 /* unique == 0 is notification, which we don't support */
192 /* For virtio we always have ch */
195 q
= &ch
->qi
->virtio_dev
->dev
.vq
[ch
->qi
->qidx
];
197 /* The 'in' part of the elem is to qemu */
198 unsigned int in_num
= elem
->in_num
;
199 struct iovec
*in_sg
= elem
->in_sg
;
200 size_t in_len
= iov_size(in_sg
, in_num
);
201 fuse_log(FUSE_LOG_DEBUG
, "%s: elem %d: with %d in desc of length %zd\n",
202 __func__
, elem
->index
, in_num
, in_len
);
205 * The elem should have room for a 'fuse_out_header' (out from fuse)
206 * plus the data based on the len in the header.
208 if (in_len
< sizeof(struct fuse_out_header
)) {
209 fuse_log(FUSE_LOG_ERR
, "%s: elem %d too short for out_header\n",
210 __func__
, elem
->index
);
213 if (in_len
< tosend_len
) {
214 fuse_log(FUSE_LOG_ERR
, "%s: elem %d too small for data len %zd\n",
215 __func__
, elem
->index
, tosend_len
);
219 copy_iov(iov
, count
, in_sg
, in_num
, tosend_len
);
220 vu_queue_push(&se
->virtio_dev
->dev
, q
, elem
, tosend_len
);
221 vu_queue_notify(&se
->virtio_dev
->dev
, q
);
226 /* Thread function for individual queues, created when a queue is 'started' */
227 static void *fv_queue_thread(void *opaque
)
229 struct fv_QueueInfo
*qi
= opaque
;
230 struct VuDev
*dev
= &qi
->virtio_dev
->dev
;
231 struct VuVirtq
*q
= vu_get_queue(dev
, qi
->qidx
);
232 struct fuse_session
*se
= qi
->virtio_dev
->se
;
234 struct fuse_buf fbuf
;
239 fuse_mutex_init(&ch
.lock
);
240 ch
.fd
= (int)0xdaff0d111;
243 fuse_log(FUSE_LOG_INFO
, "%s: Start for queue %d kick_fd %d\n", __func__
,
244 qi
->qidx
, qi
->kick_fd
);
247 pf
[0].fd
= qi
->kick_fd
;
248 pf
[0].events
= POLLIN
;
251 fuse_log(FUSE_LOG_DEBUG
, "%s: Waiting for Queue %d event\n", __func__
,
253 int poll_res
= ppoll(pf
, 1, NULL
, NULL
);
255 if (poll_res
== -1) {
256 if (errno
== EINTR
) {
257 fuse_log(FUSE_LOG_INFO
, "%s: ppoll interrupted, going around\n",
261 fuse_log(FUSE_LOG_ERR
, "fv_queue_thread ppoll: %m\n");
264 assert(poll_res
== 1);
265 if (pf
[0].revents
& (POLLERR
| POLLHUP
| POLLNVAL
)) {
266 fuse_log(FUSE_LOG_ERR
, "%s: Unexpected poll revents %x Queue %d\n",
267 __func__
, pf
[0].revents
, qi
->qidx
);
270 assert(pf
[0].revents
& POLLIN
);
271 fuse_log(FUSE_LOG_DEBUG
, "%s: Got queue event on Queue %d\n", __func__
,
275 if (eventfd_read(qi
->kick_fd
, &evalue
)) {
276 fuse_log(FUSE_LOG_ERR
, "Eventfd_read for queue: %m\n");
279 /* out is from guest, in is to guest */
280 unsigned int in_bytes
, out_bytes
;
281 vu_queue_get_avail_bytes(dev
, q
, &in_bytes
, &out_bytes
, ~0, ~0);
283 fuse_log(FUSE_LOG_DEBUG
,
284 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
285 __func__
, qi
->qidx
, (size_t)evalue
, in_bytes
, out_bytes
);
289 * An element contains one request and the space to send our
290 * response. They're spread over multiple descriptors in a
291 * scatter/gather set and we can't trust the guest to keep them
292 * still; so copy in/out.
294 VuVirtqElement
*elem
= vu_queue_pop(dev
, q
, sizeof(VuVirtqElement
));
300 fbuf
.mem
= malloc(se
->bufsize
);
302 assert(se
->bufsize
> sizeof(struct fuse_in_header
));
304 /* The 'out' part of the elem is from qemu */
305 unsigned int out_num
= elem
->out_num
;
306 struct iovec
*out_sg
= elem
->out_sg
;
307 size_t out_len
= iov_size(out_sg
, out_num
);
308 fuse_log(FUSE_LOG_DEBUG
,
309 "%s: elem %d: with %d out desc of length %zd\n", __func__
,
310 elem
->index
, out_num
, out_len
);
313 * The elem should contain a 'fuse_in_header' (in to fuse)
314 * plus the data based on the len in the header.
316 if (out_len
< sizeof(struct fuse_in_header
)) {
317 fuse_log(FUSE_LOG_ERR
, "%s: elem %d too short for in_header\n",
318 __func__
, elem
->index
);
319 assert(0); /* TODO */
321 if (out_len
> se
->bufsize
) {
322 fuse_log(FUSE_LOG_ERR
, "%s: elem %d too large for buffer\n",
323 __func__
, elem
->index
);
324 assert(0); /* TODO */
326 copy_from_iov(&fbuf
, out_num
, out_sg
);
329 /* TODO! Endianness of header */
331 /* TODO: Add checks for fuse_session_exited */
332 fuse_session_process_buf_int(se
, &fbuf
, &ch
);
339 pthread_mutex_destroy(&ch
.lock
);
345 /* Callback from libvhost-user on start or stop of a queue */
346 static void fv_queue_set_started(VuDev
*dev
, int qidx
, bool started
)
348 struct fv_VuDev
*vud
= container_of(dev
, struct fv_VuDev
, dev
);
349 struct fv_QueueInfo
*ourqi
;
351 fuse_log(FUSE_LOG_INFO
, "%s: qidx=%d started=%d\n", __func__
, qidx
,
356 * Ignore additional request queues for now. passthrough_ll.c must be
357 * audited for thread-safety issues first. It was written with a
358 * well-behaved client in mind and may not protect against all types of
362 fuse_log(FUSE_LOG_ERR
,
363 "%s: multiple request queues not yet implemented, please only "
364 "configure 1 request queue\n",
370 /* Fire up a thread to watch this queue */
371 if (qidx
>= vud
->nqueues
) {
372 vud
->qi
= realloc(vud
->qi
, (qidx
+ 1) * sizeof(vud
->qi
[0]));
374 memset(vud
->qi
+ vud
->nqueues
, 0,
375 sizeof(vud
->qi
[0]) * (1 + (qidx
- vud
->nqueues
)));
376 vud
->nqueues
= qidx
+ 1;
378 if (!vud
->qi
[qidx
]) {
379 vud
->qi
[qidx
] = calloc(sizeof(struct fv_QueueInfo
), 1);
380 assert(vud
->qi
[qidx
]);
381 vud
->qi
[qidx
]->virtio_dev
= vud
;
382 vud
->qi
[qidx
]->qidx
= qidx
;
384 /* Shouldn't have been started */
385 assert(vud
->qi
[qidx
]->kick_fd
== -1);
387 ourqi
= vud
->qi
[qidx
];
388 ourqi
->kick_fd
= dev
->vq
[qidx
].kick_fd
;
389 if (pthread_create(&ourqi
->thread
, NULL
, fv_queue_thread
, ourqi
)) {
390 fuse_log(FUSE_LOG_ERR
, "%s: Failed to create thread for queue %d\n",
395 /* TODO: Kill the thread */
396 assert(qidx
< vud
->nqueues
);
397 ourqi
= vud
->qi
[qidx
];
402 static bool fv_queue_order(VuDev
*dev
, int qidx
)
407 static const VuDevIface fv_iface
= {
408 .get_features
= fv_get_features
,
409 .set_features
= fv_set_features
,
411 /* Don't need process message, we've not got any at vhost-user level */
412 .queue_set_started
= fv_queue_set_started
,
414 .queue_is_processed_in_order
= fv_queue_order
,
418 * Main loop; this mostly deals with events on the vhost-user
419 * socket itself, and not actual fuse data.
421 int virtio_loop(struct fuse_session
*se
)
423 fuse_log(FUSE_LOG_INFO
, "%s: Entry\n", __func__
);
425 while (!fuse_session_exited(se
)) {
427 pf
[0].fd
= se
->vu_socketfd
;
428 pf
[0].events
= POLLIN
;
431 fuse_log(FUSE_LOG_DEBUG
, "%s: Waiting for VU event\n", __func__
);
432 int poll_res
= ppoll(pf
, 1, NULL
, NULL
);
434 if (poll_res
== -1) {
435 if (errno
== EINTR
) {
436 fuse_log(FUSE_LOG_INFO
, "%s: ppoll interrupted, going around\n",
440 fuse_log(FUSE_LOG_ERR
, "virtio_loop ppoll: %m\n");
443 assert(poll_res
== 1);
444 if (pf
[0].revents
& (POLLERR
| POLLHUP
| POLLNVAL
)) {
445 fuse_log(FUSE_LOG_ERR
, "%s: Unexpected poll revents %x\n", __func__
,
449 assert(pf
[0].revents
& POLLIN
);
450 fuse_log(FUSE_LOG_DEBUG
, "%s: Got VU event\n", __func__
);
451 if (!vu_dispatch(&se
->virtio_dev
->dev
)) {
452 fuse_log(FUSE_LOG_ERR
, "%s: vu_dispatch failed\n", __func__
);
457 fuse_log(FUSE_LOG_INFO
, "%s: Exit\n", __func__
);
462 int virtio_session_mount(struct fuse_session
*se
)
464 struct sockaddr_un un
;
467 if (strlen(se
->vu_socket_path
) >= sizeof(un
.sun_path
)) {
468 fuse_log(FUSE_LOG_ERR
, "Socket path too long\n");
475 * Create the Unix socket to communicate with qemu
476 * based on QEMU's vhost-user-bridge
478 unlink(se
->vu_socket_path
);
479 strcpy(un
.sun_path
, se
->vu_socket_path
);
480 size_t addr_len
= sizeof(un
);
482 int listen_sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
483 if (listen_sock
== -1) {
484 fuse_log(FUSE_LOG_ERR
, "vhost socket creation: %m\n");
487 un
.sun_family
= AF_UNIX
;
490 * Unfortunately bind doesn't let you set the mask on the socket,
491 * so set umask to 077 and restore it later.
493 old_umask
= umask(0077);
494 if (bind(listen_sock
, (struct sockaddr
*)&un
, addr_len
) == -1) {
495 fuse_log(FUSE_LOG_ERR
, "vhost socket bind: %m\n");
501 if (listen(listen_sock
, 1) == -1) {
502 fuse_log(FUSE_LOG_ERR
, "vhost socket listen: %m\n");
506 fuse_log(FUSE_LOG_INFO
, "%s: Waiting for vhost-user socket connection...\n",
508 int data_sock
= accept(listen_sock
, NULL
, NULL
);
509 if (data_sock
== -1) {
510 fuse_log(FUSE_LOG_ERR
, "vhost socket accept: %m\n");
515 fuse_log(FUSE_LOG_INFO
, "%s: Received vhost-user socket connection\n",
518 /* TODO: Some cleanup/deallocation! */
519 se
->virtio_dev
= calloc(sizeof(struct fv_VuDev
), 1);
520 if (!se
->virtio_dev
) {
521 fuse_log(FUSE_LOG_ERR
, "%s: virtio_dev calloc failed\n", __func__
);
526 se
->vu_socketfd
= data_sock
;
527 se
->virtio_dev
->se
= se
;
528 vu_init(&se
->virtio_dev
->dev
, 2, se
->vu_socketfd
, fv_panic
, fv_set_watch
,
529 fv_remove_watch
, &fv_iface
);