git: Make submodule check only needed modules
[qemu/kevin.git] / tools / virtiofsd / fuse_virtio.c
blobdd1c605dbf63510f2bb335dd1ba2c91d877c53bd
1 /*
2 * virtio-fs glue for FUSE
3 * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
5 * Authors:
6 * Dave Gilbert <dgilbert@redhat.com>
8 * Implements the glue between libfuse and libvhost-user
10 * This program can be distributed under the terms of the GNU LGPLv2.
11 * See the file COPYING.LIB
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "qapi/error.h"
17 #include "fuse_i.h"
18 #include "standard-headers/linux/fuse.h"
19 #include "fuse_misc.h"
20 #include "fuse_opt.h"
21 #include "fuse_virtio.h"
23 #include <assert.h>
24 #include <errno.h>
25 #include <glib.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/eventfd.h>
31 #include <sys/socket.h>
32 #include <sys/types.h>
33 #include <sys/un.h>
34 #include <unistd.h>
36 #include "contrib/libvhost-user/libvhost-user.h"
struct fv_VuDev;

/* Per-queue state; one of these exists for each started virtqueue */
struct fv_QueueInfo {
    pthread_t thread;   /* fv_queue_thread() servicing this queue */
    /*
     * This lock protects the VuVirtq preventing races between
     * fv_queue_thread() and fv_queue_worker().
     */
    pthread_mutex_t vq_lock;

    /* Back-pointer to the owning device */
    struct fv_VuDev *virtio_dev;

    /* Our queue index, corresponds to array position */
    int qidx;
    int kick_fd;        /* eventfd the guest kicks to signal new requests */
    int kill_fd;        /* For killing the thread */
};
/*
 * A FUSE request: the popped virtqueue element plus the fuse channel
 * state used while the request is processed by a worker thread.
 */
typedef struct {
    VuVirtqElement elem;    /* must stay first: vu_queue_pop() allocates it */
    struct fuse_chan ch;

    /* Used to complete requests that involve no reply */
    bool reply_sent;
} FVRequest;
/*
 * We pass the dev element into libvhost-user
 * and then use it to get back to the outer
 * container for other data.
 */
struct fv_VuDev {
    VuDev dev;              /* must stay first for container_of() */
    struct fuse_session *se;

    /*
     * Either handle virtqueues or vhost-user protocol messages. Don't do
     * both at the same time since that could lead to race conditions if
     * virtqueues or memory tables change while another thread is accessing
     * them.
     *
     * The assumptions are:
     * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
     * 2. virtio_loop() reads/writes virtqueues and VuDev.
     */
    pthread_rwlock_t vu_dispatch_rwlock;

    /*
     * The following pair of fields are only accessed in the main
     * virtio_loop
     */
    size_t nqueues;
    struct fv_QueueInfo **qi;   /* array of nqueues entries, may hold NULLs */
};
/* From spec (virtio-fs device configuration layout) */
struct virtio_fs_config {
    char tag[36];           /* filesystem tag; NOTE(review): per spec, not NUL-terminated if exactly 36 bytes — confirm before printing */
    uint32_t num_queues;
};
/*
 * Callback from libvhost-user: report the feature bits we offer.
 * Only VIRTIO_F_VERSION_1 (modern virtio); no device-specific features.
 */
static uint64_t fv_get_features(VuDev *dev)
{
    return 1ULL << VIRTIO_F_VERSION_1;
}
/*
 * Callback from libvhost-user: the master's accepted features.
 * Nothing to record — we only offered VIRTIO_F_VERSION_1.
 */
static void fv_set_features(VuDev *dev, uint64_t features)
{
}
/*
 * Callback from libvhost-user if there's a new fd we're supposed to listen
 * to, typically a queue kick?
 * Currently unimplemented: queue kicks are handled by fv_queue_thread()
 * polling the kick_fd directly instead.
 */
static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
                         void *data)
{
    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
}
/*
 * Callback from libvhost-user if we're no longer supposed to listen on an fd
 * Currently unimplemented, matching fv_set_watch() above.
 */
static void fv_remove_watch(VuDev *dev, int fd)
{
    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
}
/*
 * Callback from libvhost-user to panic: log the error and terminate the
 * whole daemon — there is no recovery path.
 */
static void fv_panic(VuDev *dev, const char *err)
{
    fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
    /* TODO: Allow reconnects?? */
    exit(EXIT_FAILURE);
}
137 * Copy from an iovec into a fuse_buf (memory only)
138 * Caller must ensure there is space
140 static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
141 const struct iovec *out_sg)
143 void *dest = buf->mem;
145 while (out_num) {
146 size_t onelen = out_sg->iov_len;
147 memcpy(dest, out_sg->iov_base, onelen);
148 dest += onelen;
149 out_sg++;
150 out_num--;
/*
 * Copy from one iov to another, the given number of bytes
 * The caller must have checked sizes (to_copy must fit in both the
 * source and destination iovecs).
 */
static void copy_iov(struct iovec *src_iov, int src_count,
                     struct iovec *dst_iov, int dst_count, size_t to_copy)
{
    /* Bytes already written into the current dst element */
    size_t dst_used = 0;

    /* Outer loop consumes one 'src' element per iteration */
    while (to_copy > 0) {
        assert(src_count > 0);
        size_t src_remaining = src_iov[0].iov_len;
        size_t src_done = 0;

        if (src_remaining > to_copy) {
            src_remaining = to_copy;
        }
        /* Inner loop spreads one 'src' element over one or more dst. */
        while (src_remaining > 0) {
            assert(dst_count > 0);
            size_t chunk = dst_iov[0].iov_len - dst_used;
            if (chunk > src_remaining) {
                chunk = src_remaining;
            }

            memcpy(dst_iov[0].iov_base + dst_used,
                   src_iov[0].iov_base + src_done, chunk);
            src_remaining -= chunk;
            to_copy -= chunk;
            src_done += chunk;
            dst_used += chunk;

            assert(dst_used <= dst_iov[0].iov_len);
            /* Current dst element full: advance to the next one */
            if (dst_used == dst_iov[0].iov_len) {
                dst_used = 0;
                dst_iov++;
                dst_count--;
            }
        }
        src_iov++;
        src_count--;
    }
}
/*
 * Called back by ll whenever it wants to send a reply/message back
 * The 1st element of the iov starts with the fuse_out_header
 * 'unique'==0 means it's a notify message.
 *
 * Copies the reply into the element's device-writable ('in') descriptors,
 * pushes the element and notifies the guest.
 * Returns 0 on success, -E2BIG if the element is too small for the reply.
 */
int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
                    struct iovec *iov, int count)
{
    FVRequest *req = container_of(ch, FVRequest, ch);
    struct fv_QueueInfo *qi = ch->qi;
    VuDev *dev = &se->virtio_dev->dev;
    VuVirtq *q = vu_get_queue(dev, qi->qidx);
    VuVirtqElement *elem = &req->elem;
    int ret = 0;

    assert(count >= 1);
    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));

    struct fuse_out_header *out = iov[0].iov_base;
    /* TODO: Endianness! */

    size_t tosend_len = iov_size(iov, count);

    /* unique == 0 is notification, which we don't support */
    assert(out->unique);
    /* Each request may be answered at most once */
    assert(!req->reply_sent);

    /* The 'in' part of the elem is to qemu */
    unsigned int in_num = elem->in_num;
    struct iovec *in_sg = elem->in_sg;
    size_t in_len = iov_size(in_sg, in_num);
    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
             __func__, elem->index, in_num, in_len);

    /*
     * The elem should have room for a 'fuse_out_header' (out from fuse)
     * plus the data based on the len in the header.
     */
    if (in_len < sizeof(struct fuse_out_header)) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
                 __func__, elem->index);
        ret = -E2BIG;
        goto err;
    }
    if (in_len < tosend_len) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
                 __func__, elem->index, tosend_len);
        ret = -E2BIG;
        goto err;
    }

    copy_iov(iov, count, in_sg, in_num, tosend_len);

    /*
     * rdlock excludes virtio_loop()'s vu_dispatch (which takes wrlock);
     * vq_lock serializes against the queue's own fv_queue_thread().
     */
    pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
    pthread_mutex_lock(&qi->vq_lock);
    vu_queue_push(dev, q, elem, tosend_len);
    vu_queue_notify(dev, q);
    pthread_mutex_unlock(&qi->vq_lock);
    pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);

    req->reply_sent = true;

err:
    return ret;
}
/*
 * Callback from fuse_send_data_iov_* when it's virtio and the buffer
 * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
 * We need send the iov and then the buffer.
 * Return 0 on success
 *
 * NOTE(review): error paths here return *positive* E2BIG/EIO/errno while
 * virtio_send_msg() returns -E2BIG — looks inconsistent; confirm what the
 * callers in fuse_lowlevel.c expect before changing either.
 */
int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
                         struct iovec *iov, int count, struct fuse_bufvec *buf,
                         size_t len)
{
    FVRequest *req = container_of(ch, FVRequest, ch);
    struct fv_QueueInfo *qi = ch->qi;
    VuDev *dev = &se->virtio_dev->dev;
    VuVirtq *q = vu_get_queue(dev, qi->qidx);
    VuVirtqElement *elem = &req->elem;
    int ret = 0;

    assert(count >= 1);
    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));

    struct fuse_out_header *out = iov[0].iov_base;
    /* TODO: Endianness! */

    size_t iov_len = iov_size(iov, count);
    size_t tosend_len = iov_len + len;

    out->len = tosend_len;

    fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
             count, len, iov_len);

    /* unique == 0 is notification which we don't support */
    assert(out->unique);

    assert(!req->reply_sent);

    /* The 'in' part of the elem is to qemu */
    unsigned int in_num = elem->in_num;
    struct iovec *in_sg = elem->in_sg;
    size_t in_len = iov_size(in_sg, in_num);
    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
             __func__, elem->index, in_num, in_len);

    /*
     * The elem should have room for a 'fuse_out_header' (out from fuse)
     * plus the data based on the len in the header.
     */
    if (in_len < sizeof(struct fuse_out_header)) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
                 __func__, elem->index);
        ret = E2BIG;
        goto err;
    }
    if (in_len < tosend_len) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
                 __func__, elem->index, tosend_len);
        ret = E2BIG;
        goto err;
    }

    /* TODO: Limit to 'len' */

    /* First copy the header data from iov->in_sg */
    copy_iov(iov, count, in_sg, in_num, iov_len);

    /*
     * Build a copy of the the in_sg iov so we can skip bits in it,
     * including changing the offsets
     */
    struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
    assert(in_sg_cpy);
    memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
    /* These get updated as we skip */
    struct iovec *in_sg_ptr = in_sg_cpy;
    int in_sg_cpy_count = in_num;

    /* skip over parts of in_sg that contained the header iov */
    size_t skip_size = iov_len;

    size_t in_sg_left = 0;
    do {
        /* Advance past already-filled bytes (header, or a short read) */
        while (skip_size != 0 && in_sg_cpy_count) {
            if (skip_size >= in_sg_ptr[0].iov_len) {
                skip_size -= in_sg_ptr[0].iov_len;
                in_sg_ptr++;
                in_sg_cpy_count--;
            } else {
                in_sg_ptr[0].iov_len -= skip_size;
                in_sg_ptr[0].iov_base += skip_size;
                break;
            }
        }

        int i;
        for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
            in_sg_left += in_sg_ptr[i].iov_len;
        }
        fuse_log(FUSE_LOG_DEBUG,
                 "%s: after skip skip_size=%zd in_sg_cpy_count=%d "
                 "in_sg_left=%zd\n",
                 __func__, skip_size, in_sg_cpy_count, in_sg_left);
        /* Read the file data straight into the guest's buffers */
        ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
                     buf->buf[0].pos);

        if (ret == -1) {
            ret = errno;
            fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
                     __func__, len);
            free(in_sg_cpy);
            goto err;
        }
        fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
                 ret, len);
        /* ret >= 0 here, so the int-vs-size_t comparison with len is safe */
        if (ret < len && ret) {
            fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
            /* Skip over this much next time around */
            skip_size = ret;
            buf->buf[0].pos += ret;
            len -= ret;

            /* Lets do another read */
            continue;
        }
        if (!ret) {
            /* EOF case? */
            fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
                     in_sg_left);
            break;
        }
        if (ret != len) {
            fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
            ret = EIO;
            free(in_sg_cpy);
            goto err;
        }
        in_sg_left -= ret;
        len -= ret;
    } while (in_sg_left);
    free(in_sg_cpy);

    /* Need to fix out->len on EOF */
    if (len) {
        struct fuse_out_header *out_sg = in_sg[0].iov_base;

        tosend_len -= len;
        out_sg->len = tosend_len;
    }

    ret = 0;

    pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
    pthread_mutex_lock(&qi->vq_lock);
    vu_queue_push(dev, q, elem, tosend_len);
    vu_queue_notify(dev, q);
    pthread_mutex_unlock(&qi->vq_lock);
    pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);

err:
    if (ret == 0) {
        req->reply_sent = true;
    }

    return ret;
}
/*
 * Process one FVRequest in a thread pool.
 * Copies the request out of guest memory (the guest can't be trusted to
 * keep it stable), hands it to the FUSE core, and recycles the virtqueue
 * element if no reply was generated. Frees the request on exit.
 */
static void fv_queue_worker(gpointer data, gpointer user_data)
{
    struct fv_QueueInfo *qi = user_data;
    struct fuse_session *se = qi->virtio_dev->se;
    struct VuDev *dev = &qi->virtio_dev->dev;
    FVRequest *req = data;
    VuVirtqElement *elem = &req->elem;
    struct fuse_buf fbuf = {};
    bool allocated_bufv = false;
    struct fuse_bufvec bufv;
    struct fuse_bufvec *pbufv;

    assert(se->bufsize > sizeof(struct fuse_in_header));

    /*
     * An element contains one request and the space to send our response
     * They're spread over multiple descriptors in a scatter/gather set
     * and we can't trust the guest to keep them still; so copy in/out.
     */
    fbuf.mem = malloc(se->bufsize);
    assert(fbuf.mem);

    fuse_mutex_init(&req->ch.lock);
    req->ch.fd = -1;
    req->ch.qi = qi;

    /* The 'out' part of the elem is from qemu */
    unsigned int out_num = elem->out_num;
    struct iovec *out_sg = elem->out_sg;
    size_t out_len = iov_size(out_sg, out_num);
    fuse_log(FUSE_LOG_DEBUG,
             "%s: elem %d: with %d out desc of length %zd\n",
             __func__, elem->index, out_num, out_len);

    /*
     * The elem should contain a 'fuse_in_header' (in to fuse)
     * plus the data based on the len in the header.
     */
    if (out_len < sizeof(struct fuse_in_header)) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
                 __func__, elem->index);
        assert(0); /* TODO */
    }
    if (out_len > se->bufsize) {
        fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__,
                 elem->index);
        assert(0); /* TODO */
    }
    /* Copy just the first element and look at it */
    copy_from_iov(&fbuf, 1, out_sg);

    pbufv = NULL; /* Compiler thinks an uninitialised path */
    if (out_num > 2 &&
        out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
        ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
        out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
        /*
         * For a write we don't actually need to copy the
         * data, we can just do it straight out of guest memory
         * but we must still copy the headers in case the guest
         * was nasty and changed them while we were using them.
         */
        fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);

        /* copy the fuse_write_in header after the fuse_in_header */
        fbuf.mem += out_sg->iov_len;
        copy_from_iov(&fbuf, 1, out_sg + 1);
        fbuf.mem -= out_sg->iov_len;
        fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;

        /* Allocate the bufv, with space for the rest of the iov */
        pbufv = malloc(sizeof(struct fuse_bufvec) +
                       sizeof(struct fuse_buf) * (out_num - 2));
        if (!pbufv) {
            fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
                     __func__);
            goto out;
        }

        allocated_bufv = true;
        pbufv->count = 1;
        pbufv->buf[0] = fbuf;

        size_t iovindex, pbufvindex;
        iovindex = 2; /* 2 headers, separate iovs */
        pbufvindex = 1; /* 2 headers, 1 fusebuf */

        /* Remaining descriptors reference the guest's write data in place */
        for (; iovindex < out_num; iovindex++, pbufvindex++) {
            pbufv->count++;
            pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
            pbufv->buf[pbufvindex].flags = 0;
            pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
            pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
        }
    } else {
        /* Normal (non fast write) path */

        /* Copy the rest of the buffer */
        fbuf.mem += out_sg->iov_len;
        copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
        fbuf.mem -= out_sg->iov_len;
        fbuf.size = out_len;

        /* TODO! Endianness of header */

        /* TODO: Add checks for fuse_session_exited */
        bufv.buf[0] = fbuf;
        bufv.count = 1;
        pbufv = &bufv;
    }
    pbufv->idx = 0;
    pbufv->off = 0;
    fuse_session_process_buf_int(se, pbufv, &req->ch);

out:
    if (allocated_bufv) {
        free(pbufv);
    }

    /* If the request has no reply, still recycle the virtqueue element */
    if (!req->reply_sent) {
        struct VuVirtq *q = vu_get_queue(dev, qi->qidx);

        fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__,
                 elem->index);

        pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
        pthread_mutex_lock(&qi->vq_lock);
        vu_queue_push(dev, q, elem, 0);
        vu_queue_notify(dev, q);
        pthread_mutex_unlock(&qi->vq_lock);
        pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
    }

    pthread_mutex_destroy(&req->ch.lock);
    free(fbuf.mem);
    free(req);
}
/*
 * Thread function for individual queues, created when a queue is 'started'.
 * Polls the queue's kick_fd (guest notifications) and kill_fd (shutdown),
 * pops available elements and dispatches each to the worker thread pool.
 * Returns NULL; exits on kill event or poll/eventfd error.
 */
static void *fv_queue_thread(void *opaque)
{
    struct fv_QueueInfo *qi = opaque;
    struct VuDev *dev = &qi->virtio_dev->dev;
    struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
    struct fuse_session *se = qi->virtio_dev->se;
    GThreadPool *pool;

    pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE,
                             NULL);
    if (!pool) {
        fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
        return NULL;
    }

    fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
             qi->qidx, qi->kick_fd);
    while (1) {
        struct pollfd pf[2];
        int ret;

        pf[0].fd = qi->kick_fd;
        pf[0].events = POLLIN;
        pf[0].revents = 0;
        pf[1].fd = qi->kill_fd;
        pf[1].events = POLLIN;
        pf[1].revents = 0;

        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
                 qi->qidx);
        int poll_res = ppoll(pf, 2, NULL, NULL);

        if (poll_res == -1) {
            if (errno == EINTR) {
                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
                         __func__);
                continue;
            }
            fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
            break;
        }
        assert(poll_res >= 1);
        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
                     __func__, pf[0].revents, qi->qidx);
            break;
        }
        if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR,
                     "%s: Unexpected poll revents %x Queue %d killfd\n",
                     __func__, pf[1].revents, qi->qidx);
            break;
        }
        if (pf[1].revents) {
            /* kill_fd fired: fv_queue_cleanup_thread() asked us to quit */
            fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
                     __func__, qi->qidx);
            break;
        }
        assert(pf[0].revents & POLLIN);
        fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
                 qi->qidx);

        /* Drain the eventfd so the next kick is visible */
        eventfd_t evalue;
        if (eventfd_read(qi->kick_fd, &evalue)) {
            fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
            break;
        }
        /* Mutual exclusion with virtio_loop() */
        ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
        assert(ret == 0); /* there is no possible error case */
        pthread_mutex_lock(&qi->vq_lock);
        /* out is from guest, in is to guest */
        unsigned int in_bytes, out_bytes;
        vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);

        fuse_log(FUSE_LOG_DEBUG,
                 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
                 __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);

        /* Pop everything available and hand it to the worker pool */
        while (1) {
            FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest));
            if (!req) {
                break;
            }

            req->reply_sent = false;
            g_thread_pool_push(pool, req, NULL);
        }

        pthread_mutex_unlock(&qi->vq_lock);
        pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
    }

    /* Wait for all in-flight workers to finish before freeing the pool */
    g_thread_pool_free(pool, FALSE, TRUE);

    return NULL;
}
/*
 * Stop and tear down one queue's service thread: signal its kill_fd,
 * join the thread, then release the queue's lock, fds and memory.
 * Caller must ensure the slot vud->qi[qidx] is populated.
 */
static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
{
    int ret;
    struct fv_QueueInfo *ourqi;

    assert(qidx < vud->nqueues);
    ourqi = vud->qi[qidx];

    /* Kill the thread */
    if (eventfd_write(ourqi->kill_fd, 1)) {
        fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
                 qidx, strerror(errno));
    }
    ret = pthread_join(ourqi->thread, NULL);
    if (ret) {
        fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
                 __func__, qidx, ret);
    }
    pthread_mutex_destroy(&ourqi->vq_lock);
    close(ourqi->kill_fd);
    /* kick_fd is owned by libvhost-user; just mark the queue stopped */
    ourqi->kick_fd = -1;
    free(vud->qi[qidx]);
    vud->qi[qidx] = NULL;
}
/*
 * Callback from libvhost-user on start or stop of a queue.
 * On start: grow the qi array if needed, allocate per-queue state and
 * spawn fv_queue_thread() for it. On stop: tear the thread down.
 */
static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
{
    struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
    struct fv_QueueInfo *ourqi;

    fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
             started);
    assert(qidx >= 0);

    /*
     * Ignore additional request queues for now. passthrough_ll.c must be
     * audited for thread-safety issues first. It was written with a
     * well-behaved client in mind and may not protect against all types of
     * races yet.
     */
    if (qidx > 1) {
        fuse_log(FUSE_LOG_ERR,
                 "%s: multiple request queues not yet implemented, please only "
                 "configure 1 request queue\n",
                 __func__);
        exit(EXIT_FAILURE);
    }

    if (started) {
        /* Fire up a thread to watch this queue */
        if (qidx >= vud->nqueues) {
            /* Grow the array; new slots are zeroed so they read as NULL */
            vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0]));
            assert(vud->qi);
            memset(vud->qi + vud->nqueues, 0,
                   sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
            vud->nqueues = qidx + 1;
        }
        if (!vud->qi[qidx]) {
            vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1);
            assert(vud->qi[qidx]);
            vud->qi[qidx]->virtio_dev = vud;
            vud->qi[qidx]->qidx = qidx;
        } else {
            /* Shouldn't have been started */
            assert(vud->qi[qidx]->kick_fd == -1);
        }
        ourqi = vud->qi[qidx];
        ourqi->kick_fd = dev->vq[qidx].kick_fd;

        ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
        assert(ourqi->kill_fd != -1);
        pthread_mutex_init(&ourqi->vq_lock, NULL);

        if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
            fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
                     __func__, qidx);
            assert(0);
        }
    } else {
        fv_queue_cleanup_thread(vud, qidx);
    }
}
/*
 * Callback from libvhost-user: elements are completed out of order
 * (worker threads finish whenever they finish), so report false.
 */
static bool fv_queue_order(VuDev *dev, int qidx)
{
    return false;
}
/* libvhost-user callback table for this device */
static const VuDevIface fv_iface = {
    .get_features = fv_get_features,
    .set_features = fv_set_features,

    /* Don't need process message, we've not got any at vhost-user level */
    .queue_set_started = fv_queue_set_started,

    .queue_is_processed_in_order = fv_queue_order,
};
/*
 * Main loop; this mostly deals with events on the vhost-user
 * socket itself, and not actual fuse data.
 * Runs until the session exits or the socket errors, then stops all
 * queue threads. Always returns 0.
 */
int virtio_loop(struct fuse_session *se)
{
    fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);

    while (!fuse_session_exited(se)) {
        struct pollfd pf[1];
        bool ok;
        int ret;
        pf[0].fd = se->vu_socketfd;
        pf[0].events = POLLIN;
        pf[0].revents = 0;

        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
        int poll_res = ppoll(pf, 1, NULL, NULL);

        if (poll_res == -1) {
            if (errno == EINTR) {
                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
                         __func__);
                continue;
            }
            fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
            break;
        }
        assert(poll_res == 1);
        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
                     pf[0].revents);
            break;
        }
        assert(pf[0].revents & POLLIN);
        fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
        /* Mutual exclusion with fv_queue_thread() */
        ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock);
        assert(ret == 0); /* there is no possible error case */

        ok = vu_dispatch(&se->virtio_dev->dev);

        pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock);

        if (!ok) {
            fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
            break;
        }
    }

    /*
     * Make sure all fv_queue_thread()s quit on exit, as we're about to
     * free virtio dev and fuse session, no one should access them anymore.
     */
    for (int i = 0; i < se->virtio_dev->nqueues; i++) {
        if (!se->virtio_dev->qi[i]) {
            continue;
        }

        fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i);
        fv_queue_cleanup_thread(se->virtio_dev, i);
    }

    fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);

    return 0;
}
/* Replace every occurrence of 'old' with 'new' in the NUL-terminated string s */
static void strreplace(char *s, char old, char new)
{
    char *hit = strchr(s, old);

    while (hit) {
        *hit = new;
        hit = strchr(hit + 1, old);
    }
}
/*
 * Take a per-socket-path lock by writing a pidfile under the local state
 * directory (run/virtiofsd/<mangled-path>.pid), so two daemons can't
 * serve the same socket path. Returns true on success, false if the
 * directory can't be created or the pidfile is held by another process.
 */
static bool fv_socket_lock(struct fuse_session *se)
{
    g_autofree gchar *sk_name = NULL;
    g_autofree gchar *pidfile = NULL;
    g_autofree gchar *dir = NULL;
    Error *local_err = NULL;

    dir = qemu_get_local_state_pathname("run/virtiofsd");

    if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
        fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s",
                 __func__, dir, strerror(errno));
        return false;
    }

    /* Flatten the socket path into a single filename component */
    sk_name = g_strdup(se->vu_socket_path);
    strreplace(sk_name, '/', '.');
    pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);

    /* qemu_write_pidfile() fails if another live process holds the file */
    if (!qemu_write_pidfile(pidfile, &local_err)) {
        error_report_err(local_err);
        return false;
    }

    return true;
}
872 static int fv_create_listen_socket(struct fuse_session *se)
874 struct sockaddr_un un;
875 mode_t old_umask;
877 /* Nothing to do if fd is already initialized */
878 if (se->vu_listen_fd >= 0) {
879 return 0;
882 if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
883 fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
884 return -1;
887 if (!strlen(se->vu_socket_path)) {
888 fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
889 return -1;
892 /* Check the vu_socket_path is already used */
893 if (!fv_socket_lock(se)) {
894 return -1;
898 * Create the Unix socket to communicate with qemu
899 * based on QEMU's vhost-user-bridge
901 unlink(se->vu_socket_path);
902 strcpy(un.sun_path, se->vu_socket_path);
903 size_t addr_len = sizeof(un);
905 int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
906 if (listen_sock == -1) {
907 fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
908 return -1;
910 un.sun_family = AF_UNIX;
913 * Unfortunately bind doesn't let you set the mask on the socket,
914 * so set umask to 077 and restore it later.
916 old_umask = umask(0077);
917 if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
918 fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
919 close(listen_sock);
920 umask(old_umask);
921 return -1;
923 umask(old_umask);
925 if (listen(listen_sock, 1) == -1) {
926 fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
927 close(listen_sock);
928 return -1;
931 se->vu_listen_fd = listen_sock;
932 return 0;
/*
 * Set up the virtio transport: create the listening socket, block until
 * QEMU connects, then allocate the fv_VuDev and initialize libvhost-user
 * on the accepted connection (2 queues: hiprio + one request queue).
 * Returns 0 on success, negative on failure.
 */
int virtio_session_mount(struct fuse_session *se)
{
    int ret;

    ret = fv_create_listen_socket(se);
    if (ret < 0) {
        return ret;
    }

    /* No /dev/fuse fd in virtio mode */
    se->fd = -1;

    fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
             __func__);
    int data_sock = accept(se->vu_listen_fd, NULL, NULL);
    if (data_sock == -1) {
        fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
        close(se->vu_listen_fd);
        return -1;
    }
    /* Only one connection is accepted; the listener is no longer needed */
    close(se->vu_listen_fd);
    se->vu_listen_fd = -1;
    fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
             __func__);

    /* TODO: Some cleanup/deallocation! */
    se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
    if (!se->virtio_dev) {
        fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__);
        close(data_sock);
        return -1;
    }

    se->vu_socketfd = data_sock;
    se->virtio_dev->se = se;
    pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
    vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
            fv_remove_watch, &fv_iface);

    return 0;
}
/*
 * Tear down the virtio transport: close the vhost-user socket and free
 * the fv_VuDev state allocated by virtio_session_mount().
 * NOTE(review): vu_socketfd is closed even when virtio_dev is NULL —
 * presumably only called after a successful mount; confirm callers.
 */
void virtio_session_close(struct fuse_session *se)
{
    close(se->vu_socketfd);

    if (!se->virtio_dev) {
        return;
    }

    free(se->virtio_dev->qi);
    pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock);
    free(se->virtio_dev);
    se->virtio_dev = NULL;
}