virtiofsd: Send replies to messages
[qemu/kevin.git] / tools / virtiofsd / fuse_virtio.c
blob05d0e29f12f1dd6ed5c4fd7eb8ef10c78de42367
/*
 * virtio-fs glue for FUSE
 * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *   Dave Gilbert  <dgilbert@redhat.com>
 *
 * Implements the glue between libfuse and libvhost-user
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "fuse_virtio.h"
17 #include "fuse_i.h"
18 #include "standard-headers/linux/fuse.h"
19 #include "fuse_misc.h"
20 #include "fuse_opt.h"
22 #include <assert.h>
23 #include <errno.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/eventfd.h>
29 #include <sys/socket.h>
30 #include <sys/types.h>
31 #include <sys/un.h>
32 #include <unistd.h>
34 #include "contrib/libvhost-user/libvhost-user.h"
36 struct fv_VuDev;
37 struct fv_QueueInfo {
38 pthread_t thread;
39 struct fv_VuDev *virtio_dev;
41 /* Our queue index, corresponds to array position */
42 int qidx;
43 int kick_fd;
45 /* The element for the command currently being processed */
46 VuVirtqElement *qe;
50 * We pass the dev element into libvhost-user
51 * and then use it to get back to the outer
52 * container for other data.
54 struct fv_VuDev {
55 VuDev dev;
56 struct fuse_session *se;
59 * The following pair of fields are only accessed in the main
60 * virtio_loop
62 size_t nqueues;
63 struct fv_QueueInfo **qi;
/* Device configuration layout, taken from the spec */
struct virtio_fs_config {
    char tag[36];
    uint32_t num_queues;
};
72 /* Callback from libvhost-user */
73 static uint64_t fv_get_features(VuDev *dev)
75 return 1ULL << VIRTIO_F_VERSION_1;
78 /* Callback from libvhost-user */
79 static void fv_set_features(VuDev *dev, uint64_t features)
84 * Callback from libvhost-user if there's a new fd we're supposed to listen
85 * to, typically a queue kick?
87 static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
88 void *data)
90 fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
94 * Callback from libvhost-user if we're no longer supposed to listen on an fd
96 static void fv_remove_watch(VuDev *dev, int fd)
98 fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
101 /* Callback from libvhost-user to panic */
102 static void fv_panic(VuDev *dev, const char *err)
104 fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
105 /* TODO: Allow reconnects?? */
106 exit(EXIT_FAILURE);
110 * Copy from an iovec into a fuse_buf (memory only)
111 * Caller must ensure there is space
113 static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
114 const struct iovec *out_sg)
116 void *dest = buf->mem;
118 while (out_num) {
119 size_t onelen = out_sg->iov_len;
120 memcpy(dest, out_sg->iov_base, onelen);
121 dest += onelen;
122 out_sg++;
123 out_num--;
/*
 * Copy 'to_copy' bytes from one scatter/gather list to another.
 *
 * Source segments are consumed in order; each one may be split across
 * several destination segments, and a destination segment may be filled
 * from several source segments.
 *
 * The caller must have checked sizes: running out of source or
 * destination segments before 'to_copy' bytes are moved trips an assert.
 */
static void copy_iov(struct iovec *src_iov, int src_count,
                     struct iovec *dst_iov, int dst_count, size_t to_copy)
{
    /* How much of the current destination segment is already filled */
    size_t dst_used = 0;

    /* One iteration per source segment */
    for (; to_copy > 0; src_iov++, src_count--) {
        assert(src_count);
        /* Never take more from this segment than is still wanted overall */
        size_t pending = src_iov->iov_len < to_copy ? src_iov->iov_len
                                                    : to_copy;
        size_t src_used = 0;

        /* Spread this source segment over as many dst segments as needed */
        while (pending > 0) {
            assert(dst_count);
            size_t room = dst_iov->iov_len - dst_used;
            size_t chunk = room < pending ? room : pending;

            memcpy((char *)dst_iov->iov_base + dst_used,
                   (char *)src_iov->iov_base + src_used, chunk);
            pending -= chunk;
            to_copy -= chunk;
            src_used += chunk;
            dst_used += chunk;

            assert(dst_used <= dst_iov->iov_len);
            if (dst_used == dst_iov->iov_len) {
                /* Destination segment is full, move on to the next one */
                dst_used = 0;
                dst_iov++;
                dst_count--;
            }
        }
    }
}
172 * Called back by ll whenever it wants to send a reply/message back
173 * The 1st element of the iov starts with the fuse_out_header
174 * 'unique'==0 means it's a notify message.
176 int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
177 struct iovec *iov, int count)
179 VuVirtqElement *elem;
180 VuVirtq *q;
182 assert(count >= 1);
183 assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
185 struct fuse_out_header *out = iov[0].iov_base;
186 /* TODO: Endianness! */
188 size_t tosend_len = iov_size(iov, count);
190 /* unique == 0 is notification, which we don't support */
191 assert(out->unique);
192 /* For virtio we always have ch */
193 assert(ch);
194 elem = ch->qi->qe;
195 q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
197 /* The 'in' part of the elem is to qemu */
198 unsigned int in_num = elem->in_num;
199 struct iovec *in_sg = elem->in_sg;
200 size_t in_len = iov_size(in_sg, in_num);
201 fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
202 __func__, elem->index, in_num, in_len);
205 * The elem should have room for a 'fuse_out_header' (out from fuse)
206 * plus the data based on the len in the header.
208 if (in_len < sizeof(struct fuse_out_header)) {
209 fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
210 __func__, elem->index);
211 return -E2BIG;
213 if (in_len < tosend_len) {
214 fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
215 __func__, elem->index, tosend_len);
216 return -E2BIG;
219 copy_iov(iov, count, in_sg, in_num, tosend_len);
220 vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
221 vu_queue_notify(&se->virtio_dev->dev, q);
223 return 0;
226 /* Thread function for individual queues, created when a queue is 'started' */
227 static void *fv_queue_thread(void *opaque)
229 struct fv_QueueInfo *qi = opaque;
230 struct VuDev *dev = &qi->virtio_dev->dev;
231 struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
232 struct fuse_session *se = qi->virtio_dev->se;
233 struct fuse_chan ch;
234 struct fuse_buf fbuf;
236 fbuf.mem = NULL;
237 fbuf.flags = 0;
239 fuse_mutex_init(&ch.lock);
240 ch.fd = (int)0xdaff0d111;
241 ch.qi = qi;
243 fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
244 qi->qidx, qi->kick_fd);
245 while (1) {
246 struct pollfd pf[1];
247 pf[0].fd = qi->kick_fd;
248 pf[0].events = POLLIN;
249 pf[0].revents = 0;
251 fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
252 qi->qidx);
253 int poll_res = ppoll(pf, 1, NULL, NULL);
255 if (poll_res == -1) {
256 if (errno == EINTR) {
257 fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
258 __func__);
259 continue;
261 fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
262 break;
264 assert(poll_res == 1);
265 if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
266 fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
267 __func__, pf[0].revents, qi->qidx);
268 break;
270 assert(pf[0].revents & POLLIN);
271 fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
272 qi->qidx);
274 eventfd_t evalue;
275 if (eventfd_read(qi->kick_fd, &evalue)) {
276 fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
277 break;
279 /* out is from guest, in is too guest */
280 unsigned int in_bytes, out_bytes;
281 vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
283 fuse_log(FUSE_LOG_DEBUG,
284 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
285 __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
287 while (1) {
289 * An element contains one request and the space to send our
290 * response They're spread over multiple descriptors in a
291 * scatter/gather set and we can't trust the guest to keep them
292 * still; so copy in/out.
294 VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement));
295 if (!elem) {
296 break;
299 if (!fbuf.mem) {
300 fbuf.mem = malloc(se->bufsize);
301 assert(fbuf.mem);
302 assert(se->bufsize > sizeof(struct fuse_in_header));
304 /* The 'out' part of the elem is from qemu */
305 unsigned int out_num = elem->out_num;
306 struct iovec *out_sg = elem->out_sg;
307 size_t out_len = iov_size(out_sg, out_num);
308 fuse_log(FUSE_LOG_DEBUG,
309 "%s: elem %d: with %d out desc of length %zd\n", __func__,
310 elem->index, out_num, out_len);
313 * The elem should contain a 'fuse_in_header' (in to fuse)
314 * plus the data based on the len in the header.
316 if (out_len < sizeof(struct fuse_in_header)) {
317 fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
318 __func__, elem->index);
319 assert(0); /* TODO */
321 if (out_len > se->bufsize) {
322 fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n",
323 __func__, elem->index);
324 assert(0); /* TODO */
326 copy_from_iov(&fbuf, out_num, out_sg);
327 fbuf.size = out_len;
329 /* TODO! Endianness of header */
331 /* TODO: Add checks for fuse_session_exited */
332 fuse_session_process_buf_int(se, &fbuf, &ch);
334 qi->qe = NULL;
335 free(elem);
336 elem = NULL;
339 pthread_mutex_destroy(&ch.lock);
340 free(fbuf.mem);
342 return NULL;
345 /* Callback from libvhost-user on start or stop of a queue */
346 static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
348 struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
349 struct fv_QueueInfo *ourqi;
351 fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
352 started);
353 assert(qidx >= 0);
356 * Ignore additional request queues for now. passthrough_ll.c must be
357 * audited for thread-safety issues first. It was written with a
358 * well-behaved client in mind and may not protect against all types of
359 * races yet.
361 if (qidx > 1) {
362 fuse_log(FUSE_LOG_ERR,
363 "%s: multiple request queues not yet implemented, please only "
364 "configure 1 request queue\n",
365 __func__);
366 exit(EXIT_FAILURE);
369 if (started) {
370 /* Fire up a thread to watch this queue */
371 if (qidx >= vud->nqueues) {
372 vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0]));
373 assert(vud->qi);
374 memset(vud->qi + vud->nqueues, 0,
375 sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
376 vud->nqueues = qidx + 1;
378 if (!vud->qi[qidx]) {
379 vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1);
380 assert(vud->qi[qidx]);
381 vud->qi[qidx]->virtio_dev = vud;
382 vud->qi[qidx]->qidx = qidx;
383 } else {
384 /* Shouldn't have been started */
385 assert(vud->qi[qidx]->kick_fd == -1);
387 ourqi = vud->qi[qidx];
388 ourqi->kick_fd = dev->vq[qidx].kick_fd;
389 if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
390 fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
391 __func__, qidx);
392 assert(0);
394 } else {
395 /* TODO: Kill the thread */
396 assert(qidx < vud->nqueues);
397 ourqi = vud->qi[qidx];
398 ourqi->kick_fd = -1;
402 static bool fv_queue_order(VuDev *dev, int qidx)
404 return false;
407 static const VuDevIface fv_iface = {
408 .get_features = fv_get_features,
409 .set_features = fv_set_features,
411 /* Don't need process message, we've not got any at vhost-user level */
412 .queue_set_started = fv_queue_set_started,
414 .queue_is_processed_in_order = fv_queue_order,
418 * Main loop; this mostly deals with events on the vhost-user
419 * socket itself, and not actual fuse data.
421 int virtio_loop(struct fuse_session *se)
423 fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
425 while (!fuse_session_exited(se)) {
426 struct pollfd pf[1];
427 pf[0].fd = se->vu_socketfd;
428 pf[0].events = POLLIN;
429 pf[0].revents = 0;
431 fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
432 int poll_res = ppoll(pf, 1, NULL, NULL);
434 if (poll_res == -1) {
435 if (errno == EINTR) {
436 fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
437 __func__);
438 continue;
440 fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
441 break;
443 assert(poll_res == 1);
444 if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
445 fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
446 pf[0].revents);
447 break;
449 assert(pf[0].revents & POLLIN);
450 fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
451 if (!vu_dispatch(&se->virtio_dev->dev)) {
452 fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
453 break;
457 fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
459 return 0;
462 int virtio_session_mount(struct fuse_session *se)
464 struct sockaddr_un un;
465 mode_t old_umask;
467 if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
468 fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
469 return -1;
472 se->fd = -1;
475 * Create the Unix socket to communicate with qemu
476 * based on QEMU's vhost-user-bridge
478 unlink(se->vu_socket_path);
479 strcpy(un.sun_path, se->vu_socket_path);
480 size_t addr_len = sizeof(un);
482 int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
483 if (listen_sock == -1) {
484 fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
485 return -1;
487 un.sun_family = AF_UNIX;
490 * Unfortunately bind doesn't let you set the mask on the socket,
491 * so set umask to 077 and restore it later.
493 old_umask = umask(0077);
494 if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
495 fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
496 umask(old_umask);
497 return -1;
499 umask(old_umask);
501 if (listen(listen_sock, 1) == -1) {
502 fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
503 return -1;
506 fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
507 __func__);
508 int data_sock = accept(listen_sock, NULL, NULL);
509 if (data_sock == -1) {
510 fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
511 close(listen_sock);
512 return -1;
514 close(listen_sock);
515 fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
516 __func__);
518 /* TODO: Some cleanup/deallocation! */
519 se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
520 if (!se->virtio_dev) {
521 fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__);
522 close(data_sock);
523 return -1;
526 se->vu_socketfd = data_sock;
527 se->virtio_dev->se = se;
528 vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
529 fv_remove_watch, &fv_iface);
531 return 0;