Merge tag 'v9.0.0-rc3'
[qemu/ar7.git] / tests / vhost-user-bridge.c
bloba5c711b1de8e9c164dd1614f4329b8e3c05d0402
1 /*
2 * Vhost User Bridge
4 * Copyright (c) 2015 Red Hat, Inc.
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
14 * TODO:
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of used and buffers
27 * dirty page logging.
30 #define _FILE_OFFSET_BITS 64
32 #include "qemu/osdep.h"
33 #include "qemu/atomic.h"
34 #include "qemu/ctype.h"
35 #include "qemu/iov.h"
36 #include "standard-headers/linux/virtio_net.h"
37 #include "libvhost-user.h"
/* Compile-time switch for the DPRINT() trace output below (non-zero = on). */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf()-style trace helper.  The arguments are always compiled (so they
 * stay type-checked) but only printed when VHOST_USER_BRIDGE_DEBUG is
 * non-zero.
 */
#define DPRINT(...)                          \
    do {                                     \
        if (VHOST_USER_BRIDGE_DEBUG) {       \
            printf(__VA_ARGS__);             \
        }                                    \
    } while (0)
/*
 * Number of virtqueues requested from vu_init() and scanned by the host
 * notifier thread.
 */
enum {
    VHOST_USER_BRIDGE_MAX_QUEUES = 8,
};
/* Handler called by the dispatcher when @sock is reported readable. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One registered handler; the dispatcher keeps one slot per descriptor. */
typedef struct Event {
    void *ctx;              /* opaque pointer passed back to the callback */
    CallbackFunc callback;  /* function to run when the socket is ready */
} Event;

/* select()-based event loop state. */
typedef struct Dispatcher {
    int max_sock;               /* highest descriptor ever added, -1 if none */
    fd_set fdset;               /* descriptors currently being watched */
    Event events[FD_SETSIZE];   /* handlers, indexed by descriptor number */
} Dispatcher;
65 typedef struct VubrDev {
66 VuDev vudev;
67 Dispatcher dispatcher;
68 int backend_udp_sock;
69 struct sockaddr_in backend_udp_dest;
70 int hdrlen;
71 int sock;
72 int ready;
73 int quit;
74 struct {
75 int fd;
76 void *addr;
77 pthread_t thread;
78 } notifier;
79 } VubrDev;
/*
 * Report the current errno on stderr, prefixed with @s, and terminate the
 * process with exit status 1.  Does not return.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
88 static int
89 dispatcher_init(Dispatcher *dispr)
91 FD_ZERO(&dispr->fdset);
92 dispr->max_sock = -1;
93 return 0;
96 static int
97 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
99 if (sock >= FD_SETSIZE) {
100 fprintf(stderr,
101 "Error: Failed to add new event. sock %d should be less than %d\n",
102 sock, FD_SETSIZE);
103 return -1;
106 dispr->events[sock].ctx = ctx;
107 dispr->events[sock].callback = cb;
109 FD_SET(sock, &dispr->fdset);
110 if (sock > dispr->max_sock) {
111 dispr->max_sock = sock;
113 DPRINT("Added sock %d for watching. max_sock: %d\n",
114 sock, dispr->max_sock);
115 return 0;
118 static int
119 dispatcher_remove(Dispatcher *dispr, int sock)
121 if (sock >= FD_SETSIZE) {
122 fprintf(stderr,
123 "Error: Failed to remove event. sock %d should be less than %d\n",
124 sock, FD_SETSIZE);
125 return -1;
128 FD_CLR(sock, &dispr->fdset);
129 DPRINT("Sock %d removed from dispatcher watch.\n", sock);
130 return 0;
133 /* timeout in us */
134 static int
135 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
137 struct timeval tv;
138 tv.tv_sec = timeout / 1000000;
139 tv.tv_usec = timeout % 1000000;
141 fd_set fdset = dispr->fdset;
143 /* wait until some of sockets become readable. */
144 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
146 if (rc == -1) {
147 vubr_die("select");
150 /* Timeout */
151 if (rc == 0) {
152 return 0;
155 /* Now call callback for every ready socket. */
157 int sock;
158 for (sock = 0; sock < dispr->max_sock + 1; sock++) {
159 /* The callback on a socket can remove other sockets from the
160 * dispatcher, thus we have to check that the socket is
161 * still not removed from dispatcher's list
163 if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
164 Event *e = &dispr->events[sock];
165 e->callback(sock, e->ctx);
169 return 0;
172 static void
173 vubr_handle_tx(VuDev *dev, int qidx)
175 VuVirtq *vq = vu_get_queue(dev, qidx);
176 VubrDev *vubr = container_of(dev, VubrDev, vudev);
177 int hdrlen = vubr->hdrlen;
178 VuVirtqElement *elem = NULL;
180 assert(qidx % 2);
182 for (;;) {
183 ssize_t ret;
184 unsigned int out_num;
185 struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
187 elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
188 if (!elem) {
189 break;
192 out_num = elem->out_num;
193 out_sg = elem->out_sg;
194 if (out_num < 1) {
195 fprintf(stderr, "virtio-net header not in first element\n");
196 break;
198 if (VHOST_USER_BRIDGE_DEBUG) {
199 iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
202 if (hdrlen) {
203 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
204 out_sg, out_num,
205 hdrlen, -1);
206 out_num = sg_num;
207 out_sg = sg;
210 struct msghdr msg = {
211 .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
212 .msg_namelen = sizeof(struct sockaddr_in),
213 .msg_iov = out_sg,
214 .msg_iovlen = out_num,
216 do {
217 ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
218 } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
220 if (ret == -1) {
221 vubr_die("sendmsg()");
224 vu_queue_push(dev, vq, elem, 0);
225 vu_queue_notify(dev, vq);
227 free(elem);
228 elem = NULL;
231 free(elem);
/*
 * Undo iov_discard_front().
 *
 * @front is the start of the original iovec array, @iov the position the
 * discard left the caller at, and @bytes the number of bytes that were
 * discarded.  The lengths of the entries in [front, iov) are subtracted
 * from @bytes; whatever remains is given back to *iov by moving its base
 * down and growing its length.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *vec = front;

    while (vec != iov) {
        assert(bytes >= vec->iov_len);
        bytes -= vec->iov_len;
        vec++;
    }

    vec->iov_base = (char *)vec->iov_base - bytes;
    vec->iov_len += bytes;
}
/*
 * Shorten the scatter list @iov (with @iovc entries) so that the entry
 * containing byte offset @bytes ends exactly there.  Entries after that one
 * are left untouched.
 *
 * @bytes equal to the total length of the list is a valid no-op: the data
 * exactly fills the buffers, so there is nothing to cut.  Only a @bytes
 * value larger than the total length trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++, iov++) {
        if (bytes < iov->iov_len) {
            iov->iov_len = bytes;
            return;
        }

        bytes -= iov->iov_len;
    }

    /* Zero left over means an exact fit; anything else overflows the list. */
    assert(bytes == 0 && "couldn't truncate iov");
}
270 static void
271 vubr_backend_recv_cb(int sock, void *ctx)
273 VubrDev *vubr = (VubrDev *) ctx;
274 VuDev *dev = &vubr->vudev;
275 VuVirtq *vq = vu_get_queue(dev, 0);
276 VuVirtqElement *elem = NULL;
277 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
278 struct virtio_net_hdr_mrg_rxbuf mhdr;
279 unsigned mhdr_cnt = 0;
280 int hdrlen = vubr->hdrlen;
281 int i = 0;
282 struct virtio_net_hdr hdr = {
283 .flags = 0,
284 .gso_type = VIRTIO_NET_HDR_GSO_NONE
287 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
288 DPRINT(" hdrlen = %d\n", hdrlen);
290 if (!vu_queue_enabled(dev, vq) ||
291 !vu_queue_started(dev, vq) ||
292 !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
293 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
294 return;
297 while (1) {
298 struct iovec *sg;
299 ssize_t ret, total = 0;
300 unsigned int num;
302 elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
303 if (!elem) {
304 break;
307 if (elem->in_num < 1) {
308 fprintf(stderr, "virtio-net contains no in buffers\n");
309 break;
312 sg = elem->in_sg;
313 num = elem->in_num;
314 if (i == 0) {
315 if (hdrlen == 12) {
316 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
317 sg, elem->in_num,
318 offsetof(typeof(mhdr), num_buffers),
319 sizeof(mhdr.num_buffers));
321 iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
322 total += hdrlen;
323 ret = iov_discard_front(&sg, &num, hdrlen);
324 assert(ret == hdrlen);
327 struct msghdr msg = {
328 .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
329 .msg_namelen = sizeof(struct sockaddr_in),
330 .msg_iov = sg,
331 .msg_iovlen = num,
332 .msg_flags = MSG_DONTWAIT,
334 ret = RETRY_ON_EINTR(recvmsg(vubr->backend_udp_sock, &msg, 0));
336 if (i == 0) {
337 iov_restore_front(elem->in_sg, sg, hdrlen);
340 if (ret == -1) {
341 if (errno == EWOULDBLOCK) {
342 vu_queue_rewind(dev, vq, 1);
343 break;
346 vubr_die("recvmsg()");
349 total += ret;
350 iov_truncate(elem->in_sg, elem->in_num, total);
351 vu_queue_fill(dev, vq, elem, total, i++);
353 free(elem);
354 elem = NULL;
356 break; /* could loop if DONTWAIT worked? */
359 if (mhdr_cnt) {
360 mhdr.num_buffers = i;
361 iov_from_buf(mhdr_sg, mhdr_cnt,
363 &mhdr.num_buffers, sizeof mhdr.num_buffers);
366 vu_queue_flush(dev, vq, i);
367 vu_queue_notify(dev, vq);
369 free(elem);
372 static void
373 vubr_receive_cb(int sock, void *ctx)
375 VubrDev *vubr = (VubrDev *)ctx;
377 if (!vu_dispatch(&vubr->vudev)) {
378 fprintf(stderr, "Error while dispatching\n");
382 typedef struct WatchData {
383 VuDev *dev;
384 vu_watch_cb cb;
385 void *data;
386 } WatchData;
388 static void
389 watch_cb(int sock, void *ctx)
391 struct WatchData *wd = ctx;
393 wd->cb(wd->dev, VU_WATCH_IN, wd->data);
396 static void
397 vubr_set_watch(VuDev *dev, int fd, int condition,
398 vu_watch_cb cb, void *data)
400 VubrDev *vubr = container_of(dev, VubrDev, vudev);
401 static WatchData watches[FD_SETSIZE];
402 struct WatchData *wd = &watches[fd];
404 wd->cb = cb;
405 wd->data = data;
406 wd->dev = dev;
407 dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
410 static void
411 vubr_remove_watch(VuDev *dev, int fd)
413 VubrDev *vubr = container_of(dev, VubrDev, vudev);
415 dispatcher_remove(&vubr->dispatcher, fd);
418 static int
419 vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
421 DPRINT("Function %s() not implemented yet.\n", __func__);
422 return 0;
425 static int
426 vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
428 switch (vmsg->request) {
429 case VHOST_USER_SEND_RARP:
430 *do_reply = vubr_send_rarp_exec(dev, vmsg);
431 return 1;
432 default:
433 /* let the library handle the rest */
434 return 0;
437 return 0;
440 static void
441 vubr_set_features(VuDev *dev, uint64_t features)
443 VubrDev *vubr = container_of(dev, VubrDev, vudev);
445 if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
446 (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
447 vubr->hdrlen = 12;
448 } else {
449 vubr->hdrlen = 10;
453 static uint64_t
454 vubr_get_features(VuDev *dev)
456 return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
457 1ULL << VIRTIO_NET_F_MRG_RXBUF |
458 1ULL << VIRTIO_F_VERSION_1;
461 static void
462 vubr_queue_set_started(VuDev *dev, int qidx, bool started)
464 VubrDev *vubr = container_of(dev, VubrDev, vudev);
465 VuVirtq *vq = vu_get_queue(dev, qidx);
467 if (started && vubr->notifier.fd >= 0) {
468 vu_set_queue_host_notifier(dev, vq, vubr->notifier.fd,
469 qemu_real_host_page_size(),
470 qidx * qemu_real_host_page_size());
473 if (qidx % 2 == 1) {
474 vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
478 static void
479 vubr_panic(VuDev *dev, const char *msg)
481 VubrDev *vubr = container_of(dev, VubrDev, vudev);
483 fprintf(stderr, "PANIC: %s\n", msg);
485 dispatcher_remove(&vubr->dispatcher, dev->sock);
486 vubr->quit = 1;
489 static bool
490 vubr_queue_is_processed_in_order(VuDev *dev, int qidx)
492 return true;
495 static const VuDevIface vuiface = {
496 .get_features = vubr_get_features,
497 .set_features = vubr_set_features,
498 .process_msg = vubr_process_msg,
499 .queue_set_started = vubr_queue_set_started,
500 .queue_is_processed_in_order = vubr_queue_is_processed_in_order,
503 static void
504 vubr_accept_cb(int sock, void *ctx)
506 VubrDev *dev = (VubrDev *)ctx;
507 int conn_fd;
508 struct sockaddr_un un;
509 socklen_t len = sizeof(un);
511 conn_fd = accept(sock, (struct sockaddr *) &un, &len);
512 if (conn_fd == -1) {
513 vubr_die("accept()");
515 DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
517 if (!vu_init(&dev->vudev,
518 VHOST_USER_BRIDGE_MAX_QUEUES,
519 conn_fd,
520 vubr_panic,
521 NULL,
522 vubr_set_watch,
523 vubr_remove_watch,
524 &vuiface)) {
525 fprintf(stderr, "Failed to initialize libvhost-user\n");
526 exit(1);
529 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
530 dispatcher_remove(&dev->dispatcher, sock);
533 static VubrDev *
534 vubr_new(const char *path, bool client)
536 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
537 struct sockaddr_un un;
538 CallbackFunc cb;
539 size_t len;
541 if (strlen(path) >= sizeof(un.sun_path)) {
542 fprintf(stderr, "unix domain socket path '%s' is too long\n", path);
543 exit(1);
546 /* Get a UNIX socket. */
547 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
548 if (dev->sock == -1) {
549 vubr_die("socket");
552 dev->notifier.fd = -1;
554 un.sun_family = AF_UNIX;
555 strcpy(un.sun_path, path);
556 len = sizeof(un.sun_family) + strlen(path);
558 if (!client) {
559 unlink(path);
561 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
562 vubr_die("bind");
565 if (listen(dev->sock, 1) == -1) {
566 vubr_die("listen");
568 cb = vubr_accept_cb;
570 DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
571 } else {
572 if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
573 vubr_die("connect");
576 if (!vu_init(&dev->vudev,
577 VHOST_USER_BRIDGE_MAX_QUEUES,
578 dev->sock,
579 vubr_panic,
580 NULL,
581 vubr_set_watch,
582 vubr_remove_watch,
583 &vuiface)) {
584 fprintf(stderr, "Failed to initialize libvhost-user\n");
585 exit(1);
588 cb = vubr_receive_cb;
591 dispatcher_init(&dev->dispatcher);
593 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
595 return dev;
598 static void *notifier_thread(void *arg)
600 VuDev *dev = (VuDev *)arg;
601 VubrDev *vubr = container_of(dev, VubrDev, vudev);
602 int pagesize = qemu_real_host_page_size();
603 int qidx;
605 while (true) {
606 for (qidx = 0; qidx < VHOST_USER_BRIDGE_MAX_QUEUES; qidx++) {
607 uint16_t *n = vubr->notifier.addr + pagesize * qidx;
609 if (*n == qidx) {
610 *n = 0xffff;
611 /* We won't miss notifications if we reset
612 * the memory first. */
613 smp_mb();
615 DPRINT("Got a notification for queue%d via host notifier.\n",
616 qidx);
618 if (qidx % 2 == 1) {
619 vubr_handle_tx(dev, qidx);
622 usleep(1000);
626 return NULL;
629 static void
630 vubr_host_notifier_setup(VubrDev *dev)
632 pthread_t thread;
633 size_t length;
634 void *addr;
635 int fd;
637 length = qemu_real_host_page_size() * VHOST_USER_BRIDGE_MAX_QUEUES;
639 fd = g_file_open_tmp("vubr-XXXXXX", NULL, NULL);
640 if (fd < 0) {
641 vubr_die("mkstemp()");
644 if (posix_fallocate(fd, 0, length) != 0) {
645 vubr_die("posix_fallocate()");
648 addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
649 if (addr == MAP_FAILED) {
650 vubr_die("mmap()");
653 memset(addr, 0xff, length);
655 if (pthread_create(&thread, NULL, notifier_thread, &dev->vudev) != 0) {
656 vubr_die("pthread_create()");
659 dev->notifier.fd = fd;
660 dev->notifier.addr = addr;
661 dev->notifier.thread = thread;
664 static void
665 vubr_set_host(struct sockaddr_in *saddr, const char *host)
667 if (qemu_isdigit(host[0])) {
668 if (!inet_aton(host, &saddr->sin_addr)) {
669 fprintf(stderr, "inet_aton() failed.\n");
670 exit(1);
672 } else {
673 struct hostent *he = gethostbyname(host);
675 if (!he) {
676 fprintf(stderr, "gethostbyname() failed.\n");
677 exit(1);
679 saddr->sin_addr = *(struct in_addr *)he->h_addr;
683 static void
684 vubr_backend_udp_setup(VubrDev *dev,
685 const char *local_host,
686 const char *local_port,
687 const char *remote_host,
688 const char *remote_port)
690 int sock;
691 const char *r;
693 int lport, rport;
695 lport = strtol(local_port, (char **)&r, 0);
696 if (r == local_port) {
697 fprintf(stderr, "lport parsing failed.\n");
698 exit(1);
701 rport = strtol(remote_port, (char **)&r, 0);
702 if (r == remote_port) {
703 fprintf(stderr, "rport parsing failed.\n");
704 exit(1);
707 struct sockaddr_in si_local = {
708 .sin_family = AF_INET,
709 .sin_port = htons(lport),
712 vubr_set_host(&si_local, local_host);
714 /* setup destination for sends */
715 dev->backend_udp_dest = (struct sockaddr_in) {
716 .sin_family = AF_INET,
717 .sin_port = htons(rport),
719 vubr_set_host(&dev->backend_udp_dest, remote_host);
721 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
722 if (sock == -1) {
723 vubr_die("socket");
726 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
727 vubr_die("bind");
730 dev->backend_udp_sock = sock;
731 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
732 DPRINT("Waiting for data from udp backend on %s:%d...\n",
733 local_host, lport);
736 static void
737 vubr_run(VubrDev *dev)
739 while (!dev->quit) {
740 /* timeout 200ms */
741 dispatcher_wait(&dev->dispatcher, 200000);
742 /* Here one can try polling strategy. */
/*
 * Split @buf of the form "host:port" at the first ':' into two newly
 * allocated strings stored in *@host and *@port (owned by the caller).
 *
 * @buf is not modified.  Returns 0 on success; -1 if @buf contains no ':'
 * or memory could not be allocated, in which case *@host and *@port are
 * left unchanged.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *colon = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!colon) {
        return -1;
    }

    host_len = (size_t)(colon - buf);
    port_len = strlen(colon + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, colon + 1, port_len + 1); /* includes the NUL */

    *host = host_copy;
    *port = port_copy;
    return 0;
}
/* Built-in endpoints, used when the matching command-line option is absent. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() overrides them from -u, -l and -r. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;  /* -u */
static const char *lhost = DEFAULT_LHOST;               /* -l host part */
static const char *lport = DEFAULT_LPORT;               /* -l port part */
static const char *rhost = DEFAULT_RHOST;               /* -r host part */
static const char *rport = DEFAULT_RPORT;               /* -r port part */
773 main(int argc, char *argv[])
775 VubrDev *dev;
776 int opt;
777 bool client = false;
778 bool host_notifier = false;
780 while ((opt = getopt(argc, argv, "l:r:u:cH")) != -1) {
782 switch (opt) {
783 case 'l':
784 if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
785 goto out;
787 break;
788 case 'r':
789 if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
790 goto out;
792 break;
793 case 'u':
794 ud_socket_path = strdup(optarg);
795 break;
796 case 'c':
797 client = true;
798 break;
799 case 'H':
800 host_notifier = true;
801 break;
802 default:
803 goto out;
807 DPRINT("ud socket: %s (%s)\n", ud_socket_path,
808 client ? "client" : "server");
809 DPRINT("local: %s:%s\n", lhost, lport);
810 DPRINT("remote: %s:%s\n", rhost, rport);
812 dev = vubr_new(ud_socket_path, client);
813 if (!dev) {
814 return 1;
817 if (host_notifier) {
818 vubr_host_notifier_setup(dev);
821 vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
822 vubr_run(dev);
824 vu_deinit(&dev->vudev);
826 return 0;
828 out:
829 fprintf(stderr, "Usage: %s ", argv[0]);
830 fprintf(stderr, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
831 fprintf(stderr, "\t-u path to unix domain socket. default: %s\n",
832 DEFAULT_UD_SOCKET);
833 fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
834 DEFAULT_LHOST, DEFAULT_LPORT);
835 fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
836 DEFAULT_RHOST, DEFAULT_RPORT);
837 fprintf(stderr, "\t-c client mode\n");
838 fprintf(stderr, "\t-H use host notifier\n");
840 return 1;