slirp: replace qemu qtailq with slirp own copy
[qemu/kevin.git] / tests / vhost-user-bridge.c
blob0033b61f2ee690980f483031fd7d919b04c7c14a
1 /*
2 * Vhost User Bridge
4 * Copyright (c) 2015 Red Hat, Inc.
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
14 * TODO:
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of used and buffers
27 * dirty page logging.
30 #define _FILE_OFFSET_BITS 64
32 #include "qemu/osdep.h"
33 #include "qemu/atomic.h"
34 #include "qemu/iov.h"
35 #include "standard-headers/linux/virtio_net.h"
36 #include "contrib/libvhost-user/libvhost-user.h"
/* Compile-time switch for the tracing below; a zero value disables it. */
#define VHOST_USER_BRIDGE_DEBUG 1

/*
 * printf()-style trace to stdout, emitted only while
 * VHOST_USER_BRIDGE_DEBUG is non-zero.  Wrapped in do/while (0) so the
 * macro expands to exactly one statement.
 */
#define DPRINT(...)                        \
    do {                                   \
        if (VHOST_USER_BRIDGE_DEBUG) {     \
            printf(__VA_ARGS__);           \
        }                                  \
    } while (0)
/* Handler type the dispatcher calls with the ready socket and its context. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One handler registration; the dispatcher keeps one per socket number. */
typedef struct Event {
    void *ctx;              /* handed back to callback as its second argument */
    CallbackFunc callback;  /* invoked by dispatcher_wait() for a ready socket */
} Event;
54 typedef struct Dispatcher {
55 int max_sock;
56 fd_set fdset;
57 Event events[FD_SETSIZE];
58 } Dispatcher;
60 typedef struct VubrDev {
61 VuDev vudev;
62 Dispatcher dispatcher;
63 int backend_udp_sock;
64 struct sockaddr_in backend_udp_dest;
65 int hdrlen;
66 int sock;
67 int ready;
68 int quit;
69 struct {
70 int fd;
71 void *addr;
72 pthread_t thread;
73 } notifier;
74 } VubrDev;
/*
 * Fatal-error exit: print s followed by the text for the current errno
 * (via perror) and terminate the process with status 1.
 */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
83 static int
84 dispatcher_init(Dispatcher *dispr)
86 FD_ZERO(&dispr->fdset);
87 dispr->max_sock = -1;
88 return 0;
91 static int
92 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
94 if (sock >= FD_SETSIZE) {
95 fprintf(stderr,
96 "Error: Failed to add new event. sock %d should be less than %d\n",
97 sock, FD_SETSIZE);
98 return -1;
101 dispr->events[sock].ctx = ctx;
102 dispr->events[sock].callback = cb;
104 FD_SET(sock, &dispr->fdset);
105 if (sock > dispr->max_sock) {
106 dispr->max_sock = sock;
108 DPRINT("Added sock %d for watching. max_sock: %d\n",
109 sock, dispr->max_sock);
110 return 0;
113 static int
114 dispatcher_remove(Dispatcher *dispr, int sock)
116 if (sock >= FD_SETSIZE) {
117 fprintf(stderr,
118 "Error: Failed to remove event. sock %d should be less than %d\n",
119 sock, FD_SETSIZE);
120 return -1;
123 FD_CLR(sock, &dispr->fdset);
124 DPRINT("Sock %d removed from dispatcher watch.\n", sock);
125 return 0;
128 /* timeout in us */
129 static int
130 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
132 struct timeval tv;
133 tv.tv_sec = timeout / 1000000;
134 tv.tv_usec = timeout % 1000000;
136 fd_set fdset = dispr->fdset;
138 /* wait until some of sockets become readable. */
139 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
141 if (rc == -1) {
142 vubr_die("select");
145 /* Timeout */
146 if (rc == 0) {
147 return 0;
150 /* Now call callback for every ready socket. */
152 int sock;
153 for (sock = 0; sock < dispr->max_sock + 1; sock++) {
154 /* The callback on a socket can remove other sockets from the
155 * dispatcher, thus we have to check that the socket is
156 * still not removed from dispatcher's list
158 if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
159 Event *e = &dispr->events[sock];
160 e->callback(sock, e->ctx);
164 return 0;
167 static void
168 vubr_handle_tx(VuDev *dev, int qidx)
170 VuVirtq *vq = vu_get_queue(dev, qidx);
171 VubrDev *vubr = container_of(dev, VubrDev, vudev);
172 int hdrlen = vubr->hdrlen;
173 VuVirtqElement *elem = NULL;
175 assert(qidx % 2);
177 for (;;) {
178 ssize_t ret;
179 unsigned int out_num;
180 struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
182 elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
183 if (!elem) {
184 break;
187 out_num = elem->out_num;
188 out_sg = elem->out_sg;
189 if (out_num < 1) {
190 fprintf(stderr, "virtio-net header not in first element\n");
191 break;
193 if (VHOST_USER_BRIDGE_DEBUG) {
194 iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
197 if (hdrlen) {
198 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
199 out_sg, out_num,
200 hdrlen, -1);
201 out_num = sg_num;
202 out_sg = sg;
205 struct msghdr msg = {
206 .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
207 .msg_namelen = sizeof(struct sockaddr_in),
208 .msg_iov = out_sg,
209 .msg_iovlen = out_num,
211 do {
212 ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
213 } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
215 if (ret == -1) {
216 vubr_die("sendmsg()");
219 vu_queue_push(dev, vq, elem, 0);
220 vu_queue_notify(dev, vq);
222 free(elem);
223 elem = NULL;
226 free(elem);
/*
 * This function reverses the effect of iov_discard_front().  It must be
 * called with 'front' being the original struct iovec array, 'iov' the
 * element the discard stopped at, and 'bytes' the number of bytes that
 * were shaved off.
 *
 * Elements between front and iov are only walked to subtract their
 * lengths from bytes; the remainder is given back to *iov by moving its
 * base down and growing its length.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *cur;

    for (cur = front; cur != iov; cur++) {
        assert(bytes >= cur->iov_len);
        bytes -= cur->iov_len;
    }

    /* Arithmetic on void * is a GNU extension; go through char *. */
    cur->iov_base = (char *)cur->iov_base - bytes;
    cur->iov_len += bytes;
}
/*
 * Shorten an iovec array so that the element containing byte offset
 * 'bytes' ends exactly there.  Elements after that one are not touched.
 *
 * iov:   array to truncate in place
 * iovc:  number of elements in iov
 * bytes: number of bytes to keep; must not exceed the total length
 *
 * Keeping exactly the total length is valid and leaves the array
 * unchanged; asking for more trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++, iov++) {
        if (bytes < iov->iov_len) {
            iov->iov_len = bytes;
            return;
        }

        bytes -= iov->iov_len;
    }

    /* Only a request beyond the end of the array is an error. */
    assert(bytes == 0 && "couldn't truncate iov");
}
265 static void
266 vubr_backend_recv_cb(int sock, void *ctx)
268 VubrDev *vubr = (VubrDev *) ctx;
269 VuDev *dev = &vubr->vudev;
270 VuVirtq *vq = vu_get_queue(dev, 0);
271 VuVirtqElement *elem = NULL;
272 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
273 struct virtio_net_hdr_mrg_rxbuf mhdr;
274 unsigned mhdr_cnt = 0;
275 int hdrlen = vubr->hdrlen;
276 int i = 0;
277 struct virtio_net_hdr hdr = {
278 .flags = 0,
279 .gso_type = VIRTIO_NET_HDR_GSO_NONE
282 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
283 DPRINT(" hdrlen = %d\n", hdrlen);
285 if (!vu_queue_enabled(dev, vq) ||
286 !vu_queue_started(dev, vq) ||
287 !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
288 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
289 return;
292 while (1) {
293 struct iovec *sg;
294 ssize_t ret, total = 0;
295 unsigned int num;
297 elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
298 if (!elem) {
299 break;
302 if (elem->in_num < 1) {
303 fprintf(stderr, "virtio-net contains no in buffers\n");
304 break;
307 sg = elem->in_sg;
308 num = elem->in_num;
309 if (i == 0) {
310 if (hdrlen == 12) {
311 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
312 sg, elem->in_num,
313 offsetof(typeof(mhdr), num_buffers),
314 sizeof(mhdr.num_buffers));
316 iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
317 total += hdrlen;
318 ret = iov_discard_front(&sg, &num, hdrlen);
319 assert(ret == hdrlen);
322 struct msghdr msg = {
323 .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
324 .msg_namelen = sizeof(struct sockaddr_in),
325 .msg_iov = sg,
326 .msg_iovlen = num,
327 .msg_flags = MSG_DONTWAIT,
329 do {
330 ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
331 } while (ret == -1 && (errno == EINTR));
333 if (i == 0) {
334 iov_restore_front(elem->in_sg, sg, hdrlen);
337 if (ret == -1) {
338 if (errno == EWOULDBLOCK) {
339 vu_queue_rewind(dev, vq, 1);
340 break;
343 vubr_die("recvmsg()");
346 total += ret;
347 iov_truncate(elem->in_sg, elem->in_num, total);
348 vu_queue_fill(dev, vq, elem, total, i++);
350 free(elem);
351 elem = NULL;
353 break; /* could loop if DONTWAIT worked? */
356 if (mhdr_cnt) {
357 mhdr.num_buffers = i;
358 iov_from_buf(mhdr_sg, mhdr_cnt,
360 &mhdr.num_buffers, sizeof mhdr.num_buffers);
363 vu_queue_flush(dev, vq, i);
364 vu_queue_notify(dev, vq);
366 free(elem);
369 static void
370 vubr_receive_cb(int sock, void *ctx)
372 VubrDev *vubr = (VubrDev *)ctx;
374 if (!vu_dispatch(&vubr->vudev)) {
375 fprintf(stderr, "Error while dispatching\n");
379 typedef struct WatchData {
380 VuDev *dev;
381 vu_watch_cb cb;
382 void *data;
383 } WatchData;
385 static void
386 watch_cb(int sock, void *ctx)
388 struct WatchData *wd = ctx;
390 wd->cb(wd->dev, VU_WATCH_IN, wd->data);
393 static void
394 vubr_set_watch(VuDev *dev, int fd, int condition,
395 vu_watch_cb cb, void *data)
397 VubrDev *vubr = container_of(dev, VubrDev, vudev);
398 static WatchData watches[FD_SETSIZE];
399 struct WatchData *wd = &watches[fd];
401 wd->cb = cb;
402 wd->data = data;
403 wd->dev = dev;
404 dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
407 static void
408 vubr_remove_watch(VuDev *dev, int fd)
410 VubrDev *vubr = container_of(dev, VubrDev, vudev);
412 dispatcher_remove(&vubr->dispatcher, fd);
415 static int
416 vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
418 DPRINT("Function %s() not implemented yet.\n", __func__);
419 return 0;
422 static int
423 vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
425 switch (vmsg->request) {
426 case VHOST_USER_SEND_RARP:
427 *do_reply = vubr_send_rarp_exec(dev, vmsg);
428 return 1;
429 default:
430 /* let the library handle the rest */
431 return 0;
434 return 0;
437 static void
438 vubr_set_features(VuDev *dev, uint64_t features)
440 VubrDev *vubr = container_of(dev, VubrDev, vudev);
442 if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
443 (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
444 vubr->hdrlen = 12;
445 } else {
446 vubr->hdrlen = 10;
450 static uint64_t
451 vubr_get_features(VuDev *dev)
453 return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
454 1ULL << VIRTIO_NET_F_MRG_RXBUF |
455 1ULL << VIRTIO_F_VERSION_1;
458 static void
459 vubr_queue_set_started(VuDev *dev, int qidx, bool started)
461 VubrDev *vubr = container_of(dev, VubrDev, vudev);
462 VuVirtq *vq = vu_get_queue(dev, qidx);
464 if (started && vubr->notifier.fd >= 0) {
465 vu_set_queue_host_notifier(dev, vq, vubr->notifier.fd,
466 getpagesize(),
467 qidx * getpagesize());
470 if (qidx % 2 == 1) {
471 vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
475 static void
476 vubr_panic(VuDev *dev, const char *msg)
478 VubrDev *vubr = container_of(dev, VubrDev, vudev);
480 fprintf(stderr, "PANIC: %s\n", msg);
482 dispatcher_remove(&vubr->dispatcher, dev->sock);
483 vubr->quit = 1;
486 static bool
487 vubr_queue_is_processed_in_order(VuDev *dev, int qidx)
489 return true;
492 static const VuDevIface vuiface = {
493 .get_features = vubr_get_features,
494 .set_features = vubr_set_features,
495 .process_msg = vubr_process_msg,
496 .queue_set_started = vubr_queue_set_started,
497 .queue_is_processed_in_order = vubr_queue_is_processed_in_order,
500 static void
501 vubr_accept_cb(int sock, void *ctx)
503 VubrDev *dev = (VubrDev *)ctx;
504 int conn_fd;
505 struct sockaddr_un un;
506 socklen_t len = sizeof(un);
508 conn_fd = accept(sock, (struct sockaddr *) &un, &len);
509 if (conn_fd == -1) {
510 vubr_die("accept()");
512 DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
514 vu_init(&dev->vudev,
515 conn_fd,
516 vubr_panic,
517 vubr_set_watch,
518 vubr_remove_watch,
519 &vuiface);
521 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
522 dispatcher_remove(&dev->dispatcher, sock);
525 static VubrDev *
526 vubr_new(const char *path, bool client)
528 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
529 struct sockaddr_un un;
530 CallbackFunc cb;
531 size_t len;
533 /* Get a UNIX socket. */
534 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
535 if (dev->sock == -1) {
536 vubr_die("socket");
539 dev->notifier.fd = -1;
541 un.sun_family = AF_UNIX;
542 strcpy(un.sun_path, path);
543 len = sizeof(un.sun_family) + strlen(path);
545 if (!client) {
546 unlink(path);
548 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
549 vubr_die("bind");
552 if (listen(dev->sock, 1) == -1) {
553 vubr_die("listen");
555 cb = vubr_accept_cb;
557 DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
558 } else {
559 if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
560 vubr_die("connect");
562 vu_init(&dev->vudev,
563 dev->sock,
564 vubr_panic,
565 vubr_set_watch,
566 vubr_remove_watch,
567 &vuiface);
568 cb = vubr_receive_cb;
571 dispatcher_init(&dev->dispatcher);
573 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
575 return dev;
578 static void *notifier_thread(void *arg)
580 VuDev *dev = (VuDev *)arg;
581 VubrDev *vubr = container_of(dev, VubrDev, vudev);
582 int pagesize = getpagesize();
583 int qidx;
585 while (true) {
586 for (qidx = 0; qidx < VHOST_MAX_NR_VIRTQUEUE; qidx++) {
587 uint16_t *n = vubr->notifier.addr + pagesize * qidx;
589 if (*n == qidx) {
590 *n = 0xffff;
591 /* We won't miss notifications if we reset
592 * the memory first. */
593 smp_mb();
595 DPRINT("Got a notification for queue%d via host notifier.\n",
596 qidx);
598 if (qidx % 2 == 1) {
599 vubr_handle_tx(dev, qidx);
602 usleep(1000);
606 return NULL;
609 static void
610 vubr_host_notifier_setup(VubrDev *dev)
612 char template[] = "/tmp/vubr-XXXXXX";
613 pthread_t thread;
614 size_t length;
615 void *addr;
616 int fd;
618 length = getpagesize() * VHOST_MAX_NR_VIRTQUEUE;
620 fd = mkstemp(template);
621 if (fd < 0) {
622 vubr_die("mkstemp()");
625 if (posix_fallocate(fd, 0, length) != 0) {
626 vubr_die("posix_fallocate()");
629 addr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
630 if (addr == MAP_FAILED) {
631 vubr_die("mmap()");
634 memset(addr, 0xff, length);
636 if (pthread_create(&thread, NULL, notifier_thread, &dev->vudev) != 0) {
637 vubr_die("pthread_create()");
640 dev->notifier.fd = fd;
641 dev->notifier.addr = addr;
642 dev->notifier.thread = thread;
/*
 * Fill saddr->sin_addr from a host string.
 *
 * A string starting with a digit is parsed as a numeric IPv4 address
 * with inet_aton(); anything else is resolved with gethostbyname() and
 * the first returned address is used.  On failure a message is printed
 * and the process exits with status 1.
 */
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (isdigit((unsigned char)host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (!he || !he->h_addr_list[0] ||
            he->h_length != (int)sizeof(saddr->sin_addr)) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        /* Copy bytewise: the list entry is a char buffer, not an in_addr. */
        memcpy(&saddr->sin_addr, he->h_addr_list[0], sizeof(saddr->sin_addr));
    }
}
664 static void
665 vubr_backend_udp_setup(VubrDev *dev,
666 const char *local_host,
667 const char *local_port,
668 const char *remote_host,
669 const char *remote_port)
671 int sock;
672 const char *r;
674 int lport, rport;
676 lport = strtol(local_port, (char **)&r, 0);
677 if (r == local_port) {
678 fprintf(stderr, "lport parsing failed.\n");
679 exit(1);
682 rport = strtol(remote_port, (char **)&r, 0);
683 if (r == remote_port) {
684 fprintf(stderr, "rport parsing failed.\n");
685 exit(1);
688 struct sockaddr_in si_local = {
689 .sin_family = AF_INET,
690 .sin_port = htons(lport),
693 vubr_set_host(&si_local, local_host);
695 /* setup destination for sends */
696 dev->backend_udp_dest = (struct sockaddr_in) {
697 .sin_family = AF_INET,
698 .sin_port = htons(rport),
700 vubr_set_host(&dev->backend_udp_dest, remote_host);
702 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
703 if (sock == -1) {
704 vubr_die("socket");
707 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
708 vubr_die("bind");
711 dev->backend_udp_sock = sock;
712 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
713 DPRINT("Waiting for data from udp backend on %s:%d...\n",
714 local_host, lport);
717 static void
718 vubr_run(VubrDev *dev)
720 while (!dev->quit) {
721 /* timeout 200ms */
722 dispatcher_wait(&dev->dispatcher, 200000);
723 /* Here one can try polling strategy. */
/*
 * Split "host:port" at the first ':' into two newly allocated strings.
 *
 * host, port: receive the copies on success; untouched on failure
 * buf:        input string; it is not modified
 *
 * Returns 0 on success, -1 when buf contains no ':' or memory could not
 * be allocated.
 */
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    const char *sep = strchr(buf, ':');
    size_t host_len, port_len;
    char *host_copy, *port_copy;

    if (!sep) {
        return -1;
    }

    host_len = (size_t)(sep - buf);
    port_len = strlen(sep + 1);

    host_copy = malloc(host_len + 1);
    port_copy = malloc(port_len + 1);
    if (!host_copy || !port_copy) {
        free(host_copy);
        free(port_copy);
        return -1;
    }

    memcpy(host_copy, buf, host_len);
    host_copy[host_len] = '\0';
    memcpy(port_copy, sep + 1, port_len + 1);

    *host = host_copy;
    *port = port_copy;
    return 0;
}
/* Built-in endpoints, used when the matching option is not given. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/* Effective settings; main() overrides them from -u, -l and -r. */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;  /* -u */
static const char *lhost = DEFAULT_LHOST;               /* -l host part */
static const char *lport = DEFAULT_LPORT;               /* -l port part */
static const char *rhost = DEFAULT_RHOST;               /* -r host part */
static const char *rport = DEFAULT_RPORT;               /* -r port part */
754 main(int argc, char *argv[])
756 VubrDev *dev;
757 int opt;
758 bool client = false;
759 bool host_notifier = false;
761 while ((opt = getopt(argc, argv, "l:r:u:cH")) != -1) {
763 switch (opt) {
764 case 'l':
765 if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
766 goto out;
768 break;
769 case 'r':
770 if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
771 goto out;
773 break;
774 case 'u':
775 ud_socket_path = strdup(optarg);
776 break;
777 case 'c':
778 client = true;
779 break;
780 case 'H':
781 host_notifier = true;
782 break;
783 default:
784 goto out;
788 DPRINT("ud socket: %s (%s)\n", ud_socket_path,
789 client ? "client" : "server");
790 DPRINT("local: %s:%s\n", lhost, lport);
791 DPRINT("remote: %s:%s\n", rhost, rport);
793 dev = vubr_new(ud_socket_path, client);
794 if (!dev) {
795 return 1;
798 if (host_notifier) {
799 vubr_host_notifier_setup(dev);
802 vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
803 vubr_run(dev);
805 vu_deinit(&dev->vudev);
807 return 0;
809 out:
810 fprintf(stderr, "Usage: %s ", argv[0]);
811 fprintf(stderr, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
812 fprintf(stderr, "\t-u path to unix doman socket. default: %s\n",
813 DEFAULT_UD_SOCKET);
814 fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
815 DEFAULT_LHOST, DEFAULT_LPORT);
816 fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
817 DEFAULT_RHOST, DEFAULT_RPORT);
818 fprintf(stderr, "\t-c client mode\n");
819 fprintf(stderr, "\t-H use host notifier\n");
821 return 1;