4 * Copyright (c) 2015 Red Hat, Inc.
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of used and buffers
30 #define _FILE_OFFSET_BITS 64
32 #include "qemu/osdep.h"
33 #include "qemu/atomic.h"
34 #include "qemu/ctype.h"
36 #include "standard-headers/linux/virtio_net.h"
37 #include "libvhost-user.h"
39 #define VHOST_USER_BRIDGE_DEBUG 1
43 if (VHOST_USER_BRIDGE_DEBUG) { \
44 printf(__VA_ARGS__); \
/*
 * Queue count used throughout the bridge: it is the value passed to
 * vu_init() and the bound of the per-queue loop in the notifier thread.
 */
enum { VHOST_USER_BRIDGE_MAX_QUEUES = 8 };
/*
 * Handler type stored per socket in the dispatcher.  It is invoked with the
 * socket number and the opaque context pointer that was registered with it.
 */
typedef void (*CallbackFunc)(int sock, void *ctx);
54 typedef struct Event
{
56 CallbackFunc callback
;
59 typedef struct Dispatcher
{
62 Event events
[FD_SETSIZE
];
65 typedef struct VubrDev
{
67 Dispatcher dispatcher
;
69 struct sockaddr_in backend_udp_dest
;
82 vubr_die(const char *s
)
89 dispatcher_init(Dispatcher
*dispr
)
91 FD_ZERO(&dispr
->fdset
);
97 dispatcher_add(Dispatcher
*dispr
, int sock
, void *ctx
, CallbackFunc cb
)
99 if (sock
>= FD_SETSIZE
) {
101 "Error: Failed to add new event. sock %d should be less than %d\n",
106 dispr
->events
[sock
].ctx
= ctx
;
107 dispr
->events
[sock
].callback
= cb
;
109 FD_SET(sock
, &dispr
->fdset
);
110 if (sock
> dispr
->max_sock
) {
111 dispr
->max_sock
= sock
;
113 DPRINT("Added sock %d for watching. max_sock: %d\n",
114 sock
, dispr
->max_sock
);
119 dispatcher_remove(Dispatcher
*dispr
, int sock
)
121 if (sock
>= FD_SETSIZE
) {
123 "Error: Failed to remove event. sock %d should be less than %d\n",
128 FD_CLR(sock
, &dispr
->fdset
);
129 DPRINT("Sock %d removed from dispatcher watch.\n", sock
);
135 dispatcher_wait(Dispatcher
*dispr
, uint32_t timeout
)
138 tv
.tv_sec
= timeout
/ 1000000;
139 tv
.tv_usec
= timeout
% 1000000;
141 fd_set fdset
= dispr
->fdset
;
/* Wait until at least one of the watched sockets becomes readable. */
144 int rc
= select(dispr
->max_sock
+ 1, &fdset
, 0, 0, &tv
);
155 /* Now call callback for every ready socket. */
158 for (sock
= 0; sock
< dispr
->max_sock
+ 1; sock
++) {
/* A callback on one socket can remove other sockets from the
 * dispatcher, so we have to check that this socket has not been
 * removed from the dispatcher's list in the meantime.
 */
163 if (FD_ISSET(sock
, &fdset
) && FD_ISSET(sock
, &dispr
->fdset
)) {
164 Event
*e
= &dispr
->events
[sock
];
165 e
->callback(sock
, e
->ctx
);
173 vubr_handle_tx(VuDev
*dev
, int qidx
)
175 VuVirtq
*vq
= vu_get_queue(dev
, qidx
);
176 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
177 int hdrlen
= vubr
->hdrlen
;
178 VuVirtqElement
*elem
= NULL
;
184 unsigned int out_num
;
185 struct iovec sg
[VIRTQUEUE_MAX_SIZE
], *out_sg
;
187 elem
= vu_queue_pop(dev
, vq
, sizeof(VuVirtqElement
));
192 out_num
= elem
->out_num
;
193 out_sg
= elem
->out_sg
;
195 fprintf(stderr
, "virtio-net header not in first element\n");
198 if (VHOST_USER_BRIDGE_DEBUG
) {
199 iov_hexdump(out_sg
, out_num
, stderr
, "TX:", 1024);
203 unsigned sg_num
= iov_copy(sg
, ARRAY_SIZE(sg
),
210 struct msghdr msg
= {
211 .msg_name
= (struct sockaddr
*) &vubr
->backend_udp_dest
,
212 .msg_namelen
= sizeof(struct sockaddr_in
),
214 .msg_iovlen
= out_num
,
217 ret
= sendmsg(vubr
->backend_udp_sock
, &msg
, 0);
218 } while (ret
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
221 vubr_die("sendmsg()");
224 vu_queue_push(dev
, vq
, elem
, 0);
225 vu_queue_notify(dev
, vq
);
/*
 * Reverse the effect of iov_discard_front().
 *
 * It must be called with 'front' being the first element of the original
 * struct iovec array, 'iov' being the element the array was advanced to,
 * and 'bytes' being the number of bytes that were shaved off.
 *
 * Every element in [front, iov) is counted as skipped in full; what is
 * left of 'bytes' after that is given back to the start of *iov.
 */
static void
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
    struct iovec *cur;

    /* Subtract the length of each element that lies before 'iov'. */
    for (cur = front; cur != iov; cur++) {
        assert(bytes >= cur->iov_len);
        bytes -= cur->iov_len;
    }

    /*
     * Move the base back by the remainder and grow the length to match.
     * Go through char * because arithmetic on void * is a GNU extension,
     * not ISO C.
     */
    cur->iov_base = (char *)cur->iov_base - bytes;
    cur->iov_len += bytes;
}
/*
 * Cut the scatter/gather list 'iov' (of 'iovc' elements) at 'bytes' bytes.
 *
 * The element in which the cut falls gets its iov_len reduced; elements
 * after it are left untouched.  When 'bytes' is exactly the total length
 * of the list there is nothing to cut and the list is left as it is.
 * Asking for more bytes than the list holds is a programming error and
 * trips the assertion.
 */
static void
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
    unsigned i;

    for (i = 0; i < iovc; i++, iov++) {
        if (bytes < iov->iov_len) {
            iov->iov_len = bytes;
            return;
        }

        bytes -= iov->iov_len;
    }

    /*
     * Every element was consumed in full.  An exact fit is a valid
     * request (for example a packet that fills the buffers completely),
     * so only a leftover byte count is an error.
     */
    if (bytes == 0) {
        return;
    }

    assert(!"couldn't truncate iov");
}
271 vubr_backend_recv_cb(int sock
, void *ctx
)
273 VubrDev
*vubr
= (VubrDev
*) ctx
;
274 VuDev
*dev
= &vubr
->vudev
;
275 VuVirtq
*vq
= vu_get_queue(dev
, 0);
276 VuVirtqElement
*elem
= NULL
;
277 struct iovec mhdr_sg
[VIRTQUEUE_MAX_SIZE
];
278 struct virtio_net_hdr_mrg_rxbuf mhdr
;
279 unsigned mhdr_cnt
= 0;
280 int hdrlen
= vubr
->hdrlen
;
282 struct virtio_net_hdr hdr
= {
284 .gso_type
= VIRTIO_NET_HDR_GSO_NONE
287 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
288 DPRINT(" hdrlen = %d\n", hdrlen
);
290 if (!vu_queue_enabled(dev
, vq
) ||
291 !vu_queue_started(dev
, vq
) ||
292 !vu_queue_avail_bytes(dev
, vq
, hdrlen
, 0)) {
293 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
299 ssize_t ret
, total
= 0;
302 elem
= vu_queue_pop(dev
, vq
, sizeof(VuVirtqElement
));
307 if (elem
->in_num
< 1) {
308 fprintf(stderr
, "virtio-net contains no in buffers\n");
316 mhdr_cnt
= iov_copy(mhdr_sg
, ARRAY_SIZE(mhdr_sg
),
318 offsetof(typeof(mhdr
), num_buffers
),
319 sizeof(mhdr
.num_buffers
));
321 iov_from_buf(sg
, elem
->in_num
, 0, &hdr
, sizeof hdr
);
323 ret
= iov_discard_front(&sg
, &num
, hdrlen
);
324 assert(ret
== hdrlen
);
327 struct msghdr msg
= {
328 .msg_name
= (struct sockaddr
*) &vubr
->backend_udp_dest
,
329 .msg_namelen
= sizeof(struct sockaddr_in
),
332 .msg_flags
= MSG_DONTWAIT
,
335 ret
= recvmsg(vubr
->backend_udp_sock
, &msg
, 0);
336 } while (ret
== -1 && (errno
== EINTR
));
339 iov_restore_front(elem
->in_sg
, sg
, hdrlen
);
343 if (errno
== EWOULDBLOCK
) {
344 vu_queue_rewind(dev
, vq
, 1);
348 vubr_die("recvmsg()");
352 iov_truncate(elem
->in_sg
, elem
->in_num
, total
);
353 vu_queue_fill(dev
, vq
, elem
, total
, i
++);
358 break; /* could loop if DONTWAIT worked? */
362 mhdr
.num_buffers
= i
;
363 iov_from_buf(mhdr_sg
, mhdr_cnt
,
365 &mhdr
.num_buffers
, sizeof mhdr
.num_buffers
);
368 vu_queue_flush(dev
, vq
, i
);
369 vu_queue_notify(dev
, vq
);
375 vubr_receive_cb(int sock
, void *ctx
)
377 VubrDev
*vubr
= (VubrDev
*)ctx
;
379 if (!vu_dispatch(&vubr
->vudev
)) {
380 fprintf(stderr
, "Error while dispatching\n");
384 typedef struct WatchData
{
391 watch_cb(int sock
, void *ctx
)
393 struct WatchData
*wd
= ctx
;
395 wd
->cb(wd
->dev
, VU_WATCH_IN
, wd
->data
);
399 vubr_set_watch(VuDev
*dev
, int fd
, int condition
,
400 vu_watch_cb cb
, void *data
)
402 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
403 static WatchData watches
[FD_SETSIZE
];
404 struct WatchData
*wd
= &watches
[fd
];
409 dispatcher_add(&vubr
->dispatcher
, fd
, wd
, watch_cb
);
413 vubr_remove_watch(VuDev
*dev
, int fd
)
415 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
417 dispatcher_remove(&vubr
->dispatcher
, fd
);
421 vubr_send_rarp_exec(VuDev
*dev
, VhostUserMsg
*vmsg
)
423 DPRINT("Function %s() not implemented yet.\n", __func__
);
428 vubr_process_msg(VuDev
*dev
, VhostUserMsg
*vmsg
, int *do_reply
)
430 switch (vmsg
->request
) {
431 case VHOST_USER_SEND_RARP
:
432 *do_reply
= vubr_send_rarp_exec(dev
, vmsg
);
435 /* let the library handle the rest */
443 vubr_set_features(VuDev
*dev
, uint64_t features
)
445 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
447 if ((features
& (1ULL << VIRTIO_F_VERSION_1
)) ||
448 (features
& (1ULL << VIRTIO_NET_F_MRG_RXBUF
))) {
456 vubr_get_features(VuDev
*dev
)
458 return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE
|
459 1ULL << VIRTIO_NET_F_MRG_RXBUF
|
460 1ULL << VIRTIO_F_VERSION_1
;
464 vubr_queue_set_started(VuDev
*dev
, int qidx
, bool started
)
466 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
467 VuVirtq
*vq
= vu_get_queue(dev
, qidx
);
469 if (started
&& vubr
->notifier
.fd
>= 0) {
470 vu_set_queue_host_notifier(dev
, vq
, vubr
->notifier
.fd
,
471 qemu_real_host_page_size(),
472 qidx
* qemu_real_host_page_size());
476 vu_set_queue_handler(dev
, vq
, started
? vubr_handle_tx
: NULL
);
481 vubr_panic(VuDev
*dev
, const char *msg
)
483 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
485 fprintf(stderr
, "PANIC: %s\n", msg
);
487 dispatcher_remove(&vubr
->dispatcher
, dev
->sock
);
492 vubr_queue_is_processed_in_order(VuDev
*dev
, int qidx
)
497 static const VuDevIface vuiface
= {
498 .get_features
= vubr_get_features
,
499 .set_features
= vubr_set_features
,
500 .process_msg
= vubr_process_msg
,
501 .queue_set_started
= vubr_queue_set_started
,
502 .queue_is_processed_in_order
= vubr_queue_is_processed_in_order
,
506 vubr_accept_cb(int sock
, void *ctx
)
508 VubrDev
*dev
= (VubrDev
*)ctx
;
510 struct sockaddr_un un
;
511 socklen_t len
= sizeof(un
);
513 conn_fd
= accept(sock
, (struct sockaddr
*) &un
, &len
);
515 vubr_die("accept()");
517 DPRINT("Got connection from remote peer on sock %d\n", conn_fd
);
519 if (!vu_init(&dev
->vudev
,
520 VHOST_USER_BRIDGE_MAX_QUEUES
,
527 fprintf(stderr
, "Failed to initialize libvhost-user\n");
531 dispatcher_add(&dev
->dispatcher
, conn_fd
, ctx
, vubr_receive_cb
);
532 dispatcher_remove(&dev
->dispatcher
, sock
);
536 vubr_new(const char *path
, bool client
)
538 VubrDev
*dev
= (VubrDev
*) calloc(1, sizeof(VubrDev
));
539 struct sockaddr_un un
;
543 if (strlen(path
) >= sizeof(un
.sun_path
)) {
544 fprintf(stderr
, "unix domain socket path '%s' is too long\n", path
);
548 /* Get a UNIX socket. */
549 dev
->sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
550 if (dev
->sock
== -1) {
554 dev
->notifier
.fd
= -1;
556 un
.sun_family
= AF_UNIX
;
557 strcpy(un
.sun_path
, path
);
558 len
= sizeof(un
.sun_family
) + strlen(path
);
563 if (bind(dev
->sock
, (struct sockaddr
*) &un
, len
) == -1) {
567 if (listen(dev
->sock
, 1) == -1) {
572 DPRINT("Waiting for connections on UNIX socket %s ...\n", path
);
574 if (connect(dev
->sock
, (struct sockaddr
*)&un
, len
) == -1) {
578 if (!vu_init(&dev
->vudev
,
579 VHOST_USER_BRIDGE_MAX_QUEUES
,
586 fprintf(stderr
, "Failed to initialize libvhost-user\n");
590 cb
= vubr_receive_cb
;
593 dispatcher_init(&dev
->dispatcher
);
595 dispatcher_add(&dev
->dispatcher
, dev
->sock
, (void *)dev
, cb
);
600 static void *notifier_thread(void *arg
)
602 VuDev
*dev
= (VuDev
*)arg
;
603 VubrDev
*vubr
= container_of(dev
, VubrDev
, vudev
);
604 int pagesize
= qemu_real_host_page_size();
608 for (qidx
= 0; qidx
< VHOST_USER_BRIDGE_MAX_QUEUES
; qidx
++) {
609 uint16_t *n
= vubr
->notifier
.addr
+ pagesize
* qidx
;
613 /* We won't miss notifications if we reset
614 * the memory first. */
617 DPRINT("Got a notification for queue%d via host notifier.\n",
621 vubr_handle_tx(dev
, qidx
);
632 vubr_host_notifier_setup(VubrDev
*dev
)
634 char template[] = "/tmp/vubr-XXXXXX";
640 length
= qemu_real_host_page_size() * VHOST_USER_BRIDGE_MAX_QUEUES
;
642 fd
= mkstemp(template);
644 vubr_die("mkstemp()");
647 if (posix_fallocate(fd
, 0, length
) != 0) {
648 vubr_die("posix_fallocate()");
651 addr
= mmap(NULL
, length
, PROT_READ
| PROT_WRITE
, MAP_SHARED
, fd
, 0);
652 if (addr
== MAP_FAILED
) {
656 memset(addr
, 0xff, length
);
658 if (pthread_create(&thread
, NULL
, notifier_thread
, &dev
->vudev
) != 0) {
659 vubr_die("pthread_create()");
662 dev
->notifier
.fd
= fd
;
663 dev
->notifier
.addr
= addr
;
664 dev
->notifier
.thread
= thread
;
668 vubr_set_host(struct sockaddr_in
*saddr
, const char *host
)
670 if (qemu_isdigit(host
[0])) {
671 if (!inet_aton(host
, &saddr
->sin_addr
)) {
672 fprintf(stderr
, "inet_aton() failed.\n");
676 struct hostent
*he
= gethostbyname(host
);
679 fprintf(stderr
, "gethostbyname() failed.\n");
682 saddr
->sin_addr
= *(struct in_addr
*)he
->h_addr
;
687 vubr_backend_udp_setup(VubrDev
*dev
,
688 const char *local_host
,
689 const char *local_port
,
690 const char *remote_host
,
691 const char *remote_port
)
698 lport
= strtol(local_port
, (char **)&r
, 0);
699 if (r
== local_port
) {
700 fprintf(stderr
, "lport parsing failed.\n");
704 rport
= strtol(remote_port
, (char **)&r
, 0);
705 if (r
== remote_port
) {
706 fprintf(stderr
, "rport parsing failed.\n");
710 struct sockaddr_in si_local
= {
711 .sin_family
= AF_INET
,
712 .sin_port
= htons(lport
),
715 vubr_set_host(&si_local
, local_host
);
717 /* setup destination for sends */
718 dev
->backend_udp_dest
= (struct sockaddr_in
) {
719 .sin_family
= AF_INET
,
720 .sin_port
= htons(rport
),
722 vubr_set_host(&dev
->backend_udp_dest
, remote_host
);
724 sock
= socket(AF_INET
, SOCK_DGRAM
, IPPROTO_UDP
);
729 if (bind(sock
, (struct sockaddr
*)&si_local
, sizeof(si_local
)) == -1) {
733 dev
->backend_udp_sock
= sock
;
734 dispatcher_add(&dev
->dispatcher
, sock
, dev
, vubr_backend_recv_cb
);
735 DPRINT("Waiting for data from udp backend on %s:%d...\n",
740 vubr_run(VubrDev
*dev
)
744 dispatcher_wait(&dev
->dispatcher
, 200000);
745 /* Here one can try polling strategy. */
750 vubr_parse_host_port(const char **host
, const char **port
, const char *buf
)
752 char *p
= strchr(buf
, ':');
759 *port
= strdup(p
+ 1);
/* Built-in defaults; these are also the values printed by the usage text. */
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

/*
 * Effective settings.  They start out at the defaults and are replaced by
 * main() when the corresponding command-line option is given.
 */
static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST, *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST, *rport = DEFAULT_RPORT;
776 main(int argc
, char *argv
[])
781 bool host_notifier
= false;
783 while ((opt
= getopt(argc
, argv
, "l:r:u:cH")) != -1) {
787 if (vubr_parse_host_port(&lhost
, &lport
, optarg
) < 0) {
792 if (vubr_parse_host_port(&rhost
, &rport
, optarg
) < 0) {
797 ud_socket_path
= strdup(optarg
);
803 host_notifier
= true;
810 DPRINT("ud socket: %s (%s)\n", ud_socket_path
,
811 client
? "client" : "server");
812 DPRINT("local: %s:%s\n", lhost
, lport
);
813 DPRINT("remote: %s:%s\n", rhost
, rport
);
815 dev
= vubr_new(ud_socket_path
, client
);
821 vubr_host_notifier_setup(dev
);
824 vubr_backend_udp_setup(dev
, lhost
, lport
, rhost
, rport
);
827 vu_deinit(&dev
->vudev
);
832 fprintf(stderr
, "Usage: %s ", argv
[0]);
833 fprintf(stderr
, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
834 fprintf(stderr
, "\t-u path to unix domain socket. default: %s\n",
836 fprintf(stderr
, "\t-l local host and port. default: %s:%s\n",
837 DEFAULT_LHOST
, DEFAULT_LPORT
);
838 fprintf(stderr
, "\t-r remote host and port. default: %s:%s\n",
839 DEFAULT_RHOST
, DEFAULT_RPORT
);
840 fprintf(stderr
, "\t-c client mode\n");
841 fprintf(stderr
, "\t-H use host notifier\n");