qemu-doc: Fix ivshmem usage example with shm=...
[qemu/ar7.git] / tests / vhost-user-bridge.c
blob7bdfc98615baf34c1e843f2e378b887517dee0e4
1 /*
2 * Vhost User Bridge
4 * Copyright (c) 2015 Red Hat, Inc.
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
14 * TODO:
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of dirty page logging
27 * for the used ring and buffers.
30 #define _FILE_OFFSET_BITS 64
32 #include <stddef.h>
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <errno.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <sys/un.h>
44 #include <sys/unistd.h>
45 #include <sys/mman.h>
46 #include <sys/eventfd.h>
47 #include <arpa/inet.h>
49 #include <linux/vhost.h>
51 #include "qemu/atomic.h"
52 #include "standard-headers/linux/virtio_net.h"
53 #include "standard-headers/linux/virtio_ring.h"
/* Compile-time switch for the debug trace: non-zero enables DPRINT(). */
#define VHOST_USER_BRIDGE_DEBUG 1

/* printf()-style trace helper.  Prints only when VHOST_USER_BRIDGE_DEBUG
 * is non-zero; wrapped in do/while (0) so it behaves as one statement. */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
/* Handler type stored in the dispatcher's event table: it is called with
 * the ready descriptor and the context pointer registered for it. */
typedef void (*CallbackFunc)(int sock, void *ctx);
66 typedef struct Event {
67 void *ctx;
68 CallbackFunc callback;
69 } Event;
71 typedef struct Dispatcher {
72 int max_sock;
73 fd_set fdset;
74 Event events[FD_SETSIZE];
75 } Dispatcher;
/* Print @s followed by the description of the current errno value on
 * stderr, then terminate the process with exit status 1. */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
84 static int
85 dispatcher_init(Dispatcher *dispr)
87 FD_ZERO(&dispr->fdset);
88 dispr->max_sock = -1;
89 return 0;
92 static int
93 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
95 if (sock >= FD_SETSIZE) {
96 fprintf(stderr,
97 "Error: Failed to add new event. sock %d should be less than %d\n",
98 sock, FD_SETSIZE);
99 return -1;
102 dispr->events[sock].ctx = ctx;
103 dispr->events[sock].callback = cb;
105 FD_SET(sock, &dispr->fdset);
106 if (sock > dispr->max_sock) {
107 dispr->max_sock = sock;
109 DPRINT("Added sock %d for watching. max_sock: %d\n",
110 sock, dispr->max_sock);
111 return 0;
#if 0
/* dispatcher_remove() is not currently in use but may be useful
 * in the future.
 *
 * Stops watching @sock.  Returns 0 on success, -1 when @sock is not
 * below FD_SETSIZE. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
    if (sock < FD_SETSIZE) {
        FD_CLR(sock, &dispr->fdset);
        return 0;
    }

    fprintf(stderr,
            "Error: Failed to remove event. sock %d should be less than %d\n",
            sock, FD_SETSIZE);
    return -1;
}
#endif
132 /* timeout in us */
133 static int
134 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
136 struct timeval tv;
137 tv.tv_sec = timeout / 1000000;
138 tv.tv_usec = timeout % 1000000;
140 fd_set fdset = dispr->fdset;
142 /* wait until some of sockets become readable. */
143 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
145 if (rc == -1) {
146 vubr_die("select");
149 /* Timeout */
150 if (rc == 0) {
151 return 0;
154 /* Now call callback for every ready socket. */
156 int sock;
157 for (sock = 0; sock < dispr->max_sock + 1; sock++)
158 if (FD_ISSET(sock, &fdset)) {
159 Event *e = &dispr->events[sock];
160 e->callback(sock, e->ctx);
163 return 0;
/* Per-virtqueue state kept by the bridge. */
typedef struct VubrVirtq {
    int call_fd;                /* eventfd written to notify the guest */
    int kick_fd;                /* eventfd received via SET_VRING_KICK */
    uint32_t size;              /* ring size set by SET_VRING_NUM */
    uint16_t last_avail_index;  /* next avail ring entry to consume */
    uint16_t last_used_index;   /* next used ring entry to fill */
    struct vring_desc *desc;    /* descriptor table (our address space) */
    struct vring_avail *avail;  /* avail ring (our address space) */
    struct vring_used *used;    /* used ring (our address space) */
    uint64_t log_guest_addr;    /* base address used when logging used ring writes */
    int enable;                 /* value from SET_VRING_ENABLE */
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */

/* Capacity of the memory-region and fd arrays carried in one message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Bit number of the feature advertised by vubr_get_features_exec(). */
#define VHOST_USER_F_PROTOCOL_FEATURES 30

/* Dirty-log granularity in bytes: one log bit covers one such page. */
#define VHOST_LOG_PAGE 4096
/* Bit numbers of the vhost-user protocol features known to this file.
 * VHOST_USER_PROTOCOL_F_MAX counts them and must stay last. */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every feature number below F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
/* Request codes carried in VhostUserMsg.request.  The numeric values are
 * spelled out because they are exchanged with the peer; VHOST_USER_MAX is
 * one past the last valid request. */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_MAX
} VhostUserRequest;
/* One memory region as described in a SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical start address */
    uint64_t memory_size;       /* region length in bytes */
    uint64_t userspace_addr;    /* start address in QEMU's address space */
    uint64_t mmap_offset;       /* offset of the region inside the mapped fd */
} VhostUserMemoryRegion;
227 typedef struct VhostUserMemory {
228 uint32_t nregions;
229 uint32_t padding;
230 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
231 } VhostUserMemory;
/* SET_LOG_BASE payload: length and file offset handed to mmap() for the
 * dirty log. */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* number of bytes to map */
    uint64_t mmap_offset;   /* offset into the log fd */
} VhostUserLog;
238 typedef struct VhostUserMsg {
239 VhostUserRequest request;
241 #define VHOST_USER_VERSION_MASK (0x3)
242 #define VHOST_USER_REPLY_MASK (0x1<<2)
243 uint32_t flags;
244 uint32_t size; /* the following payload size */
245 union {
246 #define VHOST_USER_VRING_IDX_MASK (0xff)
247 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
248 uint64_t u64;
249 struct vhost_vring_state state;
250 struct vhost_vring_addr addr;
251 VhostUserMemory memory;
252 VhostUserLog log;
253 } payload;
254 int fds[VHOST_MEMORY_MAX_NREGIONS];
255 int fd_num;
256 } QEMU_PACKED VhostUserMsg;
/* Number of bytes preceding the payload in VhostUserMsg. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

/* Capacity of VubrDev.vq[]. */
#define MAX_NR_VIRTQUEUE (8)
/* Bridge-side record of one guest memory region and where it is mapped. */
typedef struct VubrDevRegion {
    uint64_t gpa;           /* Guest Physical address. */
    uint64_t size;          /* Memory region size. */
    uint64_t qva;           /* QEMU virtual address (userspace). */
    uint64_t mmap_offset;   /* Starting offset in our mmaped space. */
    uint64_t mmap_addr;     /* Start address of mmaped space. */
} VubrDevRegion;
278 typedef struct VubrDev {
279 int sock;
280 Dispatcher dispatcher;
281 uint32_t nregions;
282 VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
283 VubrVirtq vq[MAX_NR_VIRTQUEUE];
284 int log_call_fd;
285 uint64_t log_size;
286 uint8_t *log_table;
287 int backend_udp_sock;
288 struct sockaddr_in backend_udp_dest;
289 int ready;
290 uint64_t features;
291 } VubrDev;
293 static const char *vubr_request_str[] = {
294 [VHOST_USER_NONE] = "VHOST_USER_NONE",
295 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
296 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
297 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
298 [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
299 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
300 [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
301 [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
302 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
303 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
304 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
305 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
306 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
307 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
308 [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
309 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
310 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
311 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
312 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
313 [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
314 [VHOST_USER_MAX] = "VHOST_USER_MAX",
/* Hex-dump @len bytes of @buf to stdout: 16 bytes per row, with a
 * separator before every group of 4, followed by a dotted rule.
 *
 * The counter is a size_t so that it matches @len; the previous int
 * counter was compared against an unsigned length. */
static void
print_buffer(uint8_t *buf, size_t len)
{
    size_t i;

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
334 /* Translate guest physical address to our virtual address. */
335 static uint64_t
336 gpa_to_va(VubrDev *dev, uint64_t guest_addr)
338 int i;
340 /* Find matching memory region. */
341 for (i = 0; i < dev->nregions; i++) {
342 VubrDevRegion *r = &dev->regions[i];
344 if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
345 return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
349 assert(!"address not found in regions");
350 return 0;
353 /* Translate qemu virtual address to our virtual address. */
354 static uint64_t
355 qva_to_va(VubrDev *dev, uint64_t qemu_addr)
357 int i;
359 /* Find matching memory region. */
360 for (i = 0; i < dev->nregions; i++) {
361 VubrDevRegion *r = &dev->regions[i];
363 if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
364 return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
368 assert(!"address not found in regions");
369 return 0;
372 static void
373 vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
375 char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
376 struct iovec iov = {
377 .iov_base = (char *)vmsg,
378 .iov_len = VHOST_USER_HDR_SIZE,
380 struct msghdr msg = {
381 .msg_iov = &iov,
382 .msg_iovlen = 1,
383 .msg_control = control,
384 .msg_controllen = sizeof(control),
386 size_t fd_size;
387 struct cmsghdr *cmsg;
388 int rc;
390 rc = recvmsg(conn_fd, &msg, 0);
392 if (rc == 0) {
393 vubr_die("recvmsg");
394 fprintf(stderr, "Peer disconnected.\n");
395 exit(1);
397 if (rc < 0) {
398 vubr_die("recvmsg");
401 vmsg->fd_num = 0;
402 for (cmsg = CMSG_FIRSTHDR(&msg);
403 cmsg != NULL;
404 cmsg = CMSG_NXTHDR(&msg, cmsg))
406 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
407 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
408 vmsg->fd_num = fd_size / sizeof(int);
409 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
410 break;
414 if (vmsg->size > sizeof(vmsg->payload)) {
415 fprintf(stderr,
416 "Error: too big message request: %d, size: vmsg->size: %u, "
417 "while sizeof(vmsg->payload) = %lu\n",
418 vmsg->request, vmsg->size, sizeof(vmsg->payload));
419 exit(1);
422 if (vmsg->size) {
423 rc = read(conn_fd, &vmsg->payload, vmsg->size);
424 if (rc == 0) {
425 vubr_die("recvmsg");
426 fprintf(stderr, "Peer disconnected.\n");
427 exit(1);
429 if (rc < 0) {
430 vubr_die("recvmsg");
433 assert(rc == vmsg->size);
437 static void
438 vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
440 int rc;
442 do {
443 rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
444 } while (rc < 0 && errno == EINTR);
446 if (rc < 0) {
447 vubr_die("write");
451 static void
452 vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
454 int slen = sizeof(struct sockaddr_in);
456 if (sendto(dev->backend_udp_sock, buf, len, 0,
457 (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
458 vubr_die("sendto()");
462 static int
463 vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
465 int slen = sizeof(struct sockaddr_in);
466 int rc;
468 rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
469 (struct sockaddr *) &dev->backend_udp_dest,
470 (socklen_t *)&slen);
471 if (rc == -1) {
472 vubr_die("recvfrom()");
475 return rc;
478 static void
479 vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
481 int hdrlen = sizeof(struct virtio_net_hdr_v1);
483 if (VHOST_USER_BRIDGE_DEBUG) {
484 print_buffer(buf, len);
486 vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
489 /* Kick the log_call_fd if required. */
490 static void
491 vubr_log_kick(VubrDev *dev)
493 if (dev->log_call_fd != -1) {
494 DPRINT("Kicking the QEMU's log...\n");
495 eventfd_write(dev->log_call_fd, 1);
499 /* Kick the guest if necessary. */
500 static void
501 vubr_virtqueue_kick(VubrVirtq *vq)
503 if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
504 DPRINT("Kicking the guest...\n");
505 eventfd_write(vq->call_fd, 1);
/* Mark @page dirty in @log_table: the table is a bitmap with one bit per
 * page, eight pages per byte, and the bit is set atomically. */
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t *log_byte = &log_table[page / 8];

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(log_byte, 1 << (page % 8));
}
516 static void
517 vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
519 uint64_t page;
521 if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
522 !dev->log_table || !length) {
523 return;
526 assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
528 page = address / VHOST_LOG_PAGE;
529 while (page * VHOST_LOG_PAGE < address + length) {
530 vubr_log_page(dev->log_table, page);
531 page += VHOST_LOG_PAGE;
533 vubr_log_kick(dev);
536 static void
537 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
539 struct vring_desc *desc = vq->desc;
540 struct vring_avail *avail = vq->avail;
541 struct vring_used *used = vq->used;
542 uint64_t log_guest_addr = vq->log_guest_addr;
544 unsigned int size = vq->size;
546 uint16_t avail_index = atomic_mb_read(&avail->idx);
548 /* We check the available descriptors before posting the
549 * buffer, so here we assume that enough available
550 * descriptors. */
551 assert(vq->last_avail_index != avail_index);
552 uint16_t a_index = vq->last_avail_index % size;
553 uint16_t u_index = vq->last_used_index % size;
554 uint16_t d_index = avail->ring[a_index];
556 int i = d_index;
558 DPRINT("Post packet to guest on vq:\n");
559 DPRINT(" size = %d\n", vq->size);
560 DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
561 DPRINT(" last_used_index = %d\n", vq->last_used_index);
562 DPRINT(" a_index = %d\n", a_index);
563 DPRINT(" u_index = %d\n", u_index);
564 DPRINT(" d_index = %d\n", d_index);
565 DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
566 DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
567 DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
568 DPRINT(" avail->idx = %d\n", avail_index);
569 DPRINT(" used->idx = %d\n", used->idx);
571 if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
572 /* FIXME: we should find writable descriptor. */
573 fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
574 exit(1);
577 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
578 uint32_t chunk_len = desc[i].len;
580 if (len <= chunk_len) {
581 memcpy(chunk_start, buf, len);
582 vubr_log_write(dev, desc[i].addr, len);
583 } else {
584 fprintf(stderr,
585 "Received too long packet from the backend. Dropping...\n");
586 return;
589 /* Add descriptor to the used ring. */
590 used->ring[u_index].id = d_index;
591 used->ring[u_index].len = len;
592 vubr_log_write(dev,
593 log_guest_addr + offsetof(struct vring_used, ring[u_index]),
594 sizeof(used->ring[u_index]));
596 vq->last_avail_index++;
597 vq->last_used_index++;
599 atomic_mb_set(&used->idx, vq->last_used_index);
600 vubr_log_write(dev,
601 log_guest_addr + offsetof(struct vring_used, idx),
602 sizeof(used->idx));
604 /* Kick the guest if necessary. */
605 vubr_virtqueue_kick(vq);
608 static int
609 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
611 struct vring_desc *desc = vq->desc;
612 struct vring_avail *avail = vq->avail;
613 struct vring_used *used = vq->used;
614 uint64_t log_guest_addr = vq->log_guest_addr;
616 unsigned int size = vq->size;
618 uint16_t a_index = vq->last_avail_index % size;
619 uint16_t u_index = vq->last_used_index % size;
620 uint16_t d_index = avail->ring[a_index];
622 uint32_t i, len = 0;
623 size_t buf_size = 4096;
624 uint8_t buf[4096];
626 DPRINT("Chunks: ");
627 i = d_index;
628 do {
629 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
630 uint32_t chunk_len = desc[i].len;
632 assert(!(desc[i].flags & VRING_DESC_F_WRITE));
634 if (len + chunk_len < buf_size) {
635 memcpy(buf + len, chunk_start, chunk_len);
636 DPRINT("%d ", chunk_len);
637 } else {
638 fprintf(stderr, "Error: too long packet. Dropping...\n");
639 break;
642 len += chunk_len;
644 if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
645 break;
648 i = desc[i].next;
649 } while (1);
650 DPRINT("\n");
652 if (!len) {
653 return -1;
656 /* Add descriptor to the used ring. */
657 used->ring[u_index].id = d_index;
658 used->ring[u_index].len = len;
659 vubr_log_write(dev,
660 log_guest_addr + offsetof(struct vring_used, ring[u_index]),
661 sizeof(used->ring[u_index]));
663 vubr_consume_raw_packet(dev, buf, len);
665 return 0;
668 static void
669 vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
671 struct vring_avail *avail = vq->avail;
672 struct vring_used *used = vq->used;
673 uint64_t log_guest_addr = vq->log_guest_addr;
675 while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
676 vubr_process_desc(dev, vq);
677 vq->last_avail_index++;
678 vq->last_used_index++;
681 atomic_mb_set(&used->idx, vq->last_used_index);
682 vubr_log_write(dev,
683 log_guest_addr + offsetof(struct vring_used, idx),
684 sizeof(used->idx));
687 static void
688 vubr_backend_recv_cb(int sock, void *ctx)
690 VubrDev *dev = (VubrDev *) ctx;
691 VubrVirtq *rx_vq = &dev->vq[0];
692 uint8_t buf[4096];
693 struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
694 int hdrlen = sizeof(struct virtio_net_hdr_v1);
695 int buflen = sizeof(buf);
696 int len;
698 if (!dev->ready) {
699 return;
702 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
704 uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
706 /* If there is no available descriptors, just do nothing.
707 * The buffer will be handled by next arrived UDP packet,
708 * or next kick on receive virtq. */
709 if (rx_vq->last_avail_index == avail_index) {
710 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
711 return;
714 len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
716 *hdr = (struct virtio_net_hdr_v1) { };
717 hdr->num_buffers = 1;
718 vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
721 static void
722 vubr_kick_cb(int sock, void *ctx)
724 VubrDev *dev = (VubrDev *) ctx;
725 eventfd_t kick_data;
726 ssize_t rc;
728 rc = eventfd_read(sock, &kick_data);
729 if (rc == -1) {
730 vubr_die("eventfd_read()");
731 } else {
732 DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
733 vubr_process_avail(dev, &dev->vq[1]);
737 static int
738 vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
740 DPRINT("Function %s() not implemented yet.\n", __func__);
741 return 0;
744 static int
745 vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
747 vmsg->payload.u64 =
748 ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
749 (1ULL << VHOST_F_LOG_ALL) |
750 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
752 vmsg->size = sizeof(vmsg->payload.u64);
754 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
756 /* Reply */
757 return 1;
760 static int
761 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
763 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
764 dev->features = vmsg->payload.u64;
765 return 0;
768 static int
769 vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
771 return 0;
774 static void
775 vubr_close_log(VubrDev *dev)
777 if (dev->log_table) {
778 if (munmap(dev->log_table, dev->log_size) != 0) {
779 vubr_die("munmap()");
782 dev->log_table = 0;
784 if (dev->log_call_fd != -1) {
785 close(dev->log_call_fd);
786 dev->log_call_fd = -1;
790 static int
791 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
793 vubr_close_log(dev);
794 dev->ready = 0;
795 dev->features = 0;
796 return 0;
799 static int
800 vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
802 int i;
803 VhostUserMemory *memory = &vmsg->payload.memory;
804 dev->nregions = memory->nregions;
806 DPRINT("Nregions: %d\n", memory->nregions);
807 for (i = 0; i < dev->nregions; i++) {
808 void *mmap_addr;
809 VhostUserMemoryRegion *msg_region = &memory->regions[i];
810 VubrDevRegion *dev_region = &dev->regions[i];
812 DPRINT("Region %d\n", i);
813 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
814 msg_region->guest_phys_addr);
815 DPRINT(" memory_size: 0x%016"PRIx64"\n",
816 msg_region->memory_size);
817 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
818 msg_region->userspace_addr);
819 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
820 msg_region->mmap_offset);
822 dev_region->gpa = msg_region->guest_phys_addr;
823 dev_region->size = msg_region->memory_size;
824 dev_region->qva = msg_region->userspace_addr;
825 dev_region->mmap_offset = msg_region->mmap_offset;
827 /* We don't use offset argument of mmap() since the
828 * mapped address has to be page aligned, and we use huge
829 * pages. */
830 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
831 PROT_READ | PROT_WRITE, MAP_SHARED,
832 vmsg->fds[i], 0);
834 if (mmap_addr == MAP_FAILED) {
835 vubr_die("mmap");
838 dev_region->mmap_addr = (uint64_t) mmap_addr;
839 DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
842 return 0;
845 static int
846 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
848 int fd;
849 uint64_t log_mmap_size, log_mmap_offset;
850 void *rc;
852 assert(vmsg->fd_num == 1);
853 fd = vmsg->fds[0];
855 assert(vmsg->size == sizeof(vmsg->payload.log));
856 log_mmap_offset = vmsg->payload.log.mmap_offset;
857 log_mmap_size = vmsg->payload.log.mmap_size;
858 DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
859 DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
861 rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
862 log_mmap_offset);
863 if (rc == MAP_FAILED) {
864 vubr_die("mmap");
866 dev->log_table = rc;
867 dev->log_size = log_mmap_size;
869 vmsg->size = sizeof(vmsg->payload.u64);
870 /* Reply */
871 return 1;
874 static int
875 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
877 assert(vmsg->fd_num == 1);
878 dev->log_call_fd = vmsg->fds[0];
879 DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
880 return 0;
883 static int
884 vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
886 unsigned int index = vmsg->payload.state.index;
887 unsigned int num = vmsg->payload.state.num;
889 DPRINT("State.index: %d\n", index);
890 DPRINT("State.num: %d\n", num);
891 dev->vq[index].size = num;
892 return 0;
895 static int
896 vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
898 struct vhost_vring_addr *vra = &vmsg->payload.addr;
899 unsigned int index = vra->index;
900 VubrVirtq *vq = &dev->vq[index];
902 DPRINT("vhost_vring_addr:\n");
903 DPRINT(" index: %d\n", vra->index);
904 DPRINT(" flags: %d\n", vra->flags);
905 DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
906 DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
907 DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
908 DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
910 vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
911 vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
912 vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
913 vq->log_guest_addr = vra->log_guest_addr;
915 DPRINT("Setting virtq addresses:\n");
916 DPRINT(" vring_desc at %p\n", vq->desc);
917 DPRINT(" vring_used at %p\n", vq->used);
918 DPRINT(" vring_avail at %p\n", vq->avail);
920 vq->last_used_index = vq->used->idx;
921 return 0;
924 static int
925 vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
927 unsigned int index = vmsg->payload.state.index;
928 unsigned int num = vmsg->payload.state.num;
930 DPRINT("State.index: %d\n", index);
931 DPRINT("State.num: %d\n", num);
932 dev->vq[index].last_avail_index = num;
934 return 0;
937 static int
938 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
940 unsigned int index = vmsg->payload.state.index;
942 DPRINT("State.index: %d\n", index);
943 vmsg->payload.state.num = dev->vq[index].last_avail_index;
944 vmsg->size = sizeof(vmsg->payload.state);
945 /* FIXME: this is a work-around for a bug in QEMU enabling
946 * too early vrings. When protocol features are enabled,
947 * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
948 dev->ready = 0;
950 /* Reply */
951 return 1;
954 static int
955 vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
957 uint64_t u64_arg = vmsg->payload.u64;
958 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
960 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
962 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
963 assert(vmsg->fd_num == 1);
965 dev->vq[index].kick_fd = vmsg->fds[0];
966 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
968 if (index % 2 == 1) {
969 /* TX queue. */
970 dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
971 dev, vubr_kick_cb);
973 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
974 dev->vq[index].kick_fd, index);
976 /* We temporarily use this hack to determine that both TX and RX
977 * queues are set up and ready for processing.
978 * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
979 * actual kicks. */
980 if (dev->vq[0].kick_fd != -1 &&
981 dev->vq[1].kick_fd != -1) {
982 dev->ready = 1;
983 DPRINT("vhost-user-bridge is ready for processing queues.\n");
985 return 0;
989 static int
990 vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
992 uint64_t u64_arg = vmsg->payload.u64;
993 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
995 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
996 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
997 assert(vmsg->fd_num == 1);
999 dev->vq[index].call_fd = vmsg->fds[0];
1000 DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
1002 return 0;
1005 static int
1006 vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
1008 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1009 return 0;
1012 static int
1013 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
1015 vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
1016 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1017 vmsg->size = sizeof(vmsg->payload.u64);
1019 /* Reply */
1020 return 1;
1023 static int
1024 vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
1026 /* FIXME: unimplented */
1027 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
1028 return 0;
1031 static int
1032 vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
1034 DPRINT("Function %s() not implemented yet.\n", __func__);
1035 return 0;
1038 static int
1039 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
1041 unsigned int index = vmsg->payload.state.index;
1042 unsigned int enable = vmsg->payload.state.num;
1044 DPRINT("State.index: %d\n", index);
1045 DPRINT("State.enable: %d\n", enable);
1046 dev->vq[index].enable = enable;
1047 return 0;
1050 static int
1051 vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
1053 DPRINT("Function %s() not implemented yet.\n", __func__);
1054 return 0;
1057 static int
1058 vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
1060 /* Print out generic part of the request. */
1061 DPRINT(
1062 "================== Vhost user message from QEMU ==================\n");
1063 DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
1064 vmsg->request);
1065 DPRINT("Flags: 0x%x\n", vmsg->flags);
1066 DPRINT("Size: %d\n", vmsg->size);
1068 if (vmsg->fd_num) {
1069 int i;
1070 DPRINT("Fds:");
1071 for (i = 0; i < vmsg->fd_num; i++) {
1072 DPRINT(" %d", vmsg->fds[i]);
1074 DPRINT("\n");
1077 switch (vmsg->request) {
1078 case VHOST_USER_NONE:
1079 return vubr_none_exec(dev, vmsg);
1080 case VHOST_USER_GET_FEATURES:
1081 return vubr_get_features_exec(dev, vmsg);
1082 case VHOST_USER_SET_FEATURES:
1083 return vubr_set_features_exec(dev, vmsg);
1084 case VHOST_USER_SET_OWNER:
1085 return vubr_set_owner_exec(dev, vmsg);
1086 case VHOST_USER_RESET_OWNER:
1087 return vubr_reset_device_exec(dev, vmsg);
1088 case VHOST_USER_SET_MEM_TABLE:
1089 return vubr_set_mem_table_exec(dev, vmsg);
1090 case VHOST_USER_SET_LOG_BASE:
1091 return vubr_set_log_base_exec(dev, vmsg);
1092 case VHOST_USER_SET_LOG_FD:
1093 return vubr_set_log_fd_exec(dev, vmsg);
1094 case VHOST_USER_SET_VRING_NUM:
1095 return vubr_set_vring_num_exec(dev, vmsg);
1096 case VHOST_USER_SET_VRING_ADDR:
1097 return vubr_set_vring_addr_exec(dev, vmsg);
1098 case VHOST_USER_SET_VRING_BASE:
1099 return vubr_set_vring_base_exec(dev, vmsg);
1100 case VHOST_USER_GET_VRING_BASE:
1101 return vubr_get_vring_base_exec(dev, vmsg);
1102 case VHOST_USER_SET_VRING_KICK:
1103 return vubr_set_vring_kick_exec(dev, vmsg);
1104 case VHOST_USER_SET_VRING_CALL:
1105 return vubr_set_vring_call_exec(dev, vmsg);
1106 case VHOST_USER_SET_VRING_ERR:
1107 return vubr_set_vring_err_exec(dev, vmsg);
1108 case VHOST_USER_GET_PROTOCOL_FEATURES:
1109 return vubr_get_protocol_features_exec(dev, vmsg);
1110 case VHOST_USER_SET_PROTOCOL_FEATURES:
1111 return vubr_set_protocol_features_exec(dev, vmsg);
1112 case VHOST_USER_GET_QUEUE_NUM:
1113 return vubr_get_queue_num_exec(dev, vmsg);
1114 case VHOST_USER_SET_VRING_ENABLE:
1115 return vubr_set_vring_enable_exec(dev, vmsg);
1116 case VHOST_USER_SEND_RARP:
1117 return vubr_send_rarp_exec(dev, vmsg);
1119 case VHOST_USER_MAX:
1120 assert(vmsg->request != VHOST_USER_MAX);
1122 return 0;
1125 static void
1126 vubr_receive_cb(int sock, void *ctx)
1128 VubrDev *dev = (VubrDev *) ctx;
1129 VhostUserMsg vmsg;
1130 int reply_requested;
1132 vubr_message_read(sock, &vmsg);
1133 reply_requested = vubr_execute_request(dev, &vmsg);
1134 if (reply_requested) {
1135 /* Set the version in the flags when sending the reply */
1136 vmsg.flags &= ~VHOST_USER_VERSION_MASK;
1137 vmsg.flags |= VHOST_USER_VERSION;
1138 vmsg.flags |= VHOST_USER_REPLY_MASK;
1139 vubr_message_write(sock, &vmsg);
1143 static void
1144 vubr_accept_cb(int sock, void *ctx)
1146 VubrDev *dev = (VubrDev *)ctx;
1147 int conn_fd;
1148 struct sockaddr_un un;
1149 socklen_t len = sizeof(un);
1151 conn_fd = accept(sock, (struct sockaddr *) &un, &len);
1152 if (conn_fd == -1) {
1153 vubr_die("accept()");
1155 DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
1156 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
1159 static VubrDev *
1160 vubr_new(const char *path)
1162 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
1163 dev->nregions = 0;
1164 int i;
1165 struct sockaddr_un un;
1166 size_t len;
1168 for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
1169 dev->vq[i] = (VubrVirtq) {
1170 .call_fd = -1, .kick_fd = -1,
1171 .size = 0,
1172 .last_avail_index = 0, .last_used_index = 0,
1173 .desc = 0, .avail = 0, .used = 0,
1174 .enable = 0,
1178 /* Init log */
1179 dev->log_call_fd = -1;
1180 dev->log_size = 0;
1181 dev->log_table = 0;
1182 dev->ready = 0;
1183 dev->features = 0;
1185 /* Get a UNIX socket. */
1186 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
1187 if (dev->sock == -1) {
1188 vubr_die("socket");
1191 un.sun_family = AF_UNIX;
1192 strcpy(un.sun_path, path);
1193 len = sizeof(un.sun_family) + strlen(path);
1194 unlink(path);
1196 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
1197 vubr_die("bind");
1200 if (listen(dev->sock, 1) == -1) {
1201 vubr_die("listen");
1204 dispatcher_init(&dev->dispatcher);
1205 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
1206 vubr_accept_cb);
1208 DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
1209 return dev;
1212 static void
1213 vubr_backend_udp_setup(VubrDev *dev,
1214 const char *local_host,
1215 uint16_t local_port,
1216 const char *dest_host,
1217 uint16_t dest_port)
1219 int sock;
1220 struct sockaddr_in si_local = {
1221 .sin_family = AF_INET,
1222 .sin_port = htons(local_port),
1225 if (inet_aton(local_host, &si_local.sin_addr) == 0) {
1226 fprintf(stderr, "inet_aton() failed.\n");
1227 exit(1);
1230 /* setup destination for sends */
1231 dev->backend_udp_dest = (struct sockaddr_in) {
1232 .sin_family = AF_INET,
1233 .sin_port = htons(dest_port),
1235 if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
1236 fprintf(stderr, "inet_aton() failed.\n");
1237 exit(1);
1240 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
1241 if (sock == -1) {
1242 vubr_die("socket");
1245 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
1246 vubr_die("bind");
1249 dev->backend_udp_sock = sock;
1250 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
1251 DPRINT("Waiting for data from udp backend on %s:%d...\n",
1252 local_host, local_port);
1255 static void
1256 vubr_run(VubrDev *dev)
1258 while (1) {
1259 /* timeout 200ms */
1260 dispatcher_wait(&dev->dispatcher, 200000);
1261 /* Here one can try polling strategy. */
1266 main(int argc, char *argv[])
1268 VubrDev *dev;
1270 dev = vubr_new("/tmp/vubr.sock");
1271 if (!dev) {
1272 return 1;
1275 vubr_backend_udp_setup(dev,
1276 "127.0.0.1", 4444,
1277 "127.0.0.1", 5555);
1278 vubr_run(dev);
1279 return 0;