enable multi-function hot-add
[qemu/ar7.git] / tests / vhost-user-bridge.c
blobfa18ad55fbaaab346a4d5d059778787d0ce16714
1 /*
2 * Vhost User Bridge
4 * Copyright (c) 2015 Red Hat, Inc.
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
14 * TODO:
15 * - main should get parameters from the command line.
16 * - implement all request handlers.
17 * - test for broken requests and virtqueue.
18 * - implement features defined by Virtio 1.0 spec.
19 * - support mergeable buffers and indirect descriptors.
20 * - implement RESET_DEVICE request.
21 * - implement clean shutdown.
22 * - implement non-blocking writes to UDP backend.
23 * - implement polling strategy.
26 #include <stddef.h>
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <inttypes.h>
32 #include <string.h>
33 #include <unistd.h>
34 #include <errno.h>
35 #include <sys/types.h>
36 #include <sys/socket.h>
37 #include <sys/un.h>
38 #include <sys/unistd.h>
39 #include <sys/mman.h>
40 #include <sys/eventfd.h>
41 #include <arpa/inet.h>
43 #include <linux/vhost.h>
45 #include "qemu/atomic.h"
46 #include "standard-headers/linux/virtio_net.h"
47 #include "standard-headers/linux/virtio_ring.h"
/* Set to 0 to silence every DPRINT() call. */
#define VHOST_USER_BRIDGE_DEBUG 1

/* printf()-style trace macro.  The call is always compiled, so format
 * strings and arguments are still checked when tracing is switched off. */
#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
/* Handler called by the dispatcher with the ready descriptor and the
 * context pointer registered together with it. */
typedef void (*CallbackFunc)(int sock, void *ctx);

/* One registered handler: the callback and its opaque context. */
typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;
65 typedef struct Dispatcher {
66 int max_sock;
67 fd_set fdset;
68 Event events[FD_SETSIZE];
69 } Dispatcher;
/* Print s followed by the text for the current errno, then terminate the
 * process with exit status 1.  Does not return. */
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
78 static int
79 dispatcher_init(Dispatcher *dispr)
81 FD_ZERO(&dispr->fdset);
82 dispr->max_sock = -1;
83 return 0;
86 static int
87 dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
89 if (sock >= FD_SETSIZE) {
90 fprintf(stderr,
91 "Error: Failed to add new event. sock %d should be less than %d\n",
92 sock, FD_SETSIZE);
93 return -1;
96 dispr->events[sock].ctx = ctx;
97 dispr->events[sock].callback = cb;
99 FD_SET(sock, &dispr->fdset);
100 if (sock > dispr->max_sock) {
101 dispr->max_sock = sock;
103 DPRINT("Added sock %d for watching. max_sock: %d\n",
104 sock, dispr->max_sock);
105 return 0;
#if 0
/* dispatcher_remove() is not currently in use but may be useful
 * in the future.
 *
 * Stop watching sock.  Returns 0 on success, or -1 when sock is negative or
 * not smaller than FD_SETSIZE.  max_sock is left unchanged. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
    /* FD_CLR() with an out-of-range descriptor is undefined. */
    if (sock < 0 || sock >= FD_SETSIZE) {
        fprintf(stderr,
                "Error: Failed to remove event. sock %d should be "
                "non-negative and less than %d\n",
                sock, FD_SETSIZE);
        return -1;
    }

    FD_CLR(sock, &dispr->fdset);
    return 0;
}
#endif
126 /* timeout in us */
127 static int
128 dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
130 struct timeval tv;
131 tv.tv_sec = timeout / 1000000;
132 tv.tv_usec = timeout % 1000000;
134 fd_set fdset = dispr->fdset;
136 /* wait until some of sockets become readable. */
137 int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);
139 if (rc == -1) {
140 vubr_die("select");
143 /* Timeout */
144 if (rc == 0) {
145 return 0;
148 /* Now call callback for every ready socket. */
150 int sock;
151 for (sock = 0; sock < dispr->max_sock + 1; sock++)
152 if (FD_ISSET(sock, &fdset)) {
153 Event *e = &dispr->events[sock];
154 e->callback(sock, e->ctx);
157 return 0;
/* Bridge-side state of one virtqueue. */
typedef struct VubrVirtq {
    int call_fd;                /* eventfd written to notify the guest */
    int kick_fd;                /* eventfd received with SET_VRING_KICK */
    uint32_t size;              /* ring size received with SET_VRING_NUM */
    uint16_t last_avail_index;  /* next avail ring entry to consume */
    uint16_t last_used_index;   /* next used ring entry to fill */
    struct vring_desc *desc;    /* ring parts, as local pointers */
    struct vring_avail *avail;
    struct vring_used *used;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */

/* Capacity of the region table carried by one memory-table message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30

/* Bit positions of the protocol features; F_MAX counts them. */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/* One bit set for every protocol feature listed in the enum. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
/* Request codes carried in the `request` field of VhostUserMsg.
 * VHOST_USER_MAX is one past the last real request. */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_DEVICE          = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_MAX
} VhostUserRequest;
/* One memory region as described in a SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
217 typedef struct VhostUserMemory {
218 uint32_t nregions;
219 uint32_t padding;
220 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
221 } VhostUserMemory;
223 typedef struct VhostUserMsg {
224 VhostUserRequest request;
226 #define VHOST_USER_VERSION_MASK (0x3)
227 #define VHOST_USER_REPLY_MASK (0x1<<2)
228 uint32_t flags;
229 uint32_t size; /* the following payload size */
230 union {
231 #define VHOST_USER_VRING_IDX_MASK (0xff)
232 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
233 uint64_t u64;
234 struct vhost_vring_state state;
235 struct vhost_vring_addr addr;
236 VhostUserMemory memory;
237 } payload;
238 int fds[VHOST_MEMORY_MAX_NREGIONS];
239 int fd_num;
240 } QEMU_PACKED VhostUserMsg;
/* Number of bytes that precede the payload in VhostUserMsg. */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)

/* Number of entries in VubrDev.vq[]. */
#define MAX_NR_VIRTQUEUE (8)
/* Bridge-side record of one guest memory region. */
typedef struct VubrDevRegion {
    uint64_t gpa;           /* Guest Physical address. */
    uint64_t size;          /* Memory region size. */
    uint64_t qva;           /* QEMU virtual address (userspace). */
    uint64_t mmap_offset;   /* Starting offset in our mmaped space. */
    uint64_t mmap_addr;     /* Start address of mmaped space. */
} VubrDevRegion;
262 typedef struct VubrDev {
263 int sock;
264 Dispatcher dispatcher;
265 uint32_t nregions;
266 VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
267 VubrVirtq vq[MAX_NR_VIRTQUEUE];
268 int backend_udp_sock;
269 struct sockaddr_in backend_udp_dest;
270 } VubrDev;
272 static const char *vubr_request_str[] = {
273 [VHOST_USER_NONE] = "VHOST_USER_NONE",
274 [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
275 [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
276 [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
277 [VHOST_USER_RESET_DEVICE] = "VHOST_USER_RESET_DEVICE",
278 [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
279 [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
280 [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
281 [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
282 [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
283 [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
284 [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
285 [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
286 [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
287 [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
288 [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
289 [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
290 [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
291 [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
292 [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
293 [VHOST_USER_MAX] = "VHOST_USER_MAX",
/*
 * Hex-dump len bytes of buf to stdout: 16 bytes per row, with an extra
 * space in front of every group of 4 bytes, followed by a dotted rule.
 */
static void
print_buffer(uint8_t *buf, size_t len)
{
    size_t i;   /* same type as len, so the comparison is never mixed-sign */

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        if (i % 16 == 0) {
            printf("\n");
        }
        if (i % 4 == 0) {
            printf(" ");
        }
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
313 /* Translate guest physical address to our virtual address. */
314 static uint64_t
315 gpa_to_va(VubrDev *dev, uint64_t guest_addr)
317 int i;
319 /* Find matching memory region. */
320 for (i = 0; i < dev->nregions; i++) {
321 VubrDevRegion *r = &dev->regions[i];
323 if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
324 return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
328 assert(!"address not found in regions");
329 return 0;
332 /* Translate qemu virtual address to our virtual address. */
333 static uint64_t
334 qva_to_va(VubrDev *dev, uint64_t qemu_addr)
336 int i;
338 /* Find matching memory region. */
339 for (i = 0; i < dev->nregions; i++) {
340 VubrDevRegion *r = &dev->regions[i];
342 if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
343 return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
347 assert(!"address not found in regions");
348 return 0;
351 static void
352 vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
354 char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
355 struct iovec iov = {
356 .iov_base = (char *)vmsg,
357 .iov_len = VHOST_USER_HDR_SIZE,
359 struct msghdr msg = {
360 .msg_iov = &iov,
361 .msg_iovlen = 1,
362 .msg_control = control,
363 .msg_controllen = sizeof(control),
365 size_t fd_size;
366 struct cmsghdr *cmsg;
367 int rc;
369 rc = recvmsg(conn_fd, &msg, 0);
371 if (rc <= 0) {
372 vubr_die("recvmsg");
375 vmsg->fd_num = 0;
376 for (cmsg = CMSG_FIRSTHDR(&msg);
377 cmsg != NULL;
378 cmsg = CMSG_NXTHDR(&msg, cmsg))
380 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
381 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
382 vmsg->fd_num = fd_size / sizeof(int);
383 memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
384 break;
388 if (vmsg->size > sizeof(vmsg->payload)) {
389 fprintf(stderr,
390 "Error: too big message request: %d, size: vmsg->size: %u, "
391 "while sizeof(vmsg->payload) = %lu\n",
392 vmsg->request, vmsg->size, sizeof(vmsg->payload));
393 exit(1);
396 if (vmsg->size) {
397 rc = read(conn_fd, &vmsg->payload, vmsg->size);
398 if (rc <= 0) {
399 vubr_die("recvmsg");
402 assert(rc == vmsg->size);
406 static void
407 vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
409 int rc;
411 do {
412 rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
413 } while (rc < 0 && errno == EINTR);
415 if (rc < 0) {
416 vubr_die("write");
420 static void
421 vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
423 int slen = sizeof(struct sockaddr_in);
425 if (sendto(dev->backend_udp_sock, buf, len, 0,
426 (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
427 vubr_die("sendto()");
431 static int
432 vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
434 int slen = sizeof(struct sockaddr_in);
435 int rc;
437 rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
438 (struct sockaddr *) &dev->backend_udp_dest,
439 (socklen_t *)&slen);
440 if (rc == -1) {
441 vubr_die("recvfrom()");
444 return rc;
447 static void
448 vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
450 int hdrlen = sizeof(struct virtio_net_hdr_v1);
452 if (VHOST_USER_BRIDGE_DEBUG) {
453 print_buffer(buf, len);
455 vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
458 /* Kick the guest if necessary. */
459 static void
460 vubr_virtqueue_kick(VubrVirtq *vq)
462 if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
463 DPRINT("Kicking the guest...\n");
464 eventfd_write(vq->call_fd, 1);
468 static void
469 vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
471 struct vring_desc *desc = vq->desc;
472 struct vring_avail *avail = vq->avail;
473 struct vring_used *used = vq->used;
475 unsigned int size = vq->size;
477 uint16_t avail_index = atomic_mb_read(&avail->idx);
479 /* We check the available descriptors before posting the
480 * buffer, so here we assume that enough available
481 * descriptors. */
482 assert(vq->last_avail_index != avail_index);
483 uint16_t a_index = vq->last_avail_index % size;
484 uint16_t u_index = vq->last_used_index % size;
485 uint16_t d_index = avail->ring[a_index];
487 int i = d_index;
489 DPRINT("Post packet to guest on vq:\n");
490 DPRINT(" size = %d\n", vq->size);
491 DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
492 DPRINT(" last_used_index = %d\n", vq->last_used_index);
493 DPRINT(" a_index = %d\n", a_index);
494 DPRINT(" u_index = %d\n", u_index);
495 DPRINT(" d_index = %d\n", d_index);
496 DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
497 DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
498 DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
499 DPRINT(" avail->idx = %d\n", avail_index);
500 DPRINT(" used->idx = %d\n", used->idx);
502 if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
503 /* FIXME: we should find writable descriptor. */
504 fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
505 exit(1);
508 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
509 uint32_t chunk_len = desc[i].len;
511 if (len <= chunk_len) {
512 memcpy(chunk_start, buf, len);
513 } else {
514 fprintf(stderr,
515 "Received too long packet from the backend. Dropping...\n");
516 return;
519 /* Add descriptor to the used ring. */
520 used->ring[u_index].id = d_index;
521 used->ring[u_index].len = len;
523 vq->last_avail_index++;
524 vq->last_used_index++;
526 atomic_mb_set(&used->idx, vq->last_used_index);
528 /* Kick the guest if necessary. */
529 vubr_virtqueue_kick(vq);
532 static int
533 vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
535 struct vring_desc *desc = vq->desc;
536 struct vring_avail *avail = vq->avail;
537 struct vring_used *used = vq->used;
539 unsigned int size = vq->size;
541 uint16_t a_index = vq->last_avail_index % size;
542 uint16_t u_index = vq->last_used_index % size;
543 uint16_t d_index = avail->ring[a_index];
545 uint32_t i, len = 0;
546 size_t buf_size = 4096;
547 uint8_t buf[4096];
549 DPRINT("Chunks: ");
550 i = d_index;
551 do {
552 void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr);
553 uint32_t chunk_len = desc[i].len;
555 if (len + chunk_len < buf_size) {
556 memcpy(buf + len, chunk_start, chunk_len);
557 DPRINT("%d ", chunk_len);
558 } else {
559 fprintf(stderr, "Error: too long packet. Dropping...\n");
560 break;
563 len += chunk_len;
565 if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
566 break;
569 i = desc[i].next;
570 } while (1);
571 DPRINT("\n");
573 if (!len) {
574 return -1;
577 /* Add descriptor to the used ring. */
578 used->ring[u_index].id = d_index;
579 used->ring[u_index].len = len;
581 vubr_consume_raw_packet(dev, buf, len);
583 return 0;
586 static void
587 vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
589 struct vring_avail *avail = vq->avail;
590 struct vring_used *used = vq->used;
592 while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
593 vubr_process_desc(dev, vq);
594 vq->last_avail_index++;
595 vq->last_used_index++;
598 atomic_mb_set(&used->idx, vq->last_used_index);
601 static void
602 vubr_backend_recv_cb(int sock, void *ctx)
604 VubrDev *dev = (VubrDev *) ctx;
605 VubrVirtq *rx_vq = &dev->vq[0];
606 uint8_t buf[4096];
607 struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
608 int hdrlen = sizeof(struct virtio_net_hdr_v1);
609 int buflen = sizeof(buf);
610 int len;
612 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
614 uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
616 /* If there is no available descriptors, just do nothing.
617 * The buffer will be handled by next arrived UDP packet,
618 * or next kick on receive virtq. */
619 if (rx_vq->last_avail_index == avail_index) {
620 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
621 return;
624 len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
626 *hdr = (struct virtio_net_hdr_v1) { };
627 hdr->num_buffers = 1;
628 vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
631 static void
632 vubr_kick_cb(int sock, void *ctx)
634 VubrDev *dev = (VubrDev *) ctx;
635 eventfd_t kick_data;
636 ssize_t rc;
638 rc = eventfd_read(sock, &kick_data);
639 if (rc == -1) {
640 vubr_die("eventfd_read()");
641 } else {
642 DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
643 vubr_process_avail(dev, &dev->vq[1]);
647 static int
648 vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
650 DPRINT("Function %s() not implemented yet.\n", __func__);
651 return 0;
654 static int
655 vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
657 vmsg->payload.u64 =
658 ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
659 (1ULL << VIRTIO_NET_F_CTRL_VQ) |
660 (1ULL << VIRTIO_NET_F_CTRL_RX) |
661 (1ULL << VHOST_F_LOG_ALL));
662 vmsg->size = sizeof(vmsg->payload.u64);
664 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
666 /* reply */
667 return 1;
670 static int
671 vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
673 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
674 return 0;
677 static int
678 vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
680 return 0;
683 static int
684 vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
686 DPRINT("Function %s() not implemented yet.\n", __func__);
687 return 0;
690 static int
691 vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
693 int i;
694 VhostUserMemory *memory = &vmsg->payload.memory;
695 dev->nregions = memory->nregions;
697 DPRINT("Nregions: %d\n", memory->nregions);
698 for (i = 0; i < dev->nregions; i++) {
699 void *mmap_addr;
700 VhostUserMemoryRegion *msg_region = &memory->regions[i];
701 VubrDevRegion *dev_region = &dev->regions[i];
703 DPRINT("Region %d\n", i);
704 DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
705 msg_region->guest_phys_addr);
706 DPRINT(" memory_size: 0x%016"PRIx64"\n",
707 msg_region->memory_size);
708 DPRINT(" userspace_addr 0x%016"PRIx64"\n",
709 msg_region->userspace_addr);
710 DPRINT(" mmap_offset 0x%016"PRIx64"\n",
711 msg_region->mmap_offset);
713 dev_region->gpa = msg_region->guest_phys_addr;
714 dev_region->size = msg_region->memory_size;
715 dev_region->qva = msg_region->userspace_addr;
716 dev_region->mmap_offset = msg_region->mmap_offset;
718 /* We don't use offset argument of mmap() since the
719 * mapped address has to be page aligned, and we use huge
720 * pages. */
721 mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
722 PROT_READ | PROT_WRITE, MAP_SHARED,
723 vmsg->fds[i], 0);
725 if (mmap_addr == MAP_FAILED) {
726 vubr_die("mmap");
729 dev_region->mmap_addr = (uint64_t) mmap_addr;
730 DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
733 return 0;
736 static int
737 vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
739 DPRINT("Function %s() not implemented yet.\n", __func__);
740 return 0;
743 static int
744 vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
746 DPRINT("Function %s() not implemented yet.\n", __func__);
747 return 0;
750 static int
751 vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
753 unsigned int index = vmsg->payload.state.index;
754 unsigned int num = vmsg->payload.state.num;
756 DPRINT("State.index: %d\n", index);
757 DPRINT("State.num: %d\n", num);
758 dev->vq[index].size = num;
759 return 0;
762 static int
763 vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
765 struct vhost_vring_addr *vra = &vmsg->payload.addr;
766 unsigned int index = vra->index;
767 VubrVirtq *vq = &dev->vq[index];
769 DPRINT("vhost_vring_addr:\n");
770 DPRINT(" index: %d\n", vra->index);
771 DPRINT(" flags: %d\n", vra->flags);
772 DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
773 DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
774 DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
775 DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
777 vq->desc = (struct vring_desc *)qva_to_va(dev, vra->desc_user_addr);
778 vq->used = (struct vring_used *)qva_to_va(dev, vra->used_user_addr);
779 vq->avail = (struct vring_avail *)qva_to_va(dev, vra->avail_user_addr);
781 DPRINT("Setting virtq addresses:\n");
782 DPRINT(" vring_desc at %p\n", vq->desc);
783 DPRINT(" vring_used at %p\n", vq->used);
784 DPRINT(" vring_avail at %p\n", vq->avail);
786 vq->last_used_index = vq->used->idx;
787 return 0;
790 static int
791 vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
793 unsigned int index = vmsg->payload.state.index;
794 unsigned int num = vmsg->payload.state.num;
796 DPRINT("State.index: %d\n", index);
797 DPRINT("State.num: %d\n", num);
798 dev->vq[index].last_avail_index = num;
800 return 0;
803 static int
804 vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
806 DPRINT("Function %s() not implemented yet.\n", __func__);
807 return 0;
810 static int
811 vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
813 uint64_t u64_arg = vmsg->payload.u64;
814 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
816 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
818 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
819 assert(vmsg->fd_num == 1);
821 dev->vq[index].kick_fd = vmsg->fds[0];
822 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
824 if (index % 2 == 1) {
825 /* TX queue. */
826 dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
827 dev, vubr_kick_cb);
829 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
830 dev->vq[index].kick_fd, index);
832 return 0;
835 static int
836 vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
838 uint64_t u64_arg = vmsg->payload.u64;
839 int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
841 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
842 assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
843 assert(vmsg->fd_num == 1);
845 dev->vq[index].call_fd = vmsg->fds[0];
846 DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
848 return 0;
851 static int
852 vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
854 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
855 return 0;
858 static int
859 vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
861 /* FIXME: unimplented */
862 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
863 return 0;
866 static int
867 vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
869 /* FIXME: unimplented */
870 DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
871 return 0;
874 static int
875 vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
877 DPRINT("Function %s() not implemented yet.\n", __func__);
878 return 0;
881 static int
882 vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
884 DPRINT("Function %s() not implemented yet.\n", __func__);
885 return 0;
888 static int
889 vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
891 DPRINT("Function %s() not implemented yet.\n", __func__);
892 return 0;
895 static int
896 vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
898 /* Print out generic part of the request. */
899 DPRINT(
900 "================== Vhost user message from QEMU ==================\n");
901 DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
902 vmsg->request);
903 DPRINT("Flags: 0x%x\n", vmsg->flags);
904 DPRINT("Size: %d\n", vmsg->size);
906 if (vmsg->fd_num) {
907 int i;
908 DPRINT("Fds:");
909 for (i = 0; i < vmsg->fd_num; i++) {
910 DPRINT(" %d", vmsg->fds[i]);
912 DPRINT("\n");
915 switch (vmsg->request) {
916 case VHOST_USER_NONE:
917 return vubr_none_exec(dev, vmsg);
918 case VHOST_USER_GET_FEATURES:
919 return vubr_get_features_exec(dev, vmsg);
920 case VHOST_USER_SET_FEATURES:
921 return vubr_set_features_exec(dev, vmsg);
922 case VHOST_USER_SET_OWNER:
923 return vubr_set_owner_exec(dev, vmsg);
924 case VHOST_USER_RESET_DEVICE:
925 return vubr_reset_device_exec(dev, vmsg);
926 case VHOST_USER_SET_MEM_TABLE:
927 return vubr_set_mem_table_exec(dev, vmsg);
928 case VHOST_USER_SET_LOG_BASE:
929 return vubr_set_log_base_exec(dev, vmsg);
930 case VHOST_USER_SET_LOG_FD:
931 return vubr_set_log_fd_exec(dev, vmsg);
932 case VHOST_USER_SET_VRING_NUM:
933 return vubr_set_vring_num_exec(dev, vmsg);
934 case VHOST_USER_SET_VRING_ADDR:
935 return vubr_set_vring_addr_exec(dev, vmsg);
936 case VHOST_USER_SET_VRING_BASE:
937 return vubr_set_vring_base_exec(dev, vmsg);
938 case VHOST_USER_GET_VRING_BASE:
939 return vubr_get_vring_base_exec(dev, vmsg);
940 case VHOST_USER_SET_VRING_KICK:
941 return vubr_set_vring_kick_exec(dev, vmsg);
942 case VHOST_USER_SET_VRING_CALL:
943 return vubr_set_vring_call_exec(dev, vmsg);
944 case VHOST_USER_SET_VRING_ERR:
945 return vubr_set_vring_err_exec(dev, vmsg);
946 case VHOST_USER_GET_PROTOCOL_FEATURES:
947 return vubr_get_protocol_features_exec(dev, vmsg);
948 case VHOST_USER_SET_PROTOCOL_FEATURES:
949 return vubr_set_protocol_features_exec(dev, vmsg);
950 case VHOST_USER_GET_QUEUE_NUM:
951 return vubr_get_queue_num_exec(dev, vmsg);
952 case VHOST_USER_SET_VRING_ENABLE:
953 return vubr_set_vring_enable_exec(dev, vmsg);
954 case VHOST_USER_SEND_RARP:
955 return vubr_send_rarp_exec(dev, vmsg);
957 case VHOST_USER_MAX:
958 assert(vmsg->request != VHOST_USER_MAX);
960 return 0;
963 static void
964 vubr_receive_cb(int sock, void *ctx)
966 VubrDev *dev = (VubrDev *) ctx;
967 VhostUserMsg vmsg;
968 int reply_requested;
970 vubr_message_read(sock, &vmsg);
971 reply_requested = vubr_execute_request(dev, &vmsg);
972 if (reply_requested) {
973 /* Set the version in the flags when sending the reply */
974 vmsg.flags &= ~VHOST_USER_VERSION_MASK;
975 vmsg.flags |= VHOST_USER_VERSION;
976 vmsg.flags |= VHOST_USER_REPLY_MASK;
977 vubr_message_write(sock, &vmsg);
981 static void
982 vubr_accept_cb(int sock, void *ctx)
984 VubrDev *dev = (VubrDev *)ctx;
985 int conn_fd;
986 struct sockaddr_un un;
987 socklen_t len = sizeof(un);
989 conn_fd = accept(sock, (struct sockaddr *) &un, &len);
990 if (conn_fd == -1) {
991 vubr_die("accept()");
993 DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
994 dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
997 static VubrDev *
998 vubr_new(const char *path)
1000 VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
1001 dev->nregions = 0;
1002 int i;
1003 struct sockaddr_un un;
1004 size_t len;
1006 for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
1007 dev->vq[i] = (VubrVirtq) {
1008 .call_fd = -1, .kick_fd = -1,
1009 .size = 0,
1010 .last_avail_index = 0, .last_used_index = 0,
1011 .desc = 0, .avail = 0, .used = 0,
1015 /* Get a UNIX socket. */
1016 dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
1017 if (dev->sock == -1) {
1018 vubr_die("socket");
1021 un.sun_family = AF_UNIX;
1022 strcpy(un.sun_path, path);
1023 len = sizeof(un.sun_family) + strlen(path);
1024 unlink(path);
1026 if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
1027 vubr_die("bind");
1030 if (listen(dev->sock, 1) == -1) {
1031 vubr_die("listen");
1034 dispatcher_init(&dev->dispatcher);
1035 dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev,
1036 vubr_accept_cb);
1038 DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
1039 return dev;
1042 static void
1043 vubr_backend_udp_setup(VubrDev *dev,
1044 const char *local_host,
1045 uint16_t local_port,
1046 const char *dest_host,
1047 uint16_t dest_port)
1049 int sock;
1050 struct sockaddr_in si_local = {
1051 .sin_family = AF_INET,
1052 .sin_port = htons(local_port),
1055 if (inet_aton(local_host, &si_local.sin_addr) == 0) {
1056 fprintf(stderr, "inet_aton() failed.\n");
1057 exit(1);
1060 /* setup destination for sends */
1061 dev->backend_udp_dest = (struct sockaddr_in) {
1062 .sin_family = AF_INET,
1063 .sin_port = htons(dest_port),
1065 if (inet_aton(dest_host, &dev->backend_udp_dest.sin_addr) == 0) {
1066 fprintf(stderr, "inet_aton() failed.\n");
1067 exit(1);
1070 sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
1071 if (sock == -1) {
1072 vubr_die("socket");
1075 if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
1076 vubr_die("bind");
1079 dev->backend_udp_sock = sock;
1080 dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
1081 DPRINT("Waiting for data from udp backend on %s:%d...\n",
1082 local_host, local_port);
1085 static void
1086 vubr_run(VubrDev *dev)
1088 while (1) {
1089 /* timeout 200ms */
1090 dispatcher_wait(&dev->dispatcher, 200000);
1091 /* Here one can try polling strategy. */
1096 main(int argc, char *argv[])
1098 VubrDev *dev;
1100 dev = vubr_new("/tmp/vubr.sock");
1101 if (!dev) {
1102 return 1;
1105 vubr_backend_udp_setup(dev,
1106 "127.0.0.1", 4444,
1107 "127.0.0.1", 5555);
1108 vubr_run(dev);
1109 return 0;