4 * Copyright (c) 2015 Red Hat, Inc.
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
15 * - main should get parameters from the command line.
16 * - implement all request handlers.
17 * - test for broken requests and virtqueue.
18 * - implement features defined by Virtio 1.0 spec.
19 * - support mergeable buffers and indirect descriptors.
20 * - implement RESET_DEVICE request.
21 * - implement clean shutdown.
22 * - implement non-blocking writes to UDP backend.
23 * - implement polling strategy.
35 #include <sys/types.h>
36 #include <sys/socket.h>
38 #include <sys/unistd.h>
40 #include <sys/eventfd.h>
41 #include <arpa/inet.h>
43 #include <linux/vhost.h>
45 #include "qemu/atomic.h"
46 #include "standard-headers/linux/virtio_net.h"
47 #include "standard-headers/linux/virtio_ring.h"
/* Debug switch. The printf() below is guarded by this value, and
 * vubr_consume_raw_packet() also tests it before dumping a packet. */
49 #define VHOST_USER_BRIDGE_DEBUG 1
53 if (VHOST_USER_BRIDGE_DEBUG) { \
54 printf(__VA_ARGS__); \
/* Event handler signature. dispatcher_wait() calls it with the ready
 * descriptor and the context pointer that was registered for it. */
58 typedef void (*CallbackFunc
)(int sock
, void *ctx
);
/* One dispatcher table entry. callback is the handler that
 * dispatcher_wait() invokes when the entry's descriptor is ready. */
60 typedef struct Event
{
62 CallbackFunc callback
;
/* select()-based event dispatcher. events[] has FD_SETSIZE slots and is
 * indexed directly by descriptor number (see dispatcher_add()). */
65 typedef struct Dispatcher
{
68 Event events
[FD_SETSIZE
];
/* Failure helper. Callers in this file pass the name of the call that
 * failed, e.g. "sendto()" or "accept()".
 * NOTE(review): presumably reports the error and terminates the program;
 * confirm against the function body. */
72 vubr_die(const char *s
)
/* Prepare a dispatcher for use by emptying its watched descriptor set. */
79 dispatcher_init(Dispatcher
*dispr
)
81 FD_ZERO(&dispr
->fdset
);
/* Start watching descriptor sock.
 *
 * Stores ctx and cb in the events[] slot for sock, adds sock to the watched
 * set, and raises max_sock when sock is the highest descriptor seen so far.
 * A sock of FD_SETSIZE or more is rejected with an error message.
 *
 * NOTE(review): only the upper bound is tested. A negative sock would pass
 * the check and index events[] out of bounds. */
87 dispatcher_add(Dispatcher
*dispr
, int sock
, void *ctx
, CallbackFunc cb
)
89 if (sock
>= FD_SETSIZE
) {
91 "Error: Failed to add new event. sock %d should be less than %d\n",
96 dispr
->events
[sock
].ctx
= ctx
;
97 dispr
->events
[sock
].callback
= cb
;
99 FD_SET(sock
, &dispr
->fdset
);
100 if (sock
> dispr
->max_sock
) {
101 dispr
->max_sock
= sock
;
103 DPRINT("Added sock %d for watching. max_sock: %d\n",
104 sock
, dispr
->max_sock
);
109 /* dispatcher_remove() is not currently in use but may be useful. */
/* Stop watching descriptor sock by clearing it from the watched set.
 * A sock of FD_SETSIZE or more is rejected with an error message.
 *
 * NOTE(review): as in dispatcher_add(), a negative sock is not rejected.
 * NOTE(review): max_sock is not lowered here, so dispatcher_wait() keeps
 * scanning up to the old maximum. */
112 dispatcher_remove(Dispatcher
*dispr
, int sock
)
114 if (sock
>= FD_SETSIZE
) {
116 "Error: Failed to remove event. sock %d should be less than %d\n",
121 FD_CLR(sock
, &dispr
->fdset
);
/* Wait for watched descriptors to become readable and run their callbacks.
 *
 * timeout is in microseconds: it is split into whole seconds and the
 * remaining microseconds for select(). select() is given a copy of the
 * watched set, so dispr->fdset itself is never passed to it. Afterwards
 * every descriptor from 0 to max_sock that is set in the copy has its
 * registered callback invoked with its registered context. */
128 dispatcher_wait(Dispatcher
*dispr
, uint32_t timeout
)
131 tv
.tv_sec
= timeout
/ 1000000;
132 tv
.tv_usec
= timeout
% 1000000;
134 fd_set fdset
= dispr
->fdset
;
136 /* Wait until some of the sockets become readable. */
137 int rc
= select(dispr
->max_sock
+ 1, &fdset
, 0, 0, &tv
);
148 /* Now call the callback for every ready socket. */
151 for (sock
= 0; sock
< dispr
->max_sock
+ 1; sock
++)
152 if (FD_ISSET(sock
, &fdset
)) {
153 Event
*e
= &dispr
->events
[sock
];
154 e
->callback(sock
, e
->ctx
);
/* State kept per virtqueue.
 *
 * last_avail_index and last_used_index are this program's own positions in
 * the avail and used rings; both are 16-bit and reduced modulo the queue
 * size where they are used as ring slots. desc, avail and used point at the
 * three ring areas and are set by vubr_set_vring_addr_exec(). */
160 typedef struct VubrVirtq
{
164 uint16_t last_avail_index
;
165 uint16_t last_used_index
;
166 struct vring_desc
*desc
;
167 struct vring_avail
*avail
;
168 struct vring_used
*used
;
171 /* Based on qemu/hw/virtio/vhost-user.c */
/* Capacity of the regions[] arrays and of the fds[] array in a message. */
173 #define VHOST_MEMORY_MAX_NREGIONS 8
/* NOTE(review): presumably a feature bit number; not referenced in the
 * code visible here. */
174 #define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Bit positions of the protocol features. VHOST_USER_PROTOCOL_F_MAX is the
 * count used to build the mask below, which has one bit set for every
 * position under that count. */
176 enum VhostUserProtocolFeature
{
177 VHOST_USER_PROTOCOL_F_MQ
= 0,
178 VHOST_USER_PROTOCOL_F_LOG_SHMFD
= 1,
179 VHOST_USER_PROTOCOL_F_RARP
= 2,
181 VHOST_USER_PROTOCOL_F_MAX
184 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
/* Request codes carried in VhostUserMsg.request. Each one is dispatched to
 * a handler by vubr_execute_request() and has a printable name in
 * vubr_request_str[]. */
186 typedef enum VhostUserRequest
{
188 VHOST_USER_GET_FEATURES
= 1,
189 VHOST_USER_SET_FEATURES
= 2,
190 VHOST_USER_SET_OWNER
= 3,
191 VHOST_USER_RESET_OWNER
= 4,
192 VHOST_USER_SET_MEM_TABLE
= 5,
193 VHOST_USER_SET_LOG_BASE
= 6,
194 VHOST_USER_SET_LOG_FD
= 7,
195 VHOST_USER_SET_VRING_NUM
= 8,
196 VHOST_USER_SET_VRING_ADDR
= 9,
197 VHOST_USER_SET_VRING_BASE
= 10,
198 VHOST_USER_GET_VRING_BASE
= 11,
199 VHOST_USER_SET_VRING_KICK
= 12,
200 VHOST_USER_SET_VRING_CALL
= 13,
201 VHOST_USER_SET_VRING_ERR
= 14,
202 VHOST_USER_GET_PROTOCOL_FEATURES
= 15,
203 VHOST_USER_SET_PROTOCOL_FEATURES
= 16,
204 VHOST_USER_GET_QUEUE_NUM
= 17,
205 VHOST_USER_SET_VRING_ENABLE
= 18,
206 VHOST_USER_SEND_RARP
= 19,
/* One memory region as it appears in a message payload.
 * vubr_set_mem_table_exec() copies these four fields into the matching
 * VubrDevRegion (gpa, size, qva and mmap_offset respectively). */
210 typedef struct VhostUserMemoryRegion
{
211 uint64_t guest_phys_addr
;
212 uint64_t memory_size
;
213 uint64_t userspace_addr
;
214 uint64_t mmap_offset
;
215 } VhostUserMemoryRegion
;
/* Memory table payload: an array of up to VHOST_MEMORY_MAX_NREGIONS
 * region descriptions. */
217 typedef struct VhostUserMemory
{
220 VhostUserMemoryRegion regions
[VHOST_MEMORY_MAX_NREGIONS
];
/* Message exchanged on the vhost-user socket.
 *
 * vubr_message_read() receives the first VHOST_USER_HDR_SIZE bytes and then
 * reads size further bytes into the payload. The payload members seen here
 * are a vring state, a vring address and a memory table; handlers also use
 * a 64-bit value (payload.u64). fds holds descriptors received as
 * SCM_RIGHTS ancillary data. The struct is declared packed. */
223 typedef struct VhostUserMsg
{
224 VhostUserRequest request
;
226 #define VHOST_USER_VERSION_MASK (0x3)
227 #define VHOST_USER_REPLY_MASK (0x1<<2)
229 uint32_t size
; /* the following payload size */
231 #define VHOST_USER_VRING_IDX_MASK (0xff)
232 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
234 struct vhost_vring_state state
;
235 struct vhost_vring_addr addr
;
236 VhostUserMemory memory
;
238 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
240 } QEMU_PACKED VhostUserMsg
;
/* Size of the message header: every byte that precedes the payload. */
242 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
244 /* The version of the protocol we support */
245 #define VHOST_USER_VERSION (0x1)
/* Number of entries in VubrDev.vq[]. */
247 #define MAX_NR_VIRTQUEUE (8)
/* This program's record of one guest memory region, filled in by
 * vubr_set_mem_table_exec() and searched by gpa_to_va() and qva_to_va(). */
249 typedef struct VubrDevRegion
{
250 /* Guest Physical address. */
252 /* Memory region size. */
254 /* QEMU virtual address (userspace). */
256 /* Starting offset in our mmaped space. */
257 uint64_t mmap_offset
;
258 /* Start address of mmaped space. */
/* Whole bridge state: the event dispatcher, the guest memory regions, the
 * virtqueues, and the UDP backend socket with the address packets are sent
 * to. A pointer to this struct is the context of every callback here. */
262 typedef struct VubrDev
{
264 Dispatcher dispatcher
;
266 VubrDevRegion regions
[VHOST_MEMORY_MAX_NREGIONS
];
267 VubrVirtq vq
[MAX_NR_VIRTQUEUE
];
268 int backend_udp_sock
;
269 struct sockaddr_in backend_udp_dest
;
/* Printable name of each request code, indexed by the code itself.
 * Used by vubr_execute_request() when logging an incoming message. */
272 static const char *vubr_request_str
[] = {
273 [VHOST_USER_NONE
] = "VHOST_USER_NONE",
274 [VHOST_USER_GET_FEATURES
] = "VHOST_USER_GET_FEATURES",
275 [VHOST_USER_SET_FEATURES
] = "VHOST_USER_SET_FEATURES",
276 [VHOST_USER_SET_OWNER
] = "VHOST_USER_SET_OWNER",
277 [VHOST_USER_RESET_OWNER
] = "VHOST_USER_RESET_OWNER",
278 [VHOST_USER_SET_MEM_TABLE
] = "VHOST_USER_SET_MEM_TABLE",
279 [VHOST_USER_SET_LOG_BASE
] = "VHOST_USER_SET_LOG_BASE",
280 [VHOST_USER_SET_LOG_FD
] = "VHOST_USER_SET_LOG_FD",
281 [VHOST_USER_SET_VRING_NUM
] = "VHOST_USER_SET_VRING_NUM",
282 [VHOST_USER_SET_VRING_ADDR
] = "VHOST_USER_SET_VRING_ADDR",
283 [VHOST_USER_SET_VRING_BASE
] = "VHOST_USER_SET_VRING_BASE",
284 [VHOST_USER_GET_VRING_BASE
] = "VHOST_USER_GET_VRING_BASE",
285 [VHOST_USER_SET_VRING_KICK
] = "VHOST_USER_SET_VRING_KICK",
286 [VHOST_USER_SET_VRING_CALL
] = "VHOST_USER_SET_VRING_CALL",
287 [VHOST_USER_SET_VRING_ERR
] = "VHOST_USER_SET_VRING_ERR",
288 [VHOST_USER_GET_PROTOCOL_FEATURES
] = "VHOST_USER_GET_PROTOCOL_FEATURES",
289 [VHOST_USER_SET_PROTOCOL_FEATURES
] = "VHOST_USER_SET_PROTOCOL_FEATURES",
290 [VHOST_USER_GET_QUEUE_NUM
] = "VHOST_USER_GET_QUEUE_NUM",
291 [VHOST_USER_SET_VRING_ENABLE
] = "VHOST_USER_SET_VRING_ENABLE",
292 [VHOST_USER_SEND_RARP
] = "VHOST_USER_SEND_RARP",
293 [VHOST_USER_MAX
] = "VHOST_USER_MAX",
/* Dump len bytes of buf to stdout as two-digit hex values, preceded by a
 * "Raw buffer:" line and followed by a dotted separator line. */
297 print_buffer(uint8_t *buf
, size_t len
)
300 printf("Raw buffer:\n");
301 for (i
= 0; i
< len
; i
++) {
308 printf("%02x ", buf
[i
]);
310 printf("\n............................................................\n");
/* Looks guest_addr up in dev->regions[0..nregions). For the first region
 * whose [gpa, gpa + size) range contains it, the result is the offset into
 * that region plus the region's mmap_addr and mmap_offset.
 *
 * NOTE(review): a failed lookup is reported only through assert(), which
 * compiles away under NDEBUG.
 * NOTE(review): r->gpa + r->size is unsigned 64-bit arithmetic and can wrap
 * for a region that ends at the top of the address space. */
313 /* Translate guest physical address to our virtual address. */
315 gpa_to_va(VubrDev
*dev
, uint64_t guest_addr
)
319 /* Find matching memory region. */
320 for (i
= 0; i
< dev
->nregions
; i
++) {
321 VubrDevRegion
*r
= &dev
->regions
[i
];
323 if ((guest_addr
>= r
->gpa
) && (guest_addr
< (r
->gpa
+ r
->size
))) {
324 return guest_addr
- r
->gpa
+ r
->mmap_addr
+ r
->mmap_offset
;
328 assert(!"address not found in regions");
/* Same lookup as gpa_to_va(), but matching on each region's qva (the
 * address in QEMU's own address space) instead of its gpa.
 *
 * NOTE(review): a failed lookup is reported only through assert(), which
 * compiles away under NDEBUG.
 * NOTE(review): r->qva + r->size can wrap, as in gpa_to_va(). */
332 /* Translate qemu virtual address to our virtual address. */
334 qva_to_va(VubrDev
*dev
, uint64_t qemu_addr
)
338 /* Find matching memory region. */
339 for (i
= 0; i
< dev
->nregions
; i
++) {
340 VubrDevRegion
*r
= &dev
->regions
[i
];
342 if ((qemu_addr
>= r
->qva
) && (qemu_addr
< (r
->qva
+ r
->size
))) {
343 return qemu_addr
- r
->qva
+ r
->mmap_addr
+ r
->mmap_offset
;
347 assert(!"address not found in regions");
/* Read one message from conn_fd into *vmsg.
 *
 * The header (VHOST_USER_HDR_SIZE bytes) is received with recvmsg() along
 * with a control buffer sized for VHOST_MEMORY_MAX_NREGIONS descriptors.
 * Any SOL_SOCKET / SCM_RIGHTS control message has its descriptors copied
 * into vmsg->fds and counted in vmsg->fd_num. A size larger than the
 * payload area is reported as an error. Otherwise size bytes are read into
 * the payload.
 *
 * NOTE(review): fd_size is bounded only indirectly, by the size of the
 * control buffer handed to recvmsg().
 * NOTE(review): sizeof() yields size_t but is printed with "%lu".
 * NOTE(review): the payload read() is compared with vmsg->size by assert()
 * only, so a short read is not handled when assertions are disabled. */
352 vubr_message_read(int conn_fd
, VhostUserMsg
*vmsg
)
354 char control
[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS
* sizeof(int))] = { };
356 .iov_base
= (char *)vmsg
,
357 .iov_len
= VHOST_USER_HDR_SIZE
,
359 struct msghdr msg
= {
362 .msg_control
= control
,
363 .msg_controllen
= sizeof(control
),
366 struct cmsghdr
*cmsg
;
369 rc
= recvmsg(conn_fd
, &msg
, 0);
376 for (cmsg
= CMSG_FIRSTHDR(&msg
);
378 cmsg
= CMSG_NXTHDR(&msg
, cmsg
))
380 if (cmsg
->cmsg_level
== SOL_SOCKET
&& cmsg
->cmsg_type
== SCM_RIGHTS
) {
381 fd_size
= cmsg
->cmsg_len
- CMSG_LEN(0);
382 vmsg
->fd_num
= fd_size
/ sizeof(int);
383 memcpy(vmsg
->fds
, CMSG_DATA(cmsg
), fd_size
);
388 if (vmsg
->size
> sizeof(vmsg
->payload
)) {
390 "Error: too big message request: %d, size: vmsg->size: %u, "
391 "while sizeof(vmsg->payload) = %lu\n",
392 vmsg
->request
, vmsg
->size
, sizeof(vmsg
->payload
));
397 rc
= read(conn_fd
, &vmsg
->payload
, vmsg
->size
);
402 assert(rc
== vmsg
->size
);
/* Write *vmsg to conn_fd: the header followed by vmsg->size payload bytes,
 * in a single write() that is retried while it fails with EINTR.
 *
 * NOTE(review): a short write is not retried by the loop condition shown
 * here. */
407 vubr_message_write(int conn_fd
, VhostUserMsg
*vmsg
)
412 rc
= write(conn_fd
, vmsg
, VHOST_USER_HDR_SIZE
+ vmsg
->size
);
413 } while (rc
< 0 && errno
== EINTR
);
/* Send len bytes of buf as one datagram from the backend UDP socket to
 * dev->backend_udp_dest. A sendto() failure goes to vubr_die(). */
421 vubr_backend_udp_sendbuf(VubrDev
*dev
, uint8_t *buf
, size_t len
)
423 int slen
= sizeof(struct sockaddr_in
);
425 if (sendto(dev
->backend_udp_sock
, buf
, len
, 0,
426 (struct sockaddr
*) &dev
->backend_udp_dest
, slen
) == -1) {
427 vubr_die("sendto()");
/* Receive one datagram of at most buflen bytes from the backend UDP socket
 * into buf. A recvfrom() failure goes to vubr_die().
 *
 * NOTE(review): &dev->backend_udp_dest is passed as recvfrom()'s source
 * address buffer, so each received datagram overwrites the address that
 * vubr_backend_udp_sendbuf() sends to. Confirm this is intended. */
432 vubr_backend_udp_recvbuf(VubrDev
*dev
, uint8_t *buf
, size_t buflen
)
434 int slen
= sizeof(struct sockaddr_in
);
437 rc
= recvfrom(dev
->backend_udp_sock
, buf
, buflen
, 0,
438 (struct sockaddr
*) &dev
->backend_udp_dest
,
441 vubr_die("recvfrom()");
/* Forward a packet taken from the guest to the UDP backend.
 *
 * buf starts with a struct virtio_net_hdr_v1. That header is skipped and
 * the remaining len - hdrlen bytes are sent. The whole buffer is dumped
 * first when VHOST_USER_BRIDGE_DEBUG is set.
 *
 * NOTE(review): len is not compared with hdrlen. If len is smaller, the
 * unsigned subtraction wraps to a huge length. */
448 vubr_consume_raw_packet(VubrDev
*dev
, uint8_t *buf
, uint32_t len
)
450 int hdrlen
= sizeof(struct virtio_net_hdr_v1
);
452 if (VHOST_USER_BRIDGE_DEBUG
) {
453 print_buffer(buf
, len
);
455 vubr_backend_udp_sendbuf(dev
, buf
+ hdrlen
, len
- hdrlen
);
/* Writes 1 to the queue's call_fd eventfd, unless the avail ring flags have
 * VRING_AVAIL_F_NO_INTERRUPT set. */
458 /* Kick the guest if necessary. */
460 vubr_virtqueue_kick(VubrVirtq
*vq
)
462 if (!(vq
->avail
->flags
& VRING_AVAIL_F_NO_INTERRUPT
)) {
463 DPRINT("Kicking the guest...\n");
464 eventfd_write(vq
->call_fd
, 1);
/* Deliver len bytes of buf to the guest through virtqueue vq.
 *
 * The next available ring slot (last_avail_index modulo the queue size)
 * yields a descriptor index. The data is copied into descriptor i's buffer
 * when it fits and the descriptor is writable. The descriptor index and len
 * are then stored in the next used ring slot, both private indexes advance,
 * used->idx is published, and the guest is kicked.
 *
 * The caller must have checked that a descriptor is available; this is
 * asserted against avail->idx.
 *
 * NOTE(review): i is not assigned in the lines shown here; presumably it is
 * d_index. Confirm.
 * NOTE(review): vq->size is used as a modulus without a zero check. */
469 vubr_post_buffer(VubrDev
*dev
, VubrVirtq
*vq
, uint8_t *buf
, int32_t len
)
471 struct vring_desc
*desc
= vq
->desc
;
472 struct vring_avail
*avail
= vq
->avail
;
473 struct vring_used
*used
= vq
->used
;
475 unsigned int size
= vq
->size
;
477 uint16_t avail_index
= atomic_mb_read(&avail
->idx
);
479 /* We check the available descriptors before posting the
480 * buffer, so here we assume that enough available
482 assert(vq
->last_avail_index
!= avail_index
);
483 uint16_t a_index
= vq
->last_avail_index
% size
;
484 uint16_t u_index
= vq
->last_used_index
% size
;
485 uint16_t d_index
= avail
->ring
[a_index
];
489 DPRINT("Post packet to guest on vq:\n");
490 DPRINT(" size = %d\n", vq
->size
);
491 DPRINT(" last_avail_index = %d\n", vq
->last_avail_index
);
492 DPRINT(" last_used_index = %d\n", vq
->last_used_index
);
493 DPRINT(" a_index = %d\n", a_index
);
494 DPRINT(" u_index = %d\n", u_index
);
495 DPRINT(" d_index = %d\n", d_index
);
496 DPRINT(" desc[%d].addr = 0x%016"PRIx64
"\n", i
, desc
[i
].addr
);
497 DPRINT(" desc[%d].len = %d\n", i
, desc
[i
].len
);
498 DPRINT(" desc[%d].flags = %d\n", i
, desc
[i
].flags
);
499 DPRINT(" avail->idx = %d\n", avail_index
);
500 DPRINT(" used->idx = %d\n", used
->idx
);
502 if (!(desc
[i
].flags
& VRING_DESC_F_WRITE
)) {
503 /* FIXME: we should find writable descriptor. */
504 fprintf(stderr
, "Error: descriptor is not writable. Exiting.\n");
508 void *chunk_start
= (void *)gpa_to_va(dev
, desc
[i
].addr
);
509 uint32_t chunk_len
= desc
[i
].len
;
511 if (len
<= chunk_len
) {
512 memcpy(chunk_start
, buf
, len
);
515 "Received too long packet from the backend. Dropping...\n");
519 /* Add descriptor to the used ring. */
520 used
->ring
[u_index
].id
= d_index
;
521 used
->ring
[u_index
].len
= len
;
523 vq
->last_avail_index
++;
524 vq
->last_used_index
++;
526 atomic_mb_set(&used
->idx
, vq
->last_used_index
);
528 /* Kick the guest if necessary. */
529 vubr_virtqueue_kick(vq
);
/* Consume one packet that the guest placed on virtqueue vq.
 *
 * Descriptor buffers are copied one after another into a local buffer of
 * buf_size (4096) bytes, stopping at a descriptor that lacks
 * VRING_DESC_F_NEXT. The descriptor index and total length are stored in
 * the next used ring slot and the gathered bytes go to
 * vubr_consume_raw_packet(). The ring indexes are not advanced here;
 * vubr_process_avail() does that.
 *
 * NOTE(review): the fit test is len + chunk_len < buf_size, so a packet
 * that fills the buffer exactly is treated as too long. */
533 vubr_process_desc(VubrDev
*dev
, VubrVirtq
*vq
)
535 struct vring_desc
*desc
= vq
->desc
;
536 struct vring_avail
*avail
= vq
->avail
;
537 struct vring_used
*used
= vq
->used
;
539 unsigned int size
= vq
->size
;
541 uint16_t a_index
= vq
->last_avail_index
% size
;
542 uint16_t u_index
= vq
->last_used_index
% size
;
543 uint16_t d_index
= avail
->ring
[a_index
];
546 size_t buf_size
= 4096;
552 void *chunk_start
= (void *)gpa_to_va(dev
, desc
[i
].addr
);
553 uint32_t chunk_len
= desc
[i
].len
;
555 if (len
+ chunk_len
< buf_size
) {
556 memcpy(buf
+ len
, chunk_start
, chunk_len
);
557 DPRINT("%d ", chunk_len
);
559 fprintf(stderr
, "Error: too long packet. Dropping...\n");
565 if (!(desc
[i
].flags
& VRING_DESC_F_NEXT
)) {
577 /* Add descriptor to the used ring. */
578 used
->ring
[u_index
].id
= d_index
;
579 used
->ring
[u_index
].len
= len
;
581 vubr_consume_raw_packet(dev
, buf
, len
);
/* Drain virtqueue vq. While the private avail position differs from the
 * guest's avail->idx, process one descriptor chain and advance both private
 * indexes. Afterwards publish the new used position in used->idx. */
587 vubr_process_avail(VubrDev
*dev
, VubrVirtq
*vq
)
589 struct vring_avail
*avail
= vq
->avail
;
590 struct vring_used
*used
= vq
->used
;
592 while (vq
->last_avail_index
!= atomic_mb_read(&avail
->idx
)) {
593 vubr_process_desc(dev
, vq
);
594 vq
->last_avail_index
++;
595 vq
->last_used_index
++;
598 atomic_mb_set(&used
->idx
, vq
->last_used_index
);
/* Dispatcher callback for the UDP backend socket; ctx is the VubrDev.
 *
 * When virtqueue 0 (the receive queue) has an available descriptor, one
 * datagram is received into buf just after the space reserved for a
 * struct virtio_net_hdr_v1. That header is zeroed with num_buffers set to
 * 1, and header plus data are posted to the guest. */
602 vubr_backend_recv_cb(int sock
, void *ctx
)
604 VubrDev
*dev
= (VubrDev
*) ctx
;
605 VubrVirtq
*rx_vq
= &dev
->vq
[0];
607 struct virtio_net_hdr_v1
*hdr
= (struct virtio_net_hdr_v1
*)buf
;
608 int hdrlen
= sizeof(struct virtio_net_hdr_v1
);
609 int buflen
= sizeof(buf
);
612 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
614 uint16_t avail_index
= atomic_mb_read(&rx_vq
->avail
->idx
);
616 /* If there are no available descriptors, just do nothing.
617 * The buffer will be handled by the next arriving UDP packet,
618 * or the next kick on the receive virtq. */
619 if (rx_vq
->last_avail_index
== avail_index
) {
620 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
624 len
= vubr_backend_udp_recvbuf(dev
, buf
+ hdrlen
, buflen
- hdrlen
);
626 *hdr
= (struct virtio_net_hdr_v1
) { };
627 hdr
->num_buffers
= 1;
628 vubr_post_buffer(dev
, rx_vq
, buf
, len
+ hdrlen
);
/* Dispatcher callback for a kick eventfd; ctx is the VubrDev.
 * Reads the eventfd counter (a failure goes to vubr_die()), logs it, and
 * then drains virtqueue 1.
 *
 * NOTE(review): vq[1] is processed whichever descriptor fired. */
632 vubr_kick_cb(int sock
, void *ctx
)
634 VubrDev
*dev
= (VubrDev
*) ctx
;
638 rc
= eventfd_read(sock
, &kick_data
);
640 vubr_die("eventfd_read()");
642 DPRINT("Got kick_data: %016"PRIx64
"\n", kick_data
);
643 vubr_process_avail(dev
, &dev
->vq
[1]);
/* Handler for VHOST_USER_NONE. Placeholder: it only logs that the function
 * is not implemented. */
648 vubr_none_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
650 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_GET_FEATURES.
 * Builds a feature mask from VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_CTRL_VQ,
 * VIRTIO_NET_F_CTRL_RX and VHOST_F_LOG_ALL, sets the payload size to that
 * of a 64-bit value, and logs payload.u64.
 *
 * NOTE(review): the mask is presumably assigned to vmsg->payload.u64, which
 * is the value logged; confirm. */
655 vubr_get_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
658 ((1ULL << VIRTIO_NET_F_MRG_RXBUF
) |
659 (1ULL << VIRTIO_NET_F_CTRL_VQ
) |
660 (1ULL << VIRTIO_NET_F_CTRL_RX
) |
661 (1ULL << VHOST_F_LOG_ALL
));
662 vmsg
->size
= sizeof(vmsg
->payload
.u64
);
664 DPRINT("Sending back to guest u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
/* Handler for VHOST_USER_SET_FEATURES. The code shown here only logs the
 * 64-bit payload value. */
671 vubr_set_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
673 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
/* Handler that vubr_execute_request() dispatches VHOST_USER_SET_OWNER to. */
678 vubr_set_owner_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
/* Handler that vubr_execute_request() dispatches VHOST_USER_RESET_OWNER to.
 * Placeholder: it only logs that the function is not implemented. */
684 vubr_reset_device_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
686 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SET_MEM_TABLE.
 *
 * Takes the region count from the payload. For each region it logs the
 * four message fields, copies them into dev->regions[i], and maps
 * size + mmap_offset bytes readable, writable and shared. The mapping
 * address is stored in the region's mmap_addr and logged.
 *
 * NOTE(review): memory->nregions comes from the message and is not compared
 * with VHOST_MEMORY_MAX_NREGIONS before it is used to index both regions[]
 * arrays.
 * NOTE(review): size + mmap_offset is unsigned 64-bit arithmetic and can
 * wrap. */
691 vubr_set_mem_table_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
694 VhostUserMemory
*memory
= &vmsg
->payload
.memory
;
695 dev
->nregions
= memory
->nregions
;
697 DPRINT("Nregions: %d\n", memory
->nregions
);
698 for (i
= 0; i
< dev
->nregions
; i
++) {
700 VhostUserMemoryRegion
*msg_region
= &memory
->regions
[i
];
701 VubrDevRegion
*dev_region
= &dev
->regions
[i
];
703 DPRINT("Region %d\n", i
);
704 DPRINT(" guest_phys_addr: 0x%016"PRIx64
"\n",
705 msg_region
->guest_phys_addr
);
706 DPRINT(" memory_size: 0x%016"PRIx64
"\n",
707 msg_region
->memory_size
);
708 DPRINT(" userspace_addr 0x%016"PRIx64
"\n",
709 msg_region
->userspace_addr
);
710 DPRINT(" mmap_offset 0x%016"PRIx64
"\n",
711 msg_region
->mmap_offset
);
713 dev_region
->gpa
= msg_region
->guest_phys_addr
;
714 dev_region
->size
= msg_region
->memory_size
;
715 dev_region
->qva
= msg_region
->userspace_addr
;
716 dev_region
->mmap_offset
= msg_region
->mmap_offset
;
718 /* We don't use offset argument of mmap() since the
719 * mapped address has to be page aligned, and we use huge
721 mmap_addr
= mmap(0, dev_region
->size
+ dev_region
->mmap_offset
,
722 PROT_READ
| PROT_WRITE
, MAP_SHARED
,
725 if (mmap_addr
== MAP_FAILED
) {
729 dev_region
->mmap_addr
= (uint64_t) mmap_addr
;
730 DPRINT(" mmap_addr: 0x%016"PRIx64
"\n", dev_region
->mmap_addr
);
/* Handler for VHOST_USER_SET_LOG_BASE. Placeholder: it only logs that the
 * function is not implemented. */
737 vubr_set_log_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
739 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SET_LOG_FD. Placeholder: it only logs that the
 * function is not implemented. */
744 vubr_set_log_fd_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
746 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SET_VRING_NUM. Logs the vring state carried in the
 * payload and stores state.num as the size of virtqueue state.index.
 *
 * NOTE(review): index comes from the message and is not compared with
 * MAX_NR_VIRTQUEUE before it indexes dev->vq[].
 * NOTE(review): num is stored unchecked and later used as a modulus in
 * vubr_post_buffer() and vubr_process_desc(). */
751 vubr_set_vring_num_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
753 unsigned int index
= vmsg
->payload
.state
.index
;
754 unsigned int num
= vmsg
->payload
.state
.num
;
756 DPRINT("State.index: %d\n", index
);
757 DPRINT("State.num: %d\n", num
);
758 dev
->vq
[index
].size
= num
;
/* Handler for VHOST_USER_SET_VRING_ADDR.
 *
 * Logs the vring address payload, then translates the descriptor, used and
 * avail addresses with qva_to_va() and stores the resulting pointers in the
 * selected virtqueue. The queue's last_used_index is then set to the
 * current used->idx.
 *
 * NOTE(review): vra->index comes from the message and is not compared with
 * MAX_NR_VIRTQUEUE before it indexes dev->vq[]. */
763 vubr_set_vring_addr_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
765 struct vhost_vring_addr
*vra
= &vmsg
->payload
.addr
;
766 unsigned int index
= vra
->index
;
767 VubrVirtq
*vq
= &dev
->vq
[index
];
769 DPRINT("vhost_vring_addr:\n");
770 DPRINT(" index: %d\n", vra
->index
);
771 DPRINT(" flags: %d\n", vra
->flags
);
772 DPRINT(" desc_user_addr: 0x%016llx\n", vra
->desc_user_addr
);
773 DPRINT(" used_user_addr: 0x%016llx\n", vra
->used_user_addr
);
774 DPRINT(" avail_user_addr: 0x%016llx\n", vra
->avail_user_addr
);
775 DPRINT(" log_guest_addr: 0x%016llx\n", vra
->log_guest_addr
);
777 vq
->desc
= (struct vring_desc
*)qva_to_va(dev
, vra
->desc_user_addr
);
778 vq
->used
= (struct vring_used
*)qva_to_va(dev
, vra
->used_user_addr
);
779 vq
->avail
= (struct vring_avail
*)qva_to_va(dev
, vra
->avail_user_addr
);
781 DPRINT("Setting virtq addresses:\n");
782 DPRINT(" vring_desc at %p\n", vq
->desc
);
783 DPRINT(" vring_used at %p\n", vq
->used
);
784 DPRINT(" vring_avail at %p\n", vq
->avail
);
786 vq
->last_used_index
= vq
->used
->idx
;
/* Handler for VHOST_USER_SET_VRING_BASE. Logs the vring state carried in
 * the payload and stores state.num as last_avail_index of virtqueue
 * state.index.
 *
 * NOTE(review): index comes from the message and is not compared with
 * MAX_NR_VIRTQUEUE before it indexes dev->vq[]. */
791 vubr_set_vring_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
793 unsigned int index
= vmsg
->payload
.state
.index
;
794 unsigned int num
= vmsg
->payload
.state
.num
;
796 DPRINT("State.index: %d\n", index
);
797 DPRINT("State.num: %d\n", num
);
798 dev
->vq
[index
].last_avail_index
= num
;
/* Handler for VHOST_USER_GET_VRING_BASE. Placeholder: it only logs that the
 * function is not implemented. */
804 vubr_get_vring_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
806 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SET_VRING_KICK.
 *
 * The low byte of the 64-bit payload selects the virtqueue. The message
 * must not have the NOFD bit set and must carry exactly one descriptor
 * (both asserted); that descriptor becomes the queue's kick_fd. For an
 * odd-numbered queue the kick_fd is also registered with the dispatcher.
 *
 * NOTE(review): index can be as large as 255 after masking, while dev->vq[]
 * has MAX_NR_VIRTQUEUE (8) entries.
 * NOTE(review): the two message checks are assert()s and vanish under
 * NDEBUG. */
811 vubr_set_vring_kick_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
813 uint64_t u64_arg
= vmsg
->payload
.u64
;
814 int index
= u64_arg
& VHOST_USER_VRING_IDX_MASK
;
816 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
818 assert((u64_arg
& VHOST_USER_VRING_NOFD_MASK
) == 0);
819 assert(vmsg
->fd_num
== 1);
821 dev
->vq
[index
].kick_fd
= vmsg
->fds
[0];
822 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg
->fds
[0], index
);
824 if (index
% 2 == 1) {
826 dispatcher_add(&dev
->dispatcher
, dev
->vq
[index
].kick_fd
,
829 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
830 dev
->vq
[index
].kick_fd
, index
);
/* Handler for VHOST_USER_SET_VRING_CALL.
 *
 * The low byte of the 64-bit payload selects the virtqueue. The message
 * must not have the NOFD bit set and must carry exactly one descriptor
 * (both asserted); that descriptor becomes the queue's call_fd, which
 * vubr_virtqueue_kick() writes to.
 *
 * NOTE(review): index can be as large as 255 after masking, while dev->vq[]
 * has MAX_NR_VIRTQUEUE (8) entries. */
836 vubr_set_vring_call_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
838 uint64_t u64_arg
= vmsg
->payload
.u64
;
839 int index
= u64_arg
& VHOST_USER_VRING_IDX_MASK
;
841 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
842 assert((u64_arg
& VHOST_USER_VRING_NOFD_MASK
) == 0);
843 assert(vmsg
->fd_num
== 1);
845 dev
->vq
[index
].call_fd
= vmsg
->fds
[0];
846 DPRINT("Got call_fd: %d for vq: %d\n", vmsg
->fds
[0], index
);
/* Handler for VHOST_USER_SET_VRING_ERR. The code shown here only logs the
 * 64-bit payload value. */
852 vubr_set_vring_err_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
854 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
/* Handler for VHOST_USER_GET_PROTOCOL_FEATURES. Not implemented: the code
 * shown here only logs the 64-bit payload value. */
859 vubr_get_protocol_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
861 /* FIXME: unimplemented */
862 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
/* Handler for VHOST_USER_SET_PROTOCOL_FEATURES. Not implemented: the code
 * shown here only logs the 64-bit payload value. */
867 vubr_set_protocol_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
869 /* FIXME: unimplemented */
870 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
/* Handler for VHOST_USER_GET_QUEUE_NUM. Placeholder: it only logs that the
 * function is not implemented. */
875 vubr_get_queue_num_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
877 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SET_VRING_ENABLE. Placeholder: it only logs that
 * the function is not implemented. */
882 vubr_set_vring_enable_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
884 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Handler for VHOST_USER_SEND_RARP. Placeholder: it only logs that the
 * function is not implemented. */
889 vubr_send_rarp_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
891 DPRINT("Function %s() not implemented yet.\n", __func__
);
/* Log an incoming message and hand it to the handler for its request code.
 *
 * The request name, flags, size and any received descriptors are logged
 * first. The switch then returns whatever the selected handler returns.
 * vubr_receive_cb() treats a non-zero result as "send a reply".
 *
 * NOTE(review): vmsg->request indexes vubr_request_str[] for logging before
 * any range check, so an out-of-range code from the peer reads past the
 * table. */
896 vubr_execute_request(VubrDev
*dev
, VhostUserMsg
*vmsg
)
898 /* Print out generic part of the request. */
900 "================== Vhost user message from QEMU ==================\n");
901 DPRINT("Request: %s (%d)\n", vubr_request_str
[vmsg
->request
],
903 DPRINT("Flags: 0x%x\n", vmsg
->flags
);
904 DPRINT("Size: %d\n", vmsg
->size
);
909 for (i
= 0; i
< vmsg
->fd_num
; i
++) {
910 DPRINT(" %d", vmsg
->fds
[i
]);
915 switch (vmsg
->request
) {
916 case VHOST_USER_NONE
:
917 return vubr_none_exec(dev
, vmsg
);
918 case VHOST_USER_GET_FEATURES
:
919 return vubr_get_features_exec(dev
, vmsg
);
920 case VHOST_USER_SET_FEATURES
:
921 return vubr_set_features_exec(dev
, vmsg
);
922 case VHOST_USER_SET_OWNER
:
923 return vubr_set_owner_exec(dev
, vmsg
);
924 case VHOST_USER_RESET_OWNER
:
925 return vubr_reset_device_exec(dev
, vmsg
);
926 case VHOST_USER_SET_MEM_TABLE
:
927 return vubr_set_mem_table_exec(dev
, vmsg
);
928 case VHOST_USER_SET_LOG_BASE
:
929 return vubr_set_log_base_exec(dev
, vmsg
);
930 case VHOST_USER_SET_LOG_FD
:
931 return vubr_set_log_fd_exec(dev
, vmsg
);
932 case VHOST_USER_SET_VRING_NUM
:
933 return vubr_set_vring_num_exec(dev
, vmsg
);
934 case VHOST_USER_SET_VRING_ADDR
:
935 return vubr_set_vring_addr_exec(dev
, vmsg
);
936 case VHOST_USER_SET_VRING_BASE
:
937 return vubr_set_vring_base_exec(dev
, vmsg
);
938 case VHOST_USER_GET_VRING_BASE
:
939 return vubr_get_vring_base_exec(dev
, vmsg
);
940 case VHOST_USER_SET_VRING_KICK
:
941 return vubr_set_vring_kick_exec(dev
, vmsg
);
942 case VHOST_USER_SET_VRING_CALL
:
943 return vubr_set_vring_call_exec(dev
, vmsg
);
944 case VHOST_USER_SET_VRING_ERR
:
945 return vubr_set_vring_err_exec(dev
, vmsg
);
946 case VHOST_USER_GET_PROTOCOL_FEATURES
:
947 return vubr_get_protocol_features_exec(dev
, vmsg
);
948 case VHOST_USER_SET_PROTOCOL_FEATURES
:
949 return vubr_set_protocol_features_exec(dev
, vmsg
);
950 case VHOST_USER_GET_QUEUE_NUM
:
951 return vubr_get_queue_num_exec(dev
, vmsg
);
952 case VHOST_USER_SET_VRING_ENABLE
:
953 return vubr_set_vring_enable_exec(dev
, vmsg
);
954 case VHOST_USER_SEND_RARP
:
955 return vubr_send_rarp_exec(dev
, vmsg
);
958 assert(vmsg
->request
!= VHOST_USER_MAX
);
/* Dispatcher callback for an accepted vhost-user connection; ctx is the
 * VubrDev.
 *
 * Reads one message from sock and executes it. When the handler asks for a
 * reply, the version bits in flags are replaced with VHOST_USER_VERSION,
 * the reply bit is set, and the same message structure is written back. */
964 vubr_receive_cb(int sock
, void *ctx
)
966 VubrDev
*dev
= (VubrDev
*) ctx
;
970 vubr_message_read(sock
, &vmsg
);
971 reply_requested
= vubr_execute_request(dev
, &vmsg
);
972 if (reply_requested
) {
973 /* Set the version in the flags when sending the reply */
974 vmsg
.flags
&= ~VHOST_USER_VERSION_MASK
;
975 vmsg
.flags
|= VHOST_USER_VERSION
;
976 vmsg
.flags
|= VHOST_USER_REPLY_MASK
;
977 vubr_message_write(sock
, &vmsg
);
/* Dispatcher callback for the listening UNIX socket; ctx is the VubrDev.
 * Accepts one connection (a failure goes to vubr_die()) and registers the
 * new descriptor with the dispatcher, using vubr_receive_cb() and the same
 * context. */
982 vubr_accept_cb(int sock
, void *ctx
)
984 VubrDev
*dev
= (VubrDev
*)ctx
;
986 struct sockaddr_un un
;
987 socklen_t len
= sizeof(un
);
989 conn_fd
= accept(sock
, (struct sockaddr
*) &un
, &len
);
991 vubr_die("accept()");
993 DPRINT("Got connection from remote peer on sock %d\n", conn_fd
);
994 dispatcher_add(&dev
->dispatcher
, conn_fd
, ctx
, vubr_receive_cb
);
/* Create a bridge device listening on the UNIX socket at path.
 *
 * Allocates a zeroed VubrDev and initialises every virtqueue with call_fd
 * and kick_fd of -1, zero ring indexes and null ring pointers. Then it
 * creates an AF_UNIX stream socket, binds it to path, listens with a
 * backlog of 1, initialises the dispatcher and registers the listening
 * socket with it.
 *
 * NOTE(review): path is copied into un.sun_path with strcpy() and no length
 * check, so a path longer than sun_path overflows the stack buffer.
 * NOTE(review): no check of the calloc() result appears in the lines shown
 * here; confirm. */
998 vubr_new(const char *path
)
1000 VubrDev
*dev
= (VubrDev
*) calloc(1, sizeof(VubrDev
));
1003 struct sockaddr_un un
;
1006 for (i
= 0; i
< MAX_NR_VIRTQUEUE
; i
++) {
1007 dev
->vq
[i
] = (VubrVirtq
) {
1008 .call_fd
= -1, .kick_fd
= -1,
1010 .last_avail_index
= 0, .last_used_index
= 0,
1011 .desc
= 0, .avail
= 0, .used
= 0,
1015 /* Get a UNIX socket. */
1016 dev
->sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1017 if (dev
->sock
== -1) {
1021 un
.sun_family
= AF_UNIX
;
1022 strcpy(un
.sun_path
, path
);
1023 len
= sizeof(un
.sun_family
) + strlen(path
);
1026 if (bind(dev
->sock
, (struct sockaddr
*) &un
, len
) == -1) {
1030 if (listen(dev
->sock
, 1) == -1) {
1034 dispatcher_init(&dev
->dispatcher
);
1035 dispatcher_add(&dev
->dispatcher
, dev
->sock
, (void *)dev
,
1038 DPRINT("Waiting for connections on UNIX socket %s ...\n", path
);
/* Set up the UDP backend of dev.
 *
 * local_host and local_port give the address the UDP socket is bound to.
 * dest_host and dest_port give the address stored in dev->backend_udp_dest
 * for outgoing packets. Both hosts are parsed with inet_aton(), so they
 * must be numeric IPv4 addresses; a parse failure prints
 * "inet_aton() failed.". The bound socket is stored in
 * dev->backend_udp_sock and registered with the dispatcher using
 * vubr_backend_recv_cb(). */
1043 vubr_backend_udp_setup(VubrDev
*dev
,
1044 const char *local_host
,
1045 uint16_t local_port
,
1046 const char *dest_host
,
1050 struct sockaddr_in si_local
= {
1051 .sin_family
= AF_INET
,
1052 .sin_port
= htons(local_port
),
1055 if (inet_aton(local_host
, &si_local
.sin_addr
) == 0) {
1056 fprintf(stderr
, "inet_aton() failed.\n");
1060 /* setup destination for sends */
1061 dev
->backend_udp_dest
= (struct sockaddr_in
) {
1062 .sin_family
= AF_INET
,
1063 .sin_port
= htons(dest_port
),
1065 if (inet_aton(dest_host
, &dev
->backend_udp_dest
.sin_addr
) == 0) {
1066 fprintf(stderr
, "inet_aton() failed.\n");
1070 sock
= socket(AF_INET
, SOCK_DGRAM
, IPPROTO_UDP
);
1075 if (bind(sock
, (struct sockaddr
*)&si_local
, sizeof(si_local
)) == -1) {
1079 dev
->backend_udp_sock
= sock
;
1080 dispatcher_add(&dev
->dispatcher
, sock
, dev
, vubr_backend_recv_cb
);
1081 DPRINT("Waiting for data from udp backend on %s:%d...\n",
1082 local_host
, local_port
);
/* Event loop body: waits on the device's dispatcher with a timeout of
 * 200000, which dispatcher_wait() interprets as microseconds (0.2 s).
 * NOTE(review): presumably called in an endless loop; the loop construct is
 * not among the lines shown here. */
1086 vubr_run(VubrDev
*dev
)
1090 dispatcher_wait(&dev
->dispatcher
, 200000);
1091 /* Here one can try polling strategy. */
1096 main(int argc
, char *argv
[])
1100 dev
= vubr_new("/tmp/vubr.sock");
1105 vubr_backend_udp_setup(dev
,