/*
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
/*
 * TODO:
 * - main should get parameters from the command line.
 * - implement all request handlers. Still not implemented:
 *      vubr_get_queue_num_exec()
 *      vubr_send_rarp_exec()
 * - test for broken requests and virtqueues.
 * - implement features defined by the Virtio 1.0 spec.
 * - support mergeable buffers and indirect descriptors.
 * - implement clean shutdown.
 * - implement non-blocking writes to the UDP backend.
 * - implement a polling strategy.
 * - implement clean starting/stopping of vq processing
 * - implement clean starting/stopping of used and buffers
 */
#define _FILE_OFFSET_BITS 64

#include "qemu/osdep.h"
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/unistd.h>
#include <sys/mman.h>
#include <sys/eventfd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <linux/vhost.h>

#include "qemu/atomic.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_ring.h"
#define VHOST_USER_BRIDGE_DEBUG 1

#define DPRINT(...) \
    do { \
        if (VHOST_USER_BRIDGE_DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
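/*
 * The dispatcher multiplexes all file descriptors of interest (the
 * vhost-user socket, kick eventfds, and the UDP backend socket) over a
 * single select() loop and invokes a per-fd callback when the fd
 * becomes readable.
 */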
typedef void (*CallbackFunc)(int sock, void *ctx);
typedef struct Event {
    void *ctx;
    CallbackFunc callback;
} Event;
typedef struct Dispatcher {
    int max_sock;
    fd_set fdset;
    Event events[FD_SETSIZE];
} Dispatcher;
static void
vubr_die(const char *s)
{
    perror(s);
    exit(1);
}
static int
dispatcher_init(Dispatcher *dispr)
{
    FD_ZERO(&dispr->fdset);
    dispr->max_sock = -1;
    return 0;
}
static int
dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
{
    if (sock >= FD_SETSIZE) {
        fprintf(stderr,
                "Error: Failed to add new event. sock %d should be less than %d\n",
                sock, FD_SETSIZE);
        return -1;
    }

    dispr->events[sock].ctx = ctx;
    dispr->events[sock].callback = cb;

    FD_SET(sock, &dispr->fdset);

    if (sock > dispr->max_sock) {
        dispr->max_sock = sock;
    }

    DPRINT("Added sock %d for watching. max_sock: %d\n",
           sock, dispr->max_sock);
    return 0;
}
/* dispatcher_remove() is not currently in use but may be useful
 * in the future. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
    if (sock >= FD_SETSIZE) {
        fprintf(stderr,
                "Error: Failed to remove event. sock %d should be less than %d\n",
                sock, FD_SETSIZE);
        return -1;
    }

    FD_CLR(sock, &dispr->fdset);
    DPRINT("Sock %d removed from dispatcher watch.\n", sock);
    return 0;
}
static int
dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
{
    struct timeval tv;
    tv.tv_sec = timeout / 1000000;
    tv.tv_usec = timeout % 1000000;

    fd_set fdset = dispr->fdset;

    /* Wait until some of the sockets become readable. */
    int rc = select(dispr->max_sock + 1, &fdset, 0, 0, &tv);

    if (rc == -1) {
        vubr_die("select");
    }
    /* Now call the callback for every ready socket. */

    int sock;
    for (sock = 0; sock < dispr->max_sock + 1; sock++) {
        /* The callback on a socket can remove other sockets from the
         * dispatcher, so check that the socket has not been removed
         * from the dispatcher's list in the meantime.
         */
        if (FD_ISSET(sock, &fdset) && FD_ISSET(sock, &dispr->fdset)) {
            Event *e = &dispr->events[sock];
            e->callback(sock, e->ctx);
        }
    }

    return 0;
}
typedef struct VubrVirtq {
    int call_fd;
    int kick_fd;
    uint32_t size;
    uint16_t last_avail_index;
    uint16_t last_used_index;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    int enable;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */

#define VHOST_MEMORY_MAX_NREGIONS    8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32

#define VHOST_LOG_PAGE 4096
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
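/* Request codes of the vhost-user protocol, as carried in the message
 * header sent by QEMU. The numeric values are part of the wire protocol. */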
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_MAX
} VhostUserRequest;
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
    } payload;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
} QEMU_PACKED VhostUserMsg;
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support. */
#define VHOST_USER_VERSION    (0x1)

#define MAX_NR_VIRTQUEUE      (8)
typedef struct VubrDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmapped space. */
    uint64_t mmap_offset;
    /* Start address of mmapped space. */
    uint64_t mmap_addr;
} VubrDevRegion;
typedef struct VubrDev {
    int sock;
    Dispatcher dispatcher;
    uint32_t nregions;
    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VubrVirtq vq[MAX_NR_VIRTQUEUE];
    int log_call_fd;
    uint64_t log_size;
    uint8_t *log_table;
    int backend_udp_sock;
    struct sockaddr_in backend_udp_dest;
    uint64_t features;
    int hdrlen;
} VubrDev;
static const char *vubr_request_str[] = {
    [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
    [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
    [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
    [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
    [VHOST_USER_RESET_OWNER]            =  "VHOST_USER_RESET_OWNER",
    [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
    [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
    [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
    [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
    [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
    [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
    [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
    [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
    [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
    [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
    [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
    [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
    [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
    [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
    [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
    [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
};
static void
print_buffer(uint8_t *buf, size_t len)
{
    int i;

    printf("Raw buffer:\n");
    for (i = 0; i < len; i++) {
        printf("%02x ", buf[i]);
    }
    printf("\n............................................................\n");
}
/* Translate a guest physical address to our virtual address. */
static uint64_t
gpa_to_va(VubrDev *dev, uint64_t guest_addr)
{
    int i;

    /* Find the matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VubrDevRegion *r = &dev->regions[i];

        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
        }
    }

    assert(!"address not found in regions");
}
/* Translate a QEMU virtual address to our virtual address. */
static uint64_t
qva_to_va(VubrDev *dev, uint64_t qemu_addr)
{
    int i;

    /* Find the matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VubrDevRegion *r = &dev->regions[i];

        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
        }
    }

    assert(!"address not found in regions");
}
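/* Read one vhost-user message from the UNIX socket: the fixed-size
 * header (plus any SCM_RIGHTS file descriptors) first, then the
 * variable-sized payload announced in vmsg->size. */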
static void
vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
{
    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
    struct iovec iov = {
        .iov_base = (char *)vmsg,
        .iov_len = VHOST_USER_HDR_SIZE,
    };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };
    size_t fd_size;
    struct cmsghdr *cmsg;
    int rc;

    rc = recvmsg(conn_fd, &msg, 0);
    if (rc <= 0) {
        fprintf(stderr, "Peer disconnected.\n");
        exit(1);
    }

    vmsg->fd_num = 0;
    for (cmsg = CMSG_FIRSTHDR(&msg);
         cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msg, cmsg))
    {
        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
            vmsg->fd_num = fd_size / sizeof(int);
            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
            break;
        }
    }

    if (vmsg->size > sizeof(vmsg->payload)) {
        fprintf(stderr,
                "Error: too big message request: %d, size: vmsg->size: %u, "
                "while sizeof(vmsg->payload) = %zu\n",
                vmsg->request, vmsg->size, sizeof(vmsg->payload));
        exit(1);
    }

    if (vmsg->size) {
        rc = read(conn_fd, &vmsg->payload, vmsg->size);
        if (rc <= 0) {
            fprintf(stderr, "Peer disconnected.\n");
            exit(1);
        }

        assert(rc == vmsg->size);
    }
}
static void
vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
{
    int rc;

    do {
        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
    } while (rc < 0 && errno == EINTR);

    if (rc < 0) {
        vubr_die("write");
    }
}
static void
vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
{
    int slen = sizeof(struct sockaddr_in);

    if (sendto(dev->backend_udp_sock, buf, len, 0,
               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
        vubr_die("sendto()");
    }
}
static int
vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
{
    int slen = sizeof(struct sockaddr_in);
    int rc;

    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
                  (struct sockaddr *) &dev->backend_udp_dest,
                  (socklen_t *)&slen);
    if (rc == -1) {
        vubr_die("recvfrom()");
    }

    return rc;
}
static void
vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
{
    int hdrlen = dev->hdrlen;

    DPRINT("    hdrlen = %d\n", dev->hdrlen);
    if (VHOST_USER_BRIDGE_DEBUG) {
        print_buffer(buf, len);
    }
    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
}
/* Kick the log_call_fd if required. */
static void
vubr_log_kick(VubrDev *dev)
{
    if (dev->log_call_fd != -1) {
        DPRINT("Kicking the QEMU's log...\n");
        eventfd_write(dev->log_call_fd, 1);
    }
}
/* Kick the guest if necessary. */
static void
vubr_virtqueue_kick(VubrVirtq *vq)
{
    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
        DPRINT("Kicking the guest...\n");
        eventfd_write(vq->call_fd, 1);
    }
}
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(&log_table[page / 8], 1 << (page % 8));
}
static void
vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
{
    uint64_t page;

    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
        !dev->log_table || !length) {
        return;
    }

    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));

    page = address / VHOST_LOG_PAGE;
    while (page * VHOST_LOG_PAGE < address + length) {
        vubr_log_page(dev->log_table, page);
        /* Advance one dirty-log page index at a time. */
        page += 1;
    }
    vubr_log_kick(dev);
}
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;
    int32_t remaining_len = len;

    unsigned int size = vq->size;

    uint16_t avail_index = atomic_mb_read(&avail->idx);

    /* We check the available descriptors before posting the buffer,
     * so here we assume that enough descriptors are available. */
    assert(vq->last_avail_index != avail_index);
    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    int i = d_index;
    uint32_t written_len = 0;

    do {
        DPRINT("Post packet to guest on vq:\n");
        DPRINT("    size             = %d\n", vq->size);
        DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
        DPRINT("    last_used_index  = %d\n", vq->last_used_index);
        DPRINT("    a_index          = %d\n", a_index);
        DPRINT("    u_index          = %d\n", u_index);
        DPRINT("    d_index          = %d\n", d_index);
        DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
        DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
        DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
        DPRINT("    avail->idx       = %d\n", avail_index);
        DPRINT("    used->idx        = %d\n", used->idx);

        if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
            /* FIXME: we should find a writable descriptor. */
            fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
            exit(1);
        }

        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
        uint32_t chunk_len = desc[i].len;
        uint32_t chunk_write_len = MIN(remaining_len, chunk_len);

        memcpy(chunk_start, buf + written_len, chunk_write_len);
        vubr_log_write(dev, desc[i].addr, chunk_write_len);
        remaining_len -= chunk_write_len;
        written_len += chunk_write_len;

        if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
            break;
        }

        i = desc[i].next;
    } while (1);

    if (remaining_len > 0) {
        fprintf(stderr,
                "Too long packet for RX, remaining_len = %d, Dropping...\n",
                remaining_len);
        return;
    }

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
                   sizeof(used->ring[u_index]));

    vq->last_avail_index++;
    vq->last_used_index++;

    atomic_mb_set(&used->idx, vq->last_used_index);
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, idx),
                   sizeof(used->idx));

    /* Kick the guest if necessary. */
    vubr_virtqueue_kick(vq);
}
static void
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_desc *desc = vq->desc;
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;

    unsigned int size = vq->size;

    uint16_t a_index = vq->last_avail_index % size;
    uint16_t u_index = vq->last_used_index % size;
    uint16_t d_index = avail->ring[a_index];

    uint32_t i = d_index, len = 0;
    size_t buf_size = 4096;
    uint8_t buf[4096];

    do {
        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
        uint32_t chunk_len = desc[i].len;

        assert(!(desc[i].flags & VRING_DESC_F_WRITE));

        if (len + chunk_len < buf_size) {
            memcpy(buf + len, chunk_start, chunk_len);
            DPRINT("%d ", chunk_len);
            len += chunk_len;
        } else {
            fprintf(stderr, "Error: too long packet. Dropping...\n");
            break;
        }

        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
            break;
        }

        i = desc[i].next;
    } while (1);

    /* Add descriptor to the used ring. */
    used->ring[u_index].id = d_index;
    used->ring[u_index].len = len;
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
                   sizeof(used->ring[u_index]));

    vubr_consume_raw_packet(dev, buf, len);
}
static void
vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
    struct vring_avail *avail = vq->avail;
    struct vring_used *used = vq->used;
    uint64_t log_guest_addr = vq->log_guest_addr;

    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
        vubr_process_desc(dev, vq);
        vq->last_avail_index++;
        vq->last_used_index++;
    }

    atomic_mb_set(&used->idx, vq->last_used_index);
    vubr_log_write(dev,
                   log_guest_addr + offsetof(struct vring_used, idx),
                   sizeof(used->idx));
}
static void
vubr_backend_recv_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    VubrVirtq *rx_vq = &dev->vq[0];
    uint8_t buf[4096];
    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
    int hdrlen = dev->hdrlen;
    int buflen = sizeof(buf);
    int len;

    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK   ***\n\n");
    DPRINT("    hdrlen = %d\n", hdrlen);

    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);

    /* If there are no available descriptors, just do nothing.
     * The buffer will be handled by the next arriving UDP packet,
     * or by the next kick on the receive virtq. */
    if (rx_vq->last_avail_index == avail_index) {
        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
        return;
    }

    memset(buf, 0, hdrlen);
    /* TODO: support mergeable buffers. */
    hdr->num_buffers = 1;
    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);

    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
}
static void
vubr_kick_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    eventfd_t kick_data;
    ssize_t rc;

    rc = eventfd_read(sock, &kick_data);
    if (rc == -1) {
        vubr_die("eventfd_read()");
    } else {
        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
        vubr_process_avail(dev, &dev->vq[1]);
    }
}
static int
vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}
static int
vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vmsg->payload.u64 =
            ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
             (1ULL << VHOST_F_LOG_ALL) |
             (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));

    vmsg->size = sizeof(vmsg->payload.u64);

    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Reply */
    return 1;
}
static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    dev->features = vmsg->payload.u64;
    if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
        (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
        /* Virtio 1.0 or mergeable buffers: 12-byte virtio-net header. */
        dev->hdrlen = 12;
    } else {
        /* Legacy 10-byte virtio-net header. */
        dev->hdrlen = 10;
    }

    return 0;
}
static int
vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    return 0;
}
static void
vubr_close_log(VubrDev *dev)
{
    if (dev->log_table) {
        if (munmap(dev->log_table, dev->log_size) != 0) {
            vubr_die("munmap()");
        }
        dev->log_table = 0;
    }
    if (dev->log_call_fd != -1) {
        close(dev->log_call_fd);
        dev->log_call_fd = -1;
    }
}
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vubr_close_log(dev);
    dev->features = 0;
    return 0;
}
static int
vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    int i;
    VhostUserMemory *memory = &vmsg->payload.memory;
    dev->nregions = memory->nregions;

    DPRINT("Nregions: %d\n", memory->nregions);
    for (i = 0; i < dev->nregions; i++) {
        void *mmap_addr;
        VhostUserMemoryRegion *msg_region = &memory->regions[i];
        VubrDevRegion *dev_region = &dev->regions[i];

        DPRINT("Region %d\n", i);
        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
               msg_region->guest_phys_addr);
        DPRINT("    memory_size:     0x%016"PRIx64"\n",
               msg_region->memory_size);
        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
               msg_region->userspace_addr);
        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
               msg_region->mmap_offset);

        dev_region->gpa = msg_region->guest_phys_addr;
        dev_region->size = msg_region->memory_size;
        dev_region->qva = msg_region->userspace_addr;
        dev_region->mmap_offset = msg_region->mmap_offset;

        /* We don't use the offset argument of mmap() since the
         * mapped address has to be page aligned, and we use huge
         * pages. */
        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
                         PROT_READ | PROT_WRITE, MAP_SHARED,
                         vmsg->fds[i], 0);

        if (mmap_addr == MAP_FAILED) {
            vubr_die("mmap");
        }

        dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
    }

    return 0;
}
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    void *rc;
    int fd;
    uint64_t log_mmap_size, log_mmap_offset;

    assert(vmsg->fd_num == 1);
    fd = vmsg->fds[0];

    assert(vmsg->size == sizeof(vmsg->payload.log));
    log_mmap_offset = vmsg->payload.log.mmap_offset;
    log_mmap_size = vmsg->payload.log.mmap_size;
    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);

    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
              log_mmap_offset);
    if (rc == MAP_FAILED) {
        vubr_die("mmap");
    }
    dev->log_table = rc;
    dev->log_size = log_mmap_size;

    vmsg->size = sizeof(vmsg->payload.u64);

    /* Reply */
    return 1;
}
static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    assert(vmsg->fd_num == 1);
    dev->log_call_fd = vmsg->fds[0];
    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
    return 0;
}
static int
vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].size = num;
    return 0;
}
static int
vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    struct vhost_vring_addr *vra = &vmsg->payload.addr;
    unsigned int index = vra->index;
    VubrVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT("    index:            %d\n", vra->index);
    DPRINT("    flags:            %d\n", vra->flags);
    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);

    vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
    vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
    vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
    vq->log_guest_addr = vra->log_guest_addr;

    DPRINT("Setting virtq addresses:\n");
    DPRINT("    vring_desc  at %p\n", vq->desc);
    DPRINT("    vring_used  at %p\n", vq->used);
    DPRINT("    vring_avail at %p\n", vq->avail);

    vq->last_used_index = vq->used->idx;

    if (vq->last_avail_index != vq->used->idx) {
        DPRINT("Last avail index != used index: %d != %d, resuming",
               vq->last_avail_index, vq->used->idx);
        vq->last_avail_index = vq->used->idx;
    }

    return 0;
}
static int
vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].last_avail_index = num;

    return 0;
}
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;

    DPRINT("State.index: %d\n", index);
    vmsg->payload.state.num = dev->vq[index].last_avail_index;
    vmsg->size = sizeof(vmsg->payload.state);
    /* FIXME: this is a work-around for a bug in QEMU enabling
     * vrings too early. When protocol features are enabled,
     * we have to respect the VHOST_USER_SET_VRING_ENABLE request. */

    if (dev->vq[index].call_fd != -1) {
        close(dev->vq[index].call_fd);
        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
        dev->vq[index].call_fd = -1;
    }
    if (dev->vq[index].kick_fd != -1) {
        close(dev->vq[index].kick_fd);
        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
        dev->vq[index].kick_fd = -1;
    }

    /* Reply */
    return 1;
}
static int
vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    uint64_t u64_arg = vmsg->payload.u64;
    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
    assert(vmsg->fd_num == 1);

    if (dev->vq[index].kick_fd != -1) {
        close(dev->vq[index].kick_fd);
        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
    }

    dev->vq[index].kick_fd = vmsg->fds[0];
    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);

    if (index % 2 == 1) {
        /* TX queue. */
        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
                       dev, vubr_kick_cb);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }
    /* We temporarily use this hack to determine that both TX and RX
     * queues are set up and ready for processing.
     * FIXME: we need to rely on VHOST_USER_SET_VRING_ENABLE and
     * correct enabling/disabling of the queues. */
    if (dev->vq[0].kick_fd != -1 &&
        dev->vq[1].kick_fd != -1) {
        DPRINT("vhost-user-bridge is ready for processing queues.\n");
    }

    return 0;
}
static int
vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    uint64_t u64_arg = vmsg->payload.u64;
    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
    assert(vmsg->fd_num == 1);

    if (dev->vq[index].call_fd != -1) {
        close(dev->vq[index].call_fd);
        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
    }

    dev->vq[index].call_fd = vmsg->fds[0];
    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);

    return 0;
}
static int
vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}
static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    vmsg->size = sizeof(vmsg->payload.u64);

    /* Reply */
    return 1;
}
static int
vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* FIXME: unimplemented */
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
    return 0;
}
static int
vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}
static int
vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int enable = vmsg->payload.state.num;

    DPRINT("State.index:  %d\n", index);
    DPRINT("State.enable: %d\n", enable);
    dev->vq[index].enable = enable;
    return 0;
}
static int
vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return 0;
}
static int
vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
{
    /* Print out the generic part of the request. */
    DPRINT(
        "================== Vhost user message from QEMU ==================\n");
    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %d\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    switch (vmsg->request) {
    case VHOST_USER_NONE:
        return vubr_none_exec(dev, vmsg);
    case VHOST_USER_GET_FEATURES:
        return vubr_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vubr_set_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vubr_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_OWNER:
        return vubr_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vubr_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vubr_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vubr_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vubr_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vubr_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vubr_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vubr_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vubr_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vubr_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vubr_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vubr_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vubr_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vubr_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vubr_set_vring_enable_exec(dev, vmsg);
    case VHOST_USER_SEND_RARP:
        return vubr_send_rarp_exec(dev, vmsg);

    case VHOST_USER_MAX:
        assert(vmsg->request != VHOST_USER_MAX);
    }
    return 0;
}
static void
vubr_receive_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *) ctx;
    VhostUserMsg vmsg;
    int reply_requested;

    vubr_message_read(sock, &vmsg);
    reply_requested = vubr_execute_request(dev, &vmsg);
    if (reply_requested) {
        /* Set the version in the flags when sending the reply. */
        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
        vmsg.flags |= VHOST_USER_VERSION;
        vmsg.flags |= VHOST_USER_REPLY_MASK;
        vubr_message_write(sock, &vmsg);
    }
}
static void
vubr_accept_cb(int sock, void *ctx)
{
    VubrDev *dev = (VubrDev *)ctx;
    int conn_fd;
    struct sockaddr_un un;
    socklen_t len = sizeof(un);

    conn_fd = accept(sock, (struct sockaddr *) &un, &len);
    if (conn_fd == -1) {
        vubr_die("accept()");
    }
    DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
    dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
}
static VubrDev *
vubr_new(const char *path, bool client)
{
    VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
    int i;
    struct sockaddr_un un;
    CallbackFunc cb;
    size_t len;

    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
        dev->vq[i] = (VubrVirtq) {
            .call_fd = -1, .kick_fd = -1,
            .last_avail_index = 0, .last_used_index = 0,
            .desc = 0, .avail = 0, .used = 0,
        };
    }

    /* Init log */
    dev->log_call_fd = -1;

    /* Get a UNIX socket. */
    dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (dev->sock == -1) {
        vubr_die("socket");
    }

    un.sun_family = AF_UNIX;
    strcpy(un.sun_path, path);
    len = sizeof(un.sun_family) + strlen(path);

    if (!client) {
        unlink(path);

        if (bind(dev->sock, (struct sockaddr *) &un, len) == -1) {
            vubr_die("bind");
        }

        if (listen(dev->sock, 1) == -1) {
            vubr_die("listen");
        }
        cb = vubr_accept_cb;

        DPRINT("Waiting for connections on UNIX socket %s ...\n", path);
    } else {
        if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
            vubr_die("connect");
        }
        cb = vubr_receive_cb;
    }

    dispatcher_init(&dev->dispatcher);
    dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);

    return dev;
}
static void
vubr_set_host(struct sockaddr_in *saddr, const char *host)
{
    if (isdigit(host[0])) {
        if (!inet_aton(host, &saddr->sin_addr)) {
            fprintf(stderr, "inet_aton() failed.\n");
            exit(1);
        }
    } else {
        struct hostent *he = gethostbyname(host);

        if (he == NULL) {
            fprintf(stderr, "gethostbyname() failed.\n");
            exit(1);
        }
        saddr->sin_addr = *(struct in_addr *)he->h_addr;
    }
}
static void
vubr_backend_udp_setup(VubrDev *dev,
                       const char *local_host,
                       const char *local_port,
                       const char *remote_host,
                       const char *remote_port)
{
    int sock;
    const char *r;
    int lport, rport;

    lport = strtol(local_port, (char **)&r, 0);
    if (r == local_port) {
        fprintf(stderr, "lport parsing failed.\n");
        exit(1);
    }

    rport = strtol(remote_port, (char **)&r, 0);
    if (r == remote_port) {
        fprintf(stderr, "rport parsing failed.\n");
        exit(1);
    }

    struct sockaddr_in si_local = {
        .sin_family = AF_INET,
        .sin_port = htons(lport),
    };

    vubr_set_host(&si_local, local_host);

    /* Set up the destination for sends. */
    dev->backend_udp_dest = (struct sockaddr_in) {
        .sin_family = AF_INET,
        .sin_port = htons(rport),
    };
    vubr_set_host(&dev->backend_udp_dest, remote_host);

    sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock == -1) {
        vubr_die("socket");
    }

    if (bind(sock, (struct sockaddr *)&si_local, sizeof(si_local)) == -1) {
        vubr_die("bind");
    }

    dev->backend_udp_sock = sock;
    dispatcher_add(&dev->dispatcher, sock, dev, vubr_backend_recv_cb);
    DPRINT("Waiting for data from udp backend on %s:%d...\n",
           local_host, lport);
}
static void
vubr_run(VubrDev *dev)
{
    while (1) {
        /* Timeout is 200 ms. */
        dispatcher_wait(&dev->dispatcher, 200000);
        /* Here one can try a polling strategy. */
    }
}
static int
vubr_parse_host_port(const char **host, const char **port, const char *buf)
{
    char *p = strchr(buf, ':');

    if (!p) {
        return -1;
    }
    *p = '\0';
    *host = strdup(buf);
    *port = strdup(p + 1);
    return 0;
}
#define DEFAULT_UD_SOCKET "/tmp/vubr.sock"
#define DEFAULT_LHOST "127.0.0.1"
#define DEFAULT_LPORT "4444"
#define DEFAULT_RHOST "127.0.0.1"
#define DEFAULT_RPORT "5555"

static const char *ud_socket_path = DEFAULT_UD_SOCKET;
static const char *lhost = DEFAULT_LHOST;
static const char *lport = DEFAULT_LPORT;
static const char *rhost = DEFAULT_RHOST;
static const char *rport = DEFAULT_RPORT;
int
main(int argc, char *argv[])
{
    VubrDev *dev;
    int opt;
    bool client = false;

    while ((opt = getopt(argc, argv, "l:r:u:c")) != -1) {

        switch (opt) {
        case 'l':
            if (vubr_parse_host_port(&lhost, &lport, optarg) < 0) {
                goto out;
            }
            break;
        case 'r':
            if (vubr_parse_host_port(&rhost, &rport, optarg) < 0) {
                goto out;
            }
            break;
        case 'u':
            ud_socket_path = strdup(optarg);
            break;
        case 'c':
            client = true;
            break;
        default:
            goto out;
        }
    }

    DPRINT("ud socket: %s (%s)\n", ud_socket_path,
           client ? "client" : "server");
    DPRINT("local:     %s:%s\n", lhost, lport);
    DPRINT("remote:    %s:%s\n", rhost, rport);

    dev = vubr_new(ud_socket_path, client);
    if (!dev) {
        return 1;
    }

    vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
    vubr_run(dev);
    return 0;
out:
    fprintf(stderr, "Usage: %s ", argv[0]);
    fprintf(stderr, "[-c] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n");
    fprintf(stderr, "\t-u path to UNIX domain socket. default: %s\n",
            DEFAULT_UD_SOCKET);
    fprintf(stderr, "\t-l local host and port. default: %s:%s\n",
            DEFAULT_LHOST, DEFAULT_LPORT);
    fprintf(stderr, "\t-r remote host and port. default: %s:%s\n",
            DEFAULT_RHOST, DEFAULT_RPORT);
    fprintf(stderr, "\t-c client mode\n");
    return 1;
}