4 * Copyright (c) 2015 Red Hat, Inc.
7 * Victor Kaplansky <victork@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
15 * - main should get parameters from the command line.
16 * - implement all request handlers. Still not implemented:
17 * vubr_get_queue_num_exec()
18 * vubr_send_rarp_exec()
19 * - test for broken requests and virtqueue.
20 * - implement features defined by Virtio 1.0 spec.
21 * - support mergeable buffers and indirect descriptors.
22 * - implement clean shutdown.
23 * - implement non-blocking writes to UDP backend.
24 * - implement polling strategy.
25 * - implement clean starting/stopping of vq processing
26 * - implement clean starting/stopping of used and buffers
30 #define _FILE_OFFSET_BITS 64
41 #include <sys/types.h>
42 #include <sys/socket.h>
44 #include <sys/unistd.h>
46 #include <sys/eventfd.h>
47 #include <arpa/inet.h>
49 #include <linux/vhost.h>
51 #include "qemu/atomic.h"
52 #include "standard-headers/linux/virtio_net.h"
53 #include "standard-headers/linux/virtio_ring.h"
55 #define VHOST_USER_BRIDGE_DEBUG 1
59 if (VHOST_USER_BRIDGE_DEBUG) { \
60 printf(__VA_ARGS__); \
64 typedef void (*CallbackFunc
)(int sock
, void *ctx
);
66 typedef struct Event
{
68 CallbackFunc callback
;
71 typedef struct Dispatcher
{
74 Event events
[FD_SETSIZE
];
78 vubr_die(const char *s
)
85 dispatcher_init(Dispatcher
*dispr
)
87 FD_ZERO(&dispr
->fdset
);
93 dispatcher_add(Dispatcher
*dispr
, int sock
, void *ctx
, CallbackFunc cb
)
95 if (sock
>= FD_SETSIZE
) {
97 "Error: Failed to add new event. sock %d should be less than %d\n",
102 dispr
->events
[sock
].ctx
= ctx
;
103 dispr
->events
[sock
].callback
= cb
;
105 FD_SET(sock
, &dispr
->fdset
);
106 if (sock
> dispr
->max_sock
) {
107 dispr
->max_sock
= sock
;
109 DPRINT("Added sock %d for watching. max_sock: %d\n",
110 sock
, dispr
->max_sock
);
115 /* dispatcher_remove() is not currently in use but may be useful
118 dispatcher_remove(Dispatcher
*dispr
, int sock
)
120 if (sock
>= FD_SETSIZE
) {
122 "Error: Failed to remove event. sock %d should be less than %d\n",
127 FD_CLR(sock
, &dispr
->fdset
);
134 dispatcher_wait(Dispatcher
*dispr
, uint32_t timeout
)
137 tv
.tv_sec
= timeout
/ 1000000;
138 tv
.tv_usec
= timeout
% 1000000;
140 fd_set fdset
= dispr
->fdset
;
142 /* Wait until some of the sockets become readable. */
143 int rc
= select(dispr
->max_sock
+ 1, &fdset
, 0, 0, &tv
);
154 /* Now call the callback for every ready socket. */
157 for (sock
= 0; sock
< dispr
->max_sock
+ 1; sock
++)
158 if (FD_ISSET(sock
, &fdset
)) {
159 Event
*e
= &dispr
->events
[sock
];
160 e
->callback(sock
, e
->ctx
);
166 typedef struct VubrVirtq
{
170 uint16_t last_avail_index
;
171 uint16_t last_used_index
;
172 struct vring_desc
*desc
;
173 struct vring_avail
*avail
;
174 struct vring_used
*used
;
175 uint64_t log_guest_addr
;
179 /* Based on qemu/hw/virtio/vhost-user.c */
181 #define VHOST_MEMORY_MAX_NREGIONS 8
182 #define VHOST_USER_F_PROTOCOL_FEATURES 30
184 #define VHOST_LOG_PAGE 4096
186 enum VhostUserProtocolFeature
{
187 VHOST_USER_PROTOCOL_F_MQ
= 0,
188 VHOST_USER_PROTOCOL_F_LOG_SHMFD
= 1,
189 VHOST_USER_PROTOCOL_F_RARP
= 2,
191 VHOST_USER_PROTOCOL_F_MAX
194 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
196 typedef enum VhostUserRequest
{
198 VHOST_USER_GET_FEATURES
= 1,
199 VHOST_USER_SET_FEATURES
= 2,
200 VHOST_USER_SET_OWNER
= 3,
201 VHOST_USER_RESET_OWNER
= 4,
202 VHOST_USER_SET_MEM_TABLE
= 5,
203 VHOST_USER_SET_LOG_BASE
= 6,
204 VHOST_USER_SET_LOG_FD
= 7,
205 VHOST_USER_SET_VRING_NUM
= 8,
206 VHOST_USER_SET_VRING_ADDR
= 9,
207 VHOST_USER_SET_VRING_BASE
= 10,
208 VHOST_USER_GET_VRING_BASE
= 11,
209 VHOST_USER_SET_VRING_KICK
= 12,
210 VHOST_USER_SET_VRING_CALL
= 13,
211 VHOST_USER_SET_VRING_ERR
= 14,
212 VHOST_USER_GET_PROTOCOL_FEATURES
= 15,
213 VHOST_USER_SET_PROTOCOL_FEATURES
= 16,
214 VHOST_USER_GET_QUEUE_NUM
= 17,
215 VHOST_USER_SET_VRING_ENABLE
= 18,
216 VHOST_USER_SEND_RARP
= 19,
220 typedef struct VhostUserMemoryRegion
{
221 uint64_t guest_phys_addr
;
222 uint64_t memory_size
;
223 uint64_t userspace_addr
;
224 uint64_t mmap_offset
;
225 } VhostUserMemoryRegion
;
227 typedef struct VhostUserMemory
{
230 VhostUserMemoryRegion regions
[VHOST_MEMORY_MAX_NREGIONS
];
233 typedef struct VhostUserLog
{
235 uint64_t mmap_offset
;
238 typedef struct VhostUserMsg
{
239 VhostUserRequest request
;
241 #define VHOST_USER_VERSION_MASK (0x3)
242 #define VHOST_USER_REPLY_MASK (0x1<<2)
244 uint32_t size
; /* the following payload size */
246 #define VHOST_USER_VRING_IDX_MASK (0xff)
247 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
249 struct vhost_vring_state state
;
250 struct vhost_vring_addr addr
;
251 VhostUserMemory memory
;
254 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
256 } QEMU_PACKED VhostUserMsg
;
258 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
260 /* The version of the protocol we support */
261 #define VHOST_USER_VERSION (0x1)
263 #define MAX_NR_VIRTQUEUE (8)
265 typedef struct VubrDevRegion
{
266 /* Guest Physical address. */
268 /* Memory region size. */
270 /* QEMU virtual address (userspace). */
272 /* Starting offset in our mmaped space. */
273 uint64_t mmap_offset
;
274 /* Start address of mmaped space. */
278 typedef struct VubrDev
{
280 Dispatcher dispatcher
;
282 VubrDevRegion regions
[VHOST_MEMORY_MAX_NREGIONS
];
283 VubrVirtq vq
[MAX_NR_VIRTQUEUE
];
287 int backend_udp_sock
;
288 struct sockaddr_in backend_udp_dest
;
293 static const char *vubr_request_str
[] = {
294 [VHOST_USER_NONE
] = "VHOST_USER_NONE",
295 [VHOST_USER_GET_FEATURES
] = "VHOST_USER_GET_FEATURES",
296 [VHOST_USER_SET_FEATURES
] = "VHOST_USER_SET_FEATURES",
297 [VHOST_USER_SET_OWNER
] = "VHOST_USER_SET_OWNER",
298 [VHOST_USER_RESET_OWNER
] = "VHOST_USER_RESET_OWNER",
299 [VHOST_USER_SET_MEM_TABLE
] = "VHOST_USER_SET_MEM_TABLE",
300 [VHOST_USER_SET_LOG_BASE
] = "VHOST_USER_SET_LOG_BASE",
301 [VHOST_USER_SET_LOG_FD
] = "VHOST_USER_SET_LOG_FD",
302 [VHOST_USER_SET_VRING_NUM
] = "VHOST_USER_SET_VRING_NUM",
303 [VHOST_USER_SET_VRING_ADDR
] = "VHOST_USER_SET_VRING_ADDR",
304 [VHOST_USER_SET_VRING_BASE
] = "VHOST_USER_SET_VRING_BASE",
305 [VHOST_USER_GET_VRING_BASE
] = "VHOST_USER_GET_VRING_BASE",
306 [VHOST_USER_SET_VRING_KICK
] = "VHOST_USER_SET_VRING_KICK",
307 [VHOST_USER_SET_VRING_CALL
] = "VHOST_USER_SET_VRING_CALL",
308 [VHOST_USER_SET_VRING_ERR
] = "VHOST_USER_SET_VRING_ERR",
309 [VHOST_USER_GET_PROTOCOL_FEATURES
] = "VHOST_USER_GET_PROTOCOL_FEATURES",
310 [VHOST_USER_SET_PROTOCOL_FEATURES
] = "VHOST_USER_SET_PROTOCOL_FEATURES",
311 [VHOST_USER_GET_QUEUE_NUM
] = "VHOST_USER_GET_QUEUE_NUM",
312 [VHOST_USER_SET_VRING_ENABLE
] = "VHOST_USER_SET_VRING_ENABLE",
313 [VHOST_USER_SEND_RARP
] = "VHOST_USER_SEND_RARP",
314 [VHOST_USER_MAX
] = "VHOST_USER_MAX",
318 print_buffer(uint8_t *buf
, size_t len
)
321 printf("Raw buffer:\n");
322 for (i
= 0; i
< len
; i
++) {
329 printf("%02x ", buf
[i
]);
331 printf("\n............................................................\n");
334 /* Translate guest physical address to our virtual address. */
336 gpa_to_va(VubrDev
*dev
, uint64_t guest_addr
)
340 /* Find matching memory region. */
341 for (i
= 0; i
< dev
->nregions
; i
++) {
342 VubrDevRegion
*r
= &dev
->regions
[i
];
344 if ((guest_addr
>= r
->gpa
) && (guest_addr
< (r
->gpa
+ r
->size
))) {
345 return guest_addr
- r
->gpa
+ r
->mmap_addr
+ r
->mmap_offset
;
349 assert(!"address not found in regions");
353 /* Translate qemu virtual address to our virtual address. */
355 qva_to_va(VubrDev
*dev
, uint64_t qemu_addr
)
359 /* Find matching memory region. */
360 for (i
= 0; i
< dev
->nregions
; i
++) {
361 VubrDevRegion
*r
= &dev
->regions
[i
];
363 if ((qemu_addr
>= r
->qva
) && (qemu_addr
< (r
->qva
+ r
->size
))) {
364 return qemu_addr
- r
->qva
+ r
->mmap_addr
+ r
->mmap_offset
;
368 assert(!"address not found in regions");
373 vubr_message_read(int conn_fd
, VhostUserMsg
*vmsg
)
375 char control
[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS
* sizeof(int))] = { };
377 .iov_base
= (char *)vmsg
,
378 .iov_len
= VHOST_USER_HDR_SIZE
,
380 struct msghdr msg
= {
383 .msg_control
= control
,
384 .msg_controllen
= sizeof(control
),
387 struct cmsghdr
*cmsg
;
390 rc
= recvmsg(conn_fd
, &msg
, 0);
394 fprintf(stderr
, "Peer disconnected.\n");
402 for (cmsg
= CMSG_FIRSTHDR(&msg
);
404 cmsg
= CMSG_NXTHDR(&msg
, cmsg
))
406 if (cmsg
->cmsg_level
== SOL_SOCKET
&& cmsg
->cmsg_type
== SCM_RIGHTS
) {
407 fd_size
= cmsg
->cmsg_len
- CMSG_LEN(0);
408 vmsg
->fd_num
= fd_size
/ sizeof(int);
409 memcpy(vmsg
->fds
, CMSG_DATA(cmsg
), fd_size
);
414 if (vmsg
->size
> sizeof(vmsg
->payload
)) {
416 "Error: too big message request: %d, size: vmsg->size: %u, "
417 "while sizeof(vmsg->payload) = %lu\n",
418 vmsg
->request
, vmsg
->size
, sizeof(vmsg
->payload
));
423 rc
= read(conn_fd
, &vmsg
->payload
, vmsg
->size
);
426 fprintf(stderr
, "Peer disconnected.\n");
433 assert(rc
== vmsg
->size
);
438 vubr_message_write(int conn_fd
, VhostUserMsg
*vmsg
)
443 rc
= write(conn_fd
, vmsg
, VHOST_USER_HDR_SIZE
+ vmsg
->size
);
444 } while (rc
< 0 && errno
== EINTR
);
452 vubr_backend_udp_sendbuf(VubrDev
*dev
, uint8_t *buf
, size_t len
)
454 int slen
= sizeof(struct sockaddr_in
);
456 if (sendto(dev
->backend_udp_sock
, buf
, len
, 0,
457 (struct sockaddr
*) &dev
->backend_udp_dest
, slen
) == -1) {
458 vubr_die("sendto()");
463 vubr_backend_udp_recvbuf(VubrDev
*dev
, uint8_t *buf
, size_t buflen
)
465 int slen
= sizeof(struct sockaddr_in
);
468 rc
= recvfrom(dev
->backend_udp_sock
, buf
, buflen
, 0,
469 (struct sockaddr
*) &dev
->backend_udp_dest
,
472 vubr_die("recvfrom()");
479 vubr_consume_raw_packet(VubrDev
*dev
, uint8_t *buf
, uint32_t len
)
481 int hdrlen
= sizeof(struct virtio_net_hdr_v1
);
483 if (VHOST_USER_BRIDGE_DEBUG
) {
484 print_buffer(buf
, len
);
486 vubr_backend_udp_sendbuf(dev
, buf
+ hdrlen
, len
- hdrlen
);
489 /* Kick the log_call_fd if required. */
491 vubr_log_kick(VubrDev
*dev
)
493 if (dev
->log_call_fd
!= -1) {
494 DPRINT("Kicking the QEMU's log...\n");
495 eventfd_write(dev
->log_call_fd
, 1);
499 /* Kick the guest if necessary. */
501 vubr_virtqueue_kick(VubrVirtq
*vq
)
503 if (!(vq
->avail
->flags
& VRING_AVAIL_F_NO_INTERRUPT
)) {
504 DPRINT("Kicking the guest...\n");
505 eventfd_write(vq
->call_fd
, 1);
510 vubr_log_page(uint8_t *log_table
, uint64_t page
)
512 DPRINT("Logged dirty guest page: %"PRId64
"\n", page
);
513 atomic_or(&log_table
[page
/ 8], 1 << (page
% 8));
517 vubr_log_write(VubrDev
*dev
, uint64_t address
, uint64_t length
)
521 if (!(dev
->features
& (1ULL << VHOST_F_LOG_ALL
)) ||
522 !dev
->log_table
|| !length
) {
526 assert(dev
->log_size
> ((address
+ length
- 1) / VHOST_LOG_PAGE
/ 8));
528 page
= address
/ VHOST_LOG_PAGE
;
529 while (page
* VHOST_LOG_PAGE
< address
+ length
) {
530 vubr_log_page(dev
->log_table
, page
);
531 page
+= VHOST_LOG_PAGE
;
537 vubr_post_buffer(VubrDev
*dev
, VubrVirtq
*vq
, uint8_t *buf
, int32_t len
)
539 struct vring_desc
*desc
= vq
->desc
;
540 struct vring_avail
*avail
= vq
->avail
;
541 struct vring_used
*used
= vq
->used
;
542 uint64_t log_guest_addr
= vq
->log_guest_addr
;
544 unsigned int size
= vq
->size
;
546 uint16_t avail_index
= atomic_mb_read(&avail
->idx
);
548 /* We check the available descriptors before posting the
549 * buffer, so here we assume that enough available
551 assert(vq
->last_avail_index
!= avail_index
);
552 uint16_t a_index
= vq
->last_avail_index
% size
;
553 uint16_t u_index
= vq
->last_used_index
% size
;
554 uint16_t d_index
= avail
->ring
[a_index
];
558 DPRINT("Post packet to guest on vq:\n");
559 DPRINT(" size = %d\n", vq
->size
);
560 DPRINT(" last_avail_index = %d\n", vq
->last_avail_index
);
561 DPRINT(" last_used_index = %d\n", vq
->last_used_index
);
562 DPRINT(" a_index = %d\n", a_index
);
563 DPRINT(" u_index = %d\n", u_index
);
564 DPRINT(" d_index = %d\n", d_index
);
565 DPRINT(" desc[%d].addr = 0x%016"PRIx64
"\n", i
, desc
[i
].addr
);
566 DPRINT(" desc[%d].len = %d\n", i
, desc
[i
].len
);
567 DPRINT(" desc[%d].flags = %d\n", i
, desc
[i
].flags
);
568 DPRINT(" avail->idx = %d\n", avail_index
);
569 DPRINT(" used->idx = %d\n", used
->idx
);
571 if (!(desc
[i
].flags
& VRING_DESC_F_WRITE
)) {
572 /* FIXME: we should find writable descriptor. */
573 fprintf(stderr
, "Error: descriptor is not writable. Exiting.\n");
577 void *chunk_start
= (void *)gpa_to_va(dev
, desc
[i
].addr
);
578 uint32_t chunk_len
= desc
[i
].len
;
580 if (len
<= chunk_len
) {
581 memcpy(chunk_start
, buf
, len
);
582 vubr_log_write(dev
, desc
[i
].addr
, len
);
585 "Received too long packet from the backend. Dropping...\n");
589 /* Add descriptor to the used ring. */
590 used
->ring
[u_index
].id
= d_index
;
591 used
->ring
[u_index
].len
= len
;
593 log_guest_addr
+ offsetof(struct vring_used
, ring
[u_index
]),
594 sizeof(used
->ring
[u_index
]));
596 vq
->last_avail_index
++;
597 vq
->last_used_index
++;
599 atomic_mb_set(&used
->idx
, vq
->last_used_index
);
601 log_guest_addr
+ offsetof(struct vring_used
, idx
),
604 /* Kick the guest if necessary. */
605 vubr_virtqueue_kick(vq
);
609 vubr_process_desc(VubrDev
*dev
, VubrVirtq
*vq
)
611 struct vring_desc
*desc
= vq
->desc
;
612 struct vring_avail
*avail
= vq
->avail
;
613 struct vring_used
*used
= vq
->used
;
614 uint64_t log_guest_addr
= vq
->log_guest_addr
;
616 unsigned int size
= vq
->size
;
618 uint16_t a_index
= vq
->last_avail_index
% size
;
619 uint16_t u_index
= vq
->last_used_index
% size
;
620 uint16_t d_index
= avail
->ring
[a_index
];
623 size_t buf_size
= 4096;
629 void *chunk_start
= (void *)gpa_to_va(dev
, desc
[i
].addr
);
630 uint32_t chunk_len
= desc
[i
].len
;
632 assert(!(desc
[i
].flags
& VRING_DESC_F_WRITE
));
634 if (len
+ chunk_len
< buf_size
) {
635 memcpy(buf
+ len
, chunk_start
, chunk_len
);
636 DPRINT("%d ", chunk_len
);
638 fprintf(stderr
, "Error: too long packet. Dropping...\n");
644 if (!(desc
[i
].flags
& VRING_DESC_F_NEXT
)) {
656 /* Add descriptor to the used ring. */
657 used
->ring
[u_index
].id
= d_index
;
658 used
->ring
[u_index
].len
= len
;
660 log_guest_addr
+ offsetof(struct vring_used
, ring
[u_index
]),
661 sizeof(used
->ring
[u_index
]));
663 vubr_consume_raw_packet(dev
, buf
, len
);
669 vubr_process_avail(VubrDev
*dev
, VubrVirtq
*vq
)
671 struct vring_avail
*avail
= vq
->avail
;
672 struct vring_used
*used
= vq
->used
;
673 uint64_t log_guest_addr
= vq
->log_guest_addr
;
675 while (vq
->last_avail_index
!= atomic_mb_read(&avail
->idx
)) {
676 vubr_process_desc(dev
, vq
);
677 vq
->last_avail_index
++;
678 vq
->last_used_index
++;
681 atomic_mb_set(&used
->idx
, vq
->last_used_index
);
683 log_guest_addr
+ offsetof(struct vring_used
, idx
),
688 vubr_backend_recv_cb(int sock
, void *ctx
)
690 VubrDev
*dev
= (VubrDev
*) ctx
;
691 VubrVirtq
*rx_vq
= &dev
->vq
[0];
693 struct virtio_net_hdr_v1
*hdr
= (struct virtio_net_hdr_v1
*)buf
;
694 int hdrlen
= sizeof(struct virtio_net_hdr_v1
);
695 int buflen
= sizeof(buf
);
702 DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
704 uint16_t avail_index
= atomic_mb_read(&rx_vq
->avail
->idx
);
706 /* If there are no available descriptors, just do nothing.
707 * The buffer will be handled when the next UDP packet arrives,
708 * or on the next kick on the receive virtq. */
709 if (rx_vq
->last_avail_index
== avail_index
) {
710 DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
714 len
= vubr_backend_udp_recvbuf(dev
, buf
+ hdrlen
, buflen
- hdrlen
);
716 *hdr
= (struct virtio_net_hdr_v1
) { };
717 hdr
->num_buffers
= 1;
718 vubr_post_buffer(dev
, rx_vq
, buf
, len
+ hdrlen
);
722 vubr_kick_cb(int sock
, void *ctx
)
724 VubrDev
*dev
= (VubrDev
*) ctx
;
728 rc
= eventfd_read(sock
, &kick_data
);
730 vubr_die("eventfd_read()");
732 DPRINT("Got kick_data: %016"PRIx64
"\n", kick_data
);
733 vubr_process_avail(dev
, &dev
->vq
[1]);
738 vubr_none_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
740 DPRINT("Function %s() not implemented yet.\n", __func__
);
745 vubr_get_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
748 ((1ULL << VIRTIO_NET_F_MRG_RXBUF
) |
749 (1ULL << VHOST_F_LOG_ALL
) |
750 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES
));
752 vmsg
->size
= sizeof(vmsg
->payload
.u64
);
754 DPRINT("Sending back to guest u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
761 vubr_set_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
763 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
764 dev
->features
= vmsg
->payload
.u64
;
769 vubr_set_owner_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
775 vubr_close_log(VubrDev
*dev
)
777 if (dev
->log_table
) {
778 if (munmap(dev
->log_table
, dev
->log_size
) != 0) {
779 vubr_die("munmap()");
784 if (dev
->log_call_fd
!= -1) {
785 close(dev
->log_call_fd
);
786 dev
->log_call_fd
= -1;
791 vubr_reset_device_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
800 vubr_set_mem_table_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
803 VhostUserMemory
*memory
= &vmsg
->payload
.memory
;
804 dev
->nregions
= memory
->nregions
;
806 DPRINT("Nregions: %d\n", memory
->nregions
);
807 for (i
= 0; i
< dev
->nregions
; i
++) {
809 VhostUserMemoryRegion
*msg_region
= &memory
->regions
[i
];
810 VubrDevRegion
*dev_region
= &dev
->regions
[i
];
812 DPRINT("Region %d\n", i
);
813 DPRINT(" guest_phys_addr: 0x%016"PRIx64
"\n",
814 msg_region
->guest_phys_addr
);
815 DPRINT(" memory_size: 0x%016"PRIx64
"\n",
816 msg_region
->memory_size
);
817 DPRINT(" userspace_addr 0x%016"PRIx64
"\n",
818 msg_region
->userspace_addr
);
819 DPRINT(" mmap_offset 0x%016"PRIx64
"\n",
820 msg_region
->mmap_offset
);
822 dev_region
->gpa
= msg_region
->guest_phys_addr
;
823 dev_region
->size
= msg_region
->memory_size
;
824 dev_region
->qva
= msg_region
->userspace_addr
;
825 dev_region
->mmap_offset
= msg_region
->mmap_offset
;
827 /* We don't use the offset argument of mmap() since the
828 * mapped address has to be page aligned, and we use huge
830 mmap_addr
= mmap(0, dev_region
->size
+ dev_region
->mmap_offset
,
831 PROT_READ
| PROT_WRITE
, MAP_SHARED
,
834 if (mmap_addr
== MAP_FAILED
) {
838 dev_region
->mmap_addr
= (uint64_t) mmap_addr
;
839 DPRINT(" mmap_addr: 0x%016"PRIx64
"\n", dev_region
->mmap_addr
);
846 vubr_set_log_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
849 uint64_t log_mmap_size
, log_mmap_offset
;
852 assert(vmsg
->fd_num
== 1);
855 assert(vmsg
->size
== sizeof(vmsg
->payload
.log
));
856 log_mmap_offset
= vmsg
->payload
.log
.mmap_offset
;
857 log_mmap_size
= vmsg
->payload
.log
.mmap_size
;
858 DPRINT("Log mmap_offset: %"PRId64
"\n", log_mmap_offset
);
859 DPRINT("Log mmap_size: %"PRId64
"\n", log_mmap_size
);
861 rc
= mmap(0, log_mmap_size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
, fd
,
863 if (rc
== MAP_FAILED
) {
867 dev
->log_size
= log_mmap_size
;
869 vmsg
->size
= sizeof(vmsg
->payload
.u64
);
875 vubr_set_log_fd_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
877 assert(vmsg
->fd_num
== 1);
878 dev
->log_call_fd
= vmsg
->fds
[0];
879 DPRINT("Got log_call_fd: %d\n", vmsg
->fds
[0]);
884 vubr_set_vring_num_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
886 unsigned int index
= vmsg
->payload
.state
.index
;
887 unsigned int num
= vmsg
->payload
.state
.num
;
889 DPRINT("State.index: %d\n", index
);
890 DPRINT("State.num: %d\n", num
);
891 dev
->vq
[index
].size
= num
;
896 vubr_set_vring_addr_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
898 struct vhost_vring_addr
*vra
= &vmsg
->payload
.addr
;
899 unsigned int index
= vra
->index
;
900 VubrVirtq
*vq
= &dev
->vq
[index
];
902 DPRINT("vhost_vring_addr:\n");
903 DPRINT(" index: %d\n", vra
->index
);
904 DPRINT(" flags: %d\n", vra
->flags
);
905 DPRINT(" desc_user_addr: 0x%016llx\n", vra
->desc_user_addr
);
906 DPRINT(" used_user_addr: 0x%016llx\n", vra
->used_user_addr
);
907 DPRINT(" avail_user_addr: 0x%016llx\n", vra
->avail_user_addr
);
908 DPRINT(" log_guest_addr: 0x%016llx\n", vra
->log_guest_addr
);
910 vq
->desc
= (struct vring_desc
*)qva_to_va(dev
, vra
->desc_user_addr
);
911 vq
->used
= (struct vring_used
*)qva_to_va(dev
, vra
->used_user_addr
);
912 vq
->avail
= (struct vring_avail
*)qva_to_va(dev
, vra
->avail_user_addr
);
913 vq
->log_guest_addr
= vra
->log_guest_addr
;
915 DPRINT("Setting virtq addresses:\n");
916 DPRINT(" vring_desc at %p\n", vq
->desc
);
917 DPRINT(" vring_used at %p\n", vq
->used
);
918 DPRINT(" vring_avail at %p\n", vq
->avail
);
920 vq
->last_used_index
= vq
->used
->idx
;
925 vubr_set_vring_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
927 unsigned int index
= vmsg
->payload
.state
.index
;
928 unsigned int num
= vmsg
->payload
.state
.num
;
930 DPRINT("State.index: %d\n", index
);
931 DPRINT("State.num: %d\n", num
);
932 dev
->vq
[index
].last_avail_index
= num
;
938 vubr_get_vring_base_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
940 unsigned int index
= vmsg
->payload
.state
.index
;
942 DPRINT("State.index: %d\n", index
);
943 vmsg
->payload
.state
.num
= dev
->vq
[index
].last_avail_index
;
944 vmsg
->size
= sizeof(vmsg
->payload
.state
);
945 /* FIXME: this is a work-around for a bug in QEMU that enables
946 * vrings too early. When protocol features are enabled,
947 * we have to respect the VHOST_USER_SET_VRING_ENABLE request. */
955 vubr_set_vring_kick_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
957 uint64_t u64_arg
= vmsg
->payload
.u64
;
958 int index
= u64_arg
& VHOST_USER_VRING_IDX_MASK
;
960 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
962 assert((u64_arg
& VHOST_USER_VRING_NOFD_MASK
) == 0);
963 assert(vmsg
->fd_num
== 1);
965 dev
->vq
[index
].kick_fd
= vmsg
->fds
[0];
966 DPRINT("Got kick_fd: %d for vq: %d\n", vmsg
->fds
[0], index
);
968 if (index
% 2 == 1) {
970 dispatcher_add(&dev
->dispatcher
, dev
->vq
[index
].kick_fd
,
973 DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
974 dev
->vq
[index
].kick_fd
, index
);
976 /* We temporarily use this hack to determine that both TX and RX
977 * queues are set up and ready for processing.
978 * FIXME: we need to rely on VHOST_USER_SET_VRING_ENABLE and
980 if (dev
->vq
[0].kick_fd
!= -1 &&
981 dev
->vq
[1].kick_fd
!= -1) {
983 DPRINT("vhost-user-bridge is ready for processing queues.\n");
990 vubr_set_vring_call_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
992 uint64_t u64_arg
= vmsg
->payload
.u64
;
993 int index
= u64_arg
& VHOST_USER_VRING_IDX_MASK
;
995 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
996 assert((u64_arg
& VHOST_USER_VRING_NOFD_MASK
) == 0);
997 assert(vmsg
->fd_num
== 1);
999 dev
->vq
[index
].call_fd
= vmsg
->fds
[0];
1000 DPRINT("Got call_fd: %d for vq: %d\n", vmsg
->fds
[0], index
);
1006 vubr_set_vring_err_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1008 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
1013 vubr_get_protocol_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1015 vmsg
->payload
.u64
= 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD
;
1016 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
1017 vmsg
->size
= sizeof(vmsg
->payload
.u64
);
1024 vubr_set_protocol_features_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1026 /* FIXME: unimplemented */
1027 DPRINT("u64: 0x%016"PRIx64
"\n", vmsg
->payload
.u64
);
1032 vubr_get_queue_num_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1034 DPRINT("Function %s() not implemented yet.\n", __func__
);
1039 vubr_set_vring_enable_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1041 unsigned int index
= vmsg
->payload
.state
.index
;
1042 unsigned int enable
= vmsg
->payload
.state
.num
;
1044 DPRINT("State.index: %d\n", index
);
1045 DPRINT("State.enable: %d\n", enable
);
1046 dev
->vq
[index
].enable
= enable
;
1051 vubr_send_rarp_exec(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1053 DPRINT("Function %s() not implemented yet.\n", __func__
);
1058 vubr_execute_request(VubrDev
*dev
, VhostUserMsg
*vmsg
)
1060 /* Print out generic part of the request. */
1062 "================== Vhost user message from QEMU ==================\n");
1063 DPRINT("Request: %s (%d)\n", vubr_request_str
[vmsg
->request
],
1065 DPRINT("Flags: 0x%x\n", vmsg
->flags
);
1066 DPRINT("Size: %d\n", vmsg
->size
);
1071 for (i
= 0; i
< vmsg
->fd_num
; i
++) {
1072 DPRINT(" %d", vmsg
->fds
[i
]);
1077 switch (vmsg
->request
) {
1078 case VHOST_USER_NONE
:
1079 return vubr_none_exec(dev
, vmsg
);
1080 case VHOST_USER_GET_FEATURES
:
1081 return vubr_get_features_exec(dev
, vmsg
);
1082 case VHOST_USER_SET_FEATURES
:
1083 return vubr_set_features_exec(dev
, vmsg
);
1084 case VHOST_USER_SET_OWNER
:
1085 return vubr_set_owner_exec(dev
, vmsg
);
1086 case VHOST_USER_RESET_OWNER
:
1087 return vubr_reset_device_exec(dev
, vmsg
);
1088 case VHOST_USER_SET_MEM_TABLE
:
1089 return vubr_set_mem_table_exec(dev
, vmsg
);
1090 case VHOST_USER_SET_LOG_BASE
:
1091 return vubr_set_log_base_exec(dev
, vmsg
);
1092 case VHOST_USER_SET_LOG_FD
:
1093 return vubr_set_log_fd_exec(dev
, vmsg
);
1094 case VHOST_USER_SET_VRING_NUM
:
1095 return vubr_set_vring_num_exec(dev
, vmsg
);
1096 case VHOST_USER_SET_VRING_ADDR
:
1097 return vubr_set_vring_addr_exec(dev
, vmsg
);
1098 case VHOST_USER_SET_VRING_BASE
:
1099 return vubr_set_vring_base_exec(dev
, vmsg
);
1100 case VHOST_USER_GET_VRING_BASE
:
1101 return vubr_get_vring_base_exec(dev
, vmsg
);
1102 case VHOST_USER_SET_VRING_KICK
:
1103 return vubr_set_vring_kick_exec(dev
, vmsg
);
1104 case VHOST_USER_SET_VRING_CALL
:
1105 return vubr_set_vring_call_exec(dev
, vmsg
);
1106 case VHOST_USER_SET_VRING_ERR
:
1107 return vubr_set_vring_err_exec(dev
, vmsg
);
1108 case VHOST_USER_GET_PROTOCOL_FEATURES
:
1109 return vubr_get_protocol_features_exec(dev
, vmsg
);
1110 case VHOST_USER_SET_PROTOCOL_FEATURES
:
1111 return vubr_set_protocol_features_exec(dev
, vmsg
);
1112 case VHOST_USER_GET_QUEUE_NUM
:
1113 return vubr_get_queue_num_exec(dev
, vmsg
);
1114 case VHOST_USER_SET_VRING_ENABLE
:
1115 return vubr_set_vring_enable_exec(dev
, vmsg
);
1116 case VHOST_USER_SEND_RARP
:
1117 return vubr_send_rarp_exec(dev
, vmsg
);
1119 case VHOST_USER_MAX
:
1120 assert(vmsg
->request
!= VHOST_USER_MAX
);
1126 vubr_receive_cb(int sock
, void *ctx
)
1128 VubrDev
*dev
= (VubrDev
*) ctx
;
1130 int reply_requested
;
1132 vubr_message_read(sock
, &vmsg
);
1133 reply_requested
= vubr_execute_request(dev
, &vmsg
);
1134 if (reply_requested
) {
1135 /* Set the version in the flags when sending the reply */
1136 vmsg
.flags
&= ~VHOST_USER_VERSION_MASK
;
1137 vmsg
.flags
|= VHOST_USER_VERSION
;
1138 vmsg
.flags
|= VHOST_USER_REPLY_MASK
;
1139 vubr_message_write(sock
, &vmsg
);
1144 vubr_accept_cb(int sock
, void *ctx
)
1146 VubrDev
*dev
= (VubrDev
*)ctx
;
1148 struct sockaddr_un un
;
1149 socklen_t len
= sizeof(un
);
1151 conn_fd
= accept(sock
, (struct sockaddr
*) &un
, &len
);
1152 if (conn_fd
== -1) {
1153 vubr_die("accept()");
1155 DPRINT("Got connection from remote peer on sock %d\n", conn_fd
);
1156 dispatcher_add(&dev
->dispatcher
, conn_fd
, ctx
, vubr_receive_cb
);
1160 vubr_new(const char *path
)
1162 VubrDev
*dev
= (VubrDev
*) calloc(1, sizeof(VubrDev
));
1165 struct sockaddr_un un
;
1168 for (i
= 0; i
< MAX_NR_VIRTQUEUE
; i
++) {
1169 dev
->vq
[i
] = (VubrVirtq
) {
1170 .call_fd
= -1, .kick_fd
= -1,
1172 .last_avail_index
= 0, .last_used_index
= 0,
1173 .desc
= 0, .avail
= 0, .used
= 0,
1179 dev
->log_call_fd
= -1;
1185 /* Get a UNIX socket. */
1186 dev
->sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
1187 if (dev
->sock
== -1) {
1191 un
.sun_family
= AF_UNIX
;
1192 strcpy(un
.sun_path
, path
);
1193 len
= sizeof(un
.sun_family
) + strlen(path
);
1196 if (bind(dev
->sock
, (struct sockaddr
*) &un
, len
) == -1) {
1200 if (listen(dev
->sock
, 1) == -1) {
1204 dispatcher_init(&dev
->dispatcher
);
1205 dispatcher_add(&dev
->dispatcher
, dev
->sock
, (void *)dev
,
1208 DPRINT("Waiting for connections on UNIX socket %s ...\n", path
);
1213 vubr_backend_udp_setup(VubrDev
*dev
,
1214 const char *local_host
,
1215 uint16_t local_port
,
1216 const char *dest_host
,
1220 struct sockaddr_in si_local
= {
1221 .sin_family
= AF_INET
,
1222 .sin_port
= htons(local_port
),
1225 if (inet_aton(local_host
, &si_local
.sin_addr
) == 0) {
1226 fprintf(stderr
, "inet_aton() failed.\n");
1230 /* setup destination for sends */
1231 dev
->backend_udp_dest
= (struct sockaddr_in
) {
1232 .sin_family
= AF_INET
,
1233 .sin_port
= htons(dest_port
),
1235 if (inet_aton(dest_host
, &dev
->backend_udp_dest
.sin_addr
) == 0) {
1236 fprintf(stderr
, "inet_aton() failed.\n");
1240 sock
= socket(AF_INET
, SOCK_DGRAM
, IPPROTO_UDP
);
1245 if (bind(sock
, (struct sockaddr
*)&si_local
, sizeof(si_local
)) == -1) {
1249 dev
->backend_udp_sock
= sock
;
1250 dispatcher_add(&dev
->dispatcher
, sock
, dev
, vubr_backend_recv_cb
);
1251 DPRINT("Waiting for data from udp backend on %s:%d...\n",
1252 local_host
, local_port
);
1256 vubr_run(VubrDev
*dev
)
1260 dispatcher_wait(&dev
->dispatcher
, 200000);
1261 /* Here one can try polling strategy. */
1266 main(int argc
, char *argv
[])
1270 dev
= vubr_new("/tmp/vubr.sock");
1275 vubr_backend_udp_setup(dev
,