/*
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
11 #include "hw/virtio/vhost.h"
12 #include "hw/virtio/vhost-backend.h"
13 #include "sysemu/char.h"
14 #include "sysemu/kvm.h"
15 #include "qemu/error-report.h"
16 #include "qemu/sockets.h"
17 #include "exec/ram_addr.h"
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
24 #include <linux/vhost.h>
/*
 * Capacity of the region array carried in a memory-table message and of the
 * file-descriptor array sent alongside it.
 */
#define VHOST_MEMORY_MAX_NREGIONS    8
/*
 * Request codes carried in the `request` field of a vhost-user message.
 *
 * The values double as indices into ioctl_to_vhost_user_request[], so they
 * must stay dense: VHOST_USER_NONE occupies slot 0 and VHOST_USER_MAX is the
 * number of slots (one past the last real request).
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_MAX
} VhostUserRequest;
/*
 * One guest memory region as described to the vhost-user backend in a
 * memory-table message.
 *
 * NOTE(review): memory_size and mmap_offset were absent from the damaged
 * text; they are restored because the memory-table code in this file
 * assigns both. Their 64-bit width mirrors the neighbouring fields -
 * confirm against the protocol definition.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from the region's guest_phys_addr */
    uint64_t memory_size;       /* copied from the region's memory_size */
    uint64_t userspace_addr;    /* copied from the region's userspace_addr */
    uint64_t mmap_offset;       /* userspace_addr minus the RAM block base */
} VhostUserMemoryRegion;
54 typedef struct VhostUserMemory
{
57 VhostUserMemoryRegion regions
[VHOST_MEMORY_MAX_NREGIONS
];
60 typedef struct VhostUserMsg
{
61 VhostUserRequest request
;
63 #define VHOST_USER_VERSION_MASK (0x3)
64 #define VHOST_USER_REPLY_MASK (0x1<<2)
66 uint32_t size
; /* the following payload size */
68 #define VHOST_USER_VRING_IDX_MASK (0xff)
69 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
71 struct vhost_vring_state state
;
72 struct vhost_vring_addr addr
;
73 VhostUserMemory memory
;
75 } QEMU_PACKED VhostUserMsg
;
77 static VhostUserMsg m
__attribute__ ((unused
));
78 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
82 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
84 /* The version of the protocol we support */
85 #define VHOST_USER_VERSION (0x1)
/*
 * Return true only when both kvm_enabled() and kvm_eventfds_enabled() are
 * true. The vring kick/call/err path uses this to decide whether a file
 * descriptor is attached to the message.
 */
static bool ioeventfd_enabled(void)
{
    return kvm_enabled() && kvm_eventfds_enabled();
}
92 static unsigned long int ioctl_to_vhost_user_request
[VHOST_USER_MAX
] = {
93 -1, /* VHOST_USER_NONE */
94 VHOST_GET_FEATURES
, /* VHOST_USER_GET_FEATURES */
95 VHOST_SET_FEATURES
, /* VHOST_USER_SET_FEATURES */
96 VHOST_SET_OWNER
, /* VHOST_USER_SET_OWNER */
97 VHOST_RESET_OWNER
, /* VHOST_USER_RESET_OWNER */
98 VHOST_SET_MEM_TABLE
, /* VHOST_USER_SET_MEM_TABLE */
99 VHOST_SET_LOG_BASE
, /* VHOST_USER_SET_LOG_BASE */
100 VHOST_SET_LOG_FD
, /* VHOST_USER_SET_LOG_FD */
101 VHOST_SET_VRING_NUM
, /* VHOST_USER_SET_VRING_NUM */
102 VHOST_SET_VRING_ADDR
, /* VHOST_USER_SET_VRING_ADDR */
103 VHOST_SET_VRING_BASE
, /* VHOST_USER_SET_VRING_BASE */
104 VHOST_GET_VRING_BASE
, /* VHOST_USER_GET_VRING_BASE */
105 VHOST_SET_VRING_KICK
, /* VHOST_USER_SET_VRING_KICK */
106 VHOST_SET_VRING_CALL
, /* VHOST_USER_SET_VRING_CALL */
107 VHOST_SET_VRING_ERR
/* VHOST_USER_SET_VRING_ERR */
110 static VhostUserRequest
vhost_user_request_translate(unsigned long int request
)
112 VhostUserRequest idx
;
114 for (idx
= 0; idx
< VHOST_USER_MAX
; idx
++) {
115 if (ioctl_to_vhost_user_request
[idx
] == request
) {
120 return (idx
== VHOST_USER_MAX
) ? VHOST_USER_NONE
: idx
;
123 static int vhost_user_read(struct vhost_dev
*dev
, VhostUserMsg
*msg
)
125 CharDriverState
*chr
= dev
->opaque
;
126 uint8_t *p
= (uint8_t *) msg
;
127 int r
, size
= VHOST_USER_HDR_SIZE
;
129 r
= qemu_chr_fe_read_all(chr
, p
, size
);
131 error_report("Failed to read msg header. Read %d instead of %d.", r
,
136 /* validate received flags */
137 if (msg
->flags
!= (VHOST_USER_REPLY_MASK
| VHOST_USER_VERSION
)) {
138 error_report("Failed to read msg header."
139 " Flags 0x%x instead of 0x%x.", msg
->flags
,
140 VHOST_USER_REPLY_MASK
| VHOST_USER_VERSION
);
144 /* validate message size is sane */
145 if (msg
->size
> VHOST_USER_PAYLOAD_SIZE
) {
146 error_report("Failed to read msg header."
147 " Size %d exceeds the maximum %zu.", msg
->size
,
148 VHOST_USER_PAYLOAD_SIZE
);
153 p
+= VHOST_USER_HDR_SIZE
;
155 r
= qemu_chr_fe_read_all(chr
, p
, size
);
157 error_report("Failed to read msg payload."
158 " Read %d instead of %d.", r
, msg
->size
);
169 static int vhost_user_write(struct vhost_dev
*dev
, VhostUserMsg
*msg
,
170 int *fds
, int fd_num
)
172 CharDriverState
*chr
= dev
->opaque
;
173 int size
= VHOST_USER_HDR_SIZE
+ msg
->size
;
176 qemu_chr_fe_set_msgfds(chr
, fds
, fd_num
);
179 return qemu_chr_fe_write_all(chr
, (const uint8_t *) msg
, size
) == size
?
183 static int vhost_user_call(struct vhost_dev
*dev
, unsigned long int request
,
187 VhostUserRequest msg_request
;
188 struct vhost_vring_file
*file
= 0;
190 int fds
[VHOST_MEMORY_MAX_NREGIONS
];
194 assert(dev
->vhost_ops
->backend_type
== VHOST_BACKEND_TYPE_USER
);
196 msg_request
= vhost_user_request_translate(request
);
197 msg
.request
= msg_request
;
198 msg
.flags
= VHOST_USER_VERSION
;
202 case VHOST_GET_FEATURES
:
206 case VHOST_SET_FEATURES
:
207 case VHOST_SET_LOG_BASE
:
208 msg
.u64
= *((__u64
*) arg
);
209 msg
.size
= sizeof(m
.u64
);
212 case VHOST_SET_OWNER
:
215 case VHOST_RESET_OWNER
:
216 memcpy(&msg
.state
, arg
, sizeof(struct vhost_vring_state
));
217 msg
.state
.index
+= dev
->vq_index
;
218 msg
.size
= sizeof(m
.state
);
221 case VHOST_SET_MEM_TABLE
:
222 for (i
= 0; i
< dev
->mem
->nregions
; ++i
) {
223 struct vhost_memory_region
*reg
= dev
->mem
->regions
+ i
;
226 assert((uintptr_t)reg
->userspace_addr
== reg
->userspace_addr
);
227 qemu_ram_addr_from_host((void *)(uintptr_t)reg
->userspace_addr
, &ram_addr
);
228 fd
= qemu_get_ram_fd(ram_addr
);
230 msg
.memory
.regions
[fd_num
].userspace_addr
= reg
->userspace_addr
;
231 msg
.memory
.regions
[fd_num
].memory_size
= reg
->memory_size
;
232 msg
.memory
.regions
[fd_num
].guest_phys_addr
= reg
->guest_phys_addr
;
233 msg
.memory
.regions
[fd_num
].mmap_offset
= reg
->userspace_addr
-
234 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr
);
235 assert(fd_num
< VHOST_MEMORY_MAX_NREGIONS
);
240 msg
.memory
.nregions
= fd_num
;
243 error_report("Failed initializing vhost-user memory map, "
244 "consider using -object memory-backend-file share=on");
248 msg
.size
= sizeof(m
.memory
.nregions
);
249 msg
.size
+= sizeof(m
.memory
.padding
);
250 msg
.size
+= fd_num
* sizeof(VhostUserMemoryRegion
);
254 case VHOST_SET_LOG_FD
:
255 fds
[fd_num
++] = *((int *) arg
);
258 case VHOST_SET_VRING_NUM
:
259 case VHOST_SET_VRING_BASE
:
260 memcpy(&msg
.state
, arg
, sizeof(struct vhost_vring_state
));
261 msg
.state
.index
+= dev
->vq_index
;
262 msg
.size
= sizeof(m
.state
);
265 case VHOST_GET_VRING_BASE
:
266 memcpy(&msg
.state
, arg
, sizeof(struct vhost_vring_state
));
267 msg
.state
.index
+= dev
->vq_index
;
268 msg
.size
= sizeof(m
.state
);
272 case VHOST_SET_VRING_ADDR
:
273 memcpy(&msg
.addr
, arg
, sizeof(struct vhost_vring_addr
));
274 msg
.addr
.index
+= dev
->vq_index
;
275 msg
.size
= sizeof(m
.addr
);
278 case VHOST_SET_VRING_KICK
:
279 case VHOST_SET_VRING_CALL
:
280 case VHOST_SET_VRING_ERR
:
282 msg
.u64
= (file
->index
+ dev
->vq_index
) & VHOST_USER_VRING_IDX_MASK
;
283 msg
.size
= sizeof(m
.u64
);
284 if (ioeventfd_enabled() && file
->fd
> 0) {
285 fds
[fd_num
++] = file
->fd
;
287 msg
.u64
|= VHOST_USER_VRING_NOFD_MASK
;
291 error_report("vhost-user trying to send unhandled ioctl");
296 if (vhost_user_write(dev
, &msg
, fds
, fd_num
) < 0) {
301 if (vhost_user_read(dev
, &msg
) < 0) {
305 if (msg_request
!= msg
.request
) {
306 error_report("Received unexpected msg type."
307 " Expected %d received %d", msg_request
, msg
.request
);
311 switch (msg_request
) {
312 case VHOST_USER_GET_FEATURES
:
313 if (msg
.size
!= sizeof(m
.u64
)) {
314 error_report("Received bad msg size.");
317 *((__u64
*) arg
) = msg
.u64
;
319 case VHOST_USER_GET_VRING_BASE
:
320 if (msg
.size
!= sizeof(m
.state
)) {
321 error_report("Received bad msg size.");
324 msg
.state
.index
-= dev
->vq_index
;
325 memcpy(arg
, &msg
.state
, sizeof(struct vhost_vring_state
));
328 error_report("Received unexpected msg type.");
337 static int vhost_user_init(struct vhost_dev
*dev
, void *opaque
)
339 assert(dev
->vhost_ops
->backend_type
== VHOST_BACKEND_TYPE_USER
);
341 dev
->opaque
= opaque
;
346 static int vhost_user_cleanup(struct vhost_dev
*dev
)
348 assert(dev
->vhost_ops
->backend_type
== VHOST_BACKEND_TYPE_USER
);
355 const VhostOps user_ops
= {
356 .backend_type
= VHOST_BACKEND_TYPE_USER
,
357 .vhost_call
= vhost_user_call
,
358 .vhost_backend_init
= vhost_user_init
,
359 .vhost_backend_cleanup
= vhost_user_cleanup