/*
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */
#ifndef LIBVHOST_USER_H
#define LIBVHOST_USER_H
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <pthread.h>
#include <sys/poll.h>
#include <linux/vhost.h>

#include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096

#define VIRTQUEUE_MAX_SIZE 1024

#define VHOST_MEMORY_MAX_NREGIONS 8
typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;
/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,

    VHOST_USER_PROTOCOL_F_MAX
};
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
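
/*
 * For illustration only (not part of this header): a backend that wants to
 * advertise, say, multiqueue and config-space support from its
 * get_protocol_features callback could return
 *
 *     (1ull << VHOST_USER_PROTOCOL_F_MQ) |
 *     (1ull << VHOST_USER_PROTOCOL_F_CONFIG)
 *
 * VHOST_USER_PROTOCOL_FEATURE_MASK covers every bit below
 * VHOST_USER_PROTOCOL_F_MAX.
 */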
typedef enum VhostUserRequest {
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_VRING_KICK = 35,
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_MAX
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_VRING_CALL = 4,
    VHOST_USER_SLAVE_VRING_ERR = 5,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
typedef struct VhostUserMsg {
    int request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
        VhostUserVringArea area;
        VhostUserInflight inflight;
    } payload;

    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    uint8_t *data;
} VU_PACKED VhostUserMsg;
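
/*
 * Illustrative sketch (not part of this header): decoding the header fields
 * of a received message; "vmsg" is a placeholder name.
 *
 *     uint32_t version = vmsg->flags & VHOST_USER_VERSION_MASK;
 *     bool is_reply    = vmsg->flags & VHOST_USER_REPLY_MASK;
 *     bool needs_reply = vmsg->flags & VHOST_USER_NEED_REPLY_MASK;
 *
 *     // for SET_VRING_KICK/CALL/ERR, payload.u64 carries the ring index:
 *     uint32_t vring_idx = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 *
 * libvhost-user performs this decoding internally; a process_msg callback
 * only needs it when it chooses to handle a message itself.
 */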
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;

typedef struct VuDev VuDev;
typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags);
typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received */
    /* skip libvhost-user processing if return value != 0 */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
    /*
     * Return true if the queue is processed in order, in which case
     * processing can be resumed from vring.used->idx. This can help
     * to support resuming on unmanaged exit/crash.
     */
    vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
    /* get the config space of the device */
    vu_get_config_cb get_config;
    /* set the config space of the device */
    vu_set_config_cb set_config;
} VuDevIface;
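
/*
 * Illustrative sketch (not part of this header): a minimal VuDevIface for a
 * device with no config space. All "ex_" names are hypothetical
 * application-side symbols; callbacks not shown here may also be required
 * depending on the features negotiated.
 *
 *     static uint64_t ex_get_features(VuDev *dev)
 *     {
 *         return 0;   // no device-specific feature bits
 *     }
 *
 *     static void ex_set_features(VuDev *dev, uint64_t features)
 *     {
 *     }
 *
 *     static void ex_queue_set_started(VuDev *dev, int qidx, bool started)
 *     {
 *         // start or stop processing queue qidx here
 *     }
 *
 *     static const VuDevIface ex_iface = {
 *         .get_features      = ex_get_features,
 *         .set_features      = ex_set_features,
 *         .queue_set_started = ex_queue_set_started,
 *     };
 */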
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;
typedef struct VuDescStateSplit {
    /* Indicate whether this descriptor is inflight or not.
     * Only available for head-descriptor. */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /* Maintain a list for the last batch of used descriptors.
     * Only available when batching is used for submitting */
    uint16_t next;

    /* Used to preserve the order of fetching available descriptors.
     * Only available for head-descriptor. */
    uint64_t counter;
} VuDescStateSplit;
typedef struct VuVirtqInflight {
    /* The feature flags of this region. Now it's initialized to 0. */
    uint64_t features;

    /* The version of this region. It's 1 currently.
     * Zero value indicates a vm reset happened. */
    uint16_t version;

    /* The size of the VuDescStateSplit array. It's equal to the virtqueue
     * size. The slave can get it from the queue size field of
     * VhostUserInflight. */
    uint16_t desc_num;

    /* The head of the list that tracks the last batch of used descriptors. */
    uint16_t last_batch_head;

    /* Storing the idx value of used ring */
    uint16_t used_idx;

    /* Used to track the state of each descriptor in descriptor table */
    VuDescStateSplit desc[];
} VuVirtqInflight;
typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
typedef struct VuVirtq {
    VuRing vring;

    VuVirtqInflight *inflight;

    VuVirtqInflightDesc *resubmit_list;

    uint16_t resubmit_num;

    uint64_t counter;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    vu_queue_handler_cb handler;

    int call_fd;
    int kick_fd;
    int err_fd;
    unsigned int enable;
    bool started;

    /* Guest addresses of our ring */
    struct vhost_vring_addr vra;
} VuVirtq;
enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};
typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
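
/*
 * Illustrative sketch (not part of this header): the application owns the
 * event loop, so set_watch/remove_watch only need to record which fd the
 * library wants monitored and invoke the stored vu_watch_cb when poll()
 * reports the condition. All "ex_" names are hypothetical:
 *
 *     static void ex_set_watch(VuDev *dev, int fd, int condition,
 *                              vu_watch_cb cb, void *data)
 *     {
 *         // remember (fd, condition, cb, data) in an application table
 *     }
 *
 *     static void ex_remove_watch(VuDev *dev, int fd)
 *     {
 *         // drop fd from the application table
 *     }
 *
 *     // after poll() reports the fd readable:
 *     //     entry->cb(dev, VU_WATCH_IN, entry->data);
 */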
typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;

struct VuDev {
    int sock;
    uint32_t nregions;
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VuVirtq *vq;
    VuDevInflightInfo inflight_info;
    int log_call_fd;
    /* Must be held while using slave_fd */
    pthread_mutex_t slave_mutex;
    int slave_fd;
    uint64_t log_size;
    uint8_t *log_table;
    uint64_t features;
    uint64_t protocol_features;
    bool broken;
    uint16_t max_queues;

    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    const VuDevIface *iface;

    /* Postcopy data */
    int postcopy_ufd;
    bool postcopy_listening;
};
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
/**
 * vu_init:
 * @dev: a VuDev context
 * @max_queues: maximum number of virtqueues
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 *
 * Returns: true on success, false on failure.
 */
bool vu_init(VuDev *dev,
             uint16_t max_queues,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
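
/*
 * Illustrative call (not part of this header): "sock" is an already
 * connected vhost-user socket, and ex_panic, ex_set_watch, ex_remove_watch
 * and ex_iface are hypothetical application-side callbacks/structures:
 *
 *     VuDev dev;
 *
 *     if (!vu_init(&dev, 1, sock, ex_panic, ex_set_watch,
 *                  ex_remove_watch, &ex_iface)) {
 *         // handle initialization failure
 *     }
 */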
/**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
void vu_deinit(VuDev *dev);
/**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: true on success, false on failure.
 */
bool vu_dispatch(VuDev *dev);
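
/*
 * Illustrative sketch (not part of this header): the watch callback that the
 * application registers for the vhost-user socket typically just calls
 * vu_dispatch() and tears the device down when the connection breaks:
 *
 *     static void ex_socket_watch(VuDev *dev, int condition, void *data)
 *     {
 *         if (!vu_dispatch(dev)) {
 *             vu_deinit(dev);
 *             // connection closed or fatal protocol error
 *         }
 *     }
 */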
/**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @plen: in/out: the requested length; on return, the length actually
 *        available at the translated address
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);
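
/*
 * For example (illustrative, names hypothetical): to access a guest buffer
 * at guest-physical address "gpa" with length "want":
 *
 *     uint64_t len = want;
 *     void *p = vu_gpa_to_va(dev, &len, gpa);
 *
 *     if (!p || len < want) {
 *         // address not mapped, or the buffer crosses a region boundary
 *     }
 */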
/**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns: the #VuVirtq with index @qidx.
 */
VuVirtq *vu_get_queue(VuDev *dev, int qidx);
/**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);
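
/*
 * Illustrative use (not part of this header): a queue_set_started callback
 * commonly installs or removes the handler, e.g. with a hypothetical
 * "ex_handle_queue":
 *
 *     static void ex_queue_set_started(VuDev *dev, int qidx, bool started)
 *     {
 *         vu_set_queue_handler(dev, vu_get_queue(dev, qidx),
 *                              started ? ex_handle_queue : NULL);
 *     }
 */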
/**
 * vu_set_queue_host_notifier:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @fd: a file descriptor
 * @size: host page size
 * @offset: notifier offset in @fd file
 *
 * Set queue's host notifier. This function may be called several
 * times for the same queue. If called with -1 @fd, the notifier
 * is removed.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset);
/**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: whether to enable or disable notification
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
/**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
/**
 * vu_queue_started:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is started.
 */
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);
/**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: true if the queue is empty or not ready.
 */
bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
/**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
/**
 * vu_queue_notify_sync:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 * or sync message if possible.
 */
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
/**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of the struct to return (must be >= sizeof(VuVirtqElement))
 *
 * Returns: a VuVirtqElement filled from the queue or NULL. The
 * returned element must be free()-d by the caller.
 */
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
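
/*
 * Illustrative sketch (not part of this header): a typical queue handler
 * drains the queue with vu_queue_pop(), completes each element, and then
 * notifies once. "ex_handle_queue" is a hypothetical application callback:
 *
 *     static void ex_handle_queue(VuDev *dev, int qidx)
 *     {
 *         VuVirtq *vq = vu_get_queue(dev, qidx);
 *         VuVirtqElement *elem;
 *
 *         while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
 *             // read from elem->out_sg, write to elem->in_sg as needed
 *             vu_queue_push(dev, vq, elem, 0);   // 0 = bytes written
 *             free(elem);
 *         }
 *         vu_queue_notify(dev, vq);
 *     }
 */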
/**
 * vu_queue_unpop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: the #VuVirtqElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue. The next
 * call to vu_queue_pop() will refetch the element.
 */
void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                    size_t len);
/**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue. The next
 * vu_queue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of
 * in-use elements.
 */
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
/**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);
/**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);
/**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * adding @num).
 */
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
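
/*
 * Illustrative sketch (not part of this header): vu_queue_fill() plus
 * vu_queue_flush() can complete a batch of elements with a single used-ring
 * index update, assuming hypothetical caller-owned arrays "elems[]" and
 * "lens[]" of size "n":
 *
 *     for (unsigned int i = 0; i < n; i++) {
 *         vu_queue_fill(dev, vq, elems[i], lens[i], i);
 *     }
 *     vu_queue_flush(dev, vq, n);
 *     vu_queue_notify(dev, vq);
 */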
/**
 * vu_queue_get_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: returns the number of bytes in device-writable ("in") buffers
 * @out_bytes: returns the number of bytes in device-readable ("out") buffers
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 */
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);
/**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 */
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);
#endif /* LIBVHOST_USER_H */