4 * Copyright (c) 2016 Red Hat, Inc.
7 * Victor Kaplansky <victork@redhat.com>
8 * Marc-André Lureau <mlureau@redhat.com>
10 * This work is licensed under the terms of the GNU GPL, version 2 or
11 * later. See the COPYING file in the top-level directory.
14 #ifndef LIBVHOST_USER_H
15 #define LIBVHOST_USER_H
21 #include <linux/vhost.h>
23 #include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */

/* Numeric value 30; NOTE(review): presumably a feature *bit index* - confirm. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Page-size constant (4096) used for the vhost log. */
#define VHOST_LOG_PAGE 4096

/* Largest virtqueue size constant exposed by this header. */
#define VIRTQUEUE_MAX_SIZE 1024

/* Length of the fixed-size regions[] and fds[] arrays of the message structs. */
#define VHOST_MEMORY_BASELINE_NREGIONS 8
/*
 * Set a reasonable maximum number of ram slots.
 * NOTE(review): the original sentence continues "which will be supported
 * by ..." but its ending is missing from this copy; restore it from upstream.
 * The value also sizes the VuDevRegion regions[] array.
 */
#define VHOST_USER_MAX_RAM_SLOTS 32
/*
 * Byte offset of the payload.u64 member inside VhostUserMsg; used as the
 * size of the message header.
 */
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
41 typedef enum VhostSetConfigType
{
42 VHOST_SET_CONFIG_TYPE_FRONTEND
= 0,
43 VHOST_SET_CONFIG_TYPE_MIGRATION
= 1,
/*
 * Maximum size of virtio device config space
 * (also the length of the VhostUserConfig region[] array).
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
51 enum VhostUserProtocolFeature
{
52 VHOST_USER_PROTOCOL_F_MQ
= 0,
53 VHOST_USER_PROTOCOL_F_LOG_SHMFD
= 1,
54 VHOST_USER_PROTOCOL_F_RARP
= 2,
55 VHOST_USER_PROTOCOL_F_REPLY_ACK
= 3,
56 VHOST_USER_PROTOCOL_F_NET_MTU
= 4,
57 VHOST_USER_PROTOCOL_F_BACKEND_REQ
= 5,
58 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN
= 6,
59 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
= 7,
60 VHOST_USER_PROTOCOL_F_PAGEFAULT
= 8,
61 VHOST_USER_PROTOCOL_F_CONFIG
= 9,
62 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD
= 10,
63 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
= 11,
64 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
= 12,
65 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS
= 14,
66 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS
= 15,
67 /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */
68 /* Feature 17 is reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */
69 VHOST_USER_PROTOCOL_F_SHARED_OBJECT
= 18,
70 VHOST_USER_PROTOCOL_F_MAX
/*
 * Mask with one bit set for every protocol feature number below
 * VHOST_USER_PROTOCOL_F_MAX.
 * NOTE(review): the shift is performed in plain int, so the expression is
 * only well-defined while VHOST_USER_PROTOCOL_F_MAX stays below 31.
 */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
75 typedef enum VhostUserRequest
{
77 VHOST_USER_GET_FEATURES
= 1,
78 VHOST_USER_SET_FEATURES
= 2,
79 VHOST_USER_SET_OWNER
= 3,
80 VHOST_USER_RESET_OWNER
= 4,
81 VHOST_USER_SET_MEM_TABLE
= 5,
82 VHOST_USER_SET_LOG_BASE
= 6,
83 VHOST_USER_SET_LOG_FD
= 7,
84 VHOST_USER_SET_VRING_NUM
= 8,
85 VHOST_USER_SET_VRING_ADDR
= 9,
86 VHOST_USER_SET_VRING_BASE
= 10,
87 VHOST_USER_GET_VRING_BASE
= 11,
88 VHOST_USER_SET_VRING_KICK
= 12,
89 VHOST_USER_SET_VRING_CALL
= 13,
90 VHOST_USER_SET_VRING_ERR
= 14,
91 VHOST_USER_GET_PROTOCOL_FEATURES
= 15,
92 VHOST_USER_SET_PROTOCOL_FEATURES
= 16,
93 VHOST_USER_GET_QUEUE_NUM
= 17,
94 VHOST_USER_SET_VRING_ENABLE
= 18,
95 VHOST_USER_SEND_RARP
= 19,
96 VHOST_USER_NET_SET_MTU
= 20,
97 VHOST_USER_SET_BACKEND_REQ_FD
= 21,
98 VHOST_USER_IOTLB_MSG
= 22,
99 VHOST_USER_SET_VRING_ENDIAN
= 23,
100 VHOST_USER_GET_CONFIG
= 24,
101 VHOST_USER_SET_CONFIG
= 25,
102 VHOST_USER_CREATE_CRYPTO_SESSION
= 26,
103 VHOST_USER_CLOSE_CRYPTO_SESSION
= 27,
104 VHOST_USER_POSTCOPY_ADVISE
= 28,
105 VHOST_USER_POSTCOPY_LISTEN
= 29,
106 VHOST_USER_POSTCOPY_END
= 30,
107 VHOST_USER_GET_INFLIGHT_FD
= 31,
108 VHOST_USER_SET_INFLIGHT_FD
= 32,
109 VHOST_USER_GPU_SET_SOCKET
= 33,
110 VHOST_USER_VRING_KICK
= 35,
111 VHOST_USER_GET_MAX_MEM_SLOTS
= 36,
112 VHOST_USER_ADD_MEM_REG
= 37,
113 VHOST_USER_REM_MEM_REG
= 38,
114 VHOST_USER_GET_SHARED_OBJECT
= 41,
/*
 * Backend request codes.  Every code carries an explicit value so the
 * numbering cannot shift when entries are added; VHOST_USER_BACKEND_MAX is
 * left implicit and therefore always one past the last defined code.
 */
typedef enum VhostUserBackendRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_VRING_CALL = 4,
    VHOST_USER_BACKEND_VRING_ERR = 5,
    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
    VHOST_USER_BACKEND_MAX
} VhostUserBackendRequest;
/*
 * Description of one memory region: four 64-bit fields, 32 bytes in total.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical address of the region */
    uint64_t memory_size;       /* size of the region */
    uint64_t userspace_addr;    /* userspace address of the region */
    uint64_t mmap_offset;       /* offset to use when mmap()-ing the region */
} VhostUserMemoryRegion;

/* Size in bytes of a single VhostUserMemoryRegion. */
#define VHOST_USER_MEM_REG_SIZE (sizeof(VhostUserMemoryRegion))
140 typedef struct VhostUserMemory
{
143 VhostUserMemoryRegion regions
[VHOST_MEMORY_BASELINE_NREGIONS
];
146 typedef struct VhostUserMemRegMsg
{
148 VhostUserMemoryRegion region
;
149 } VhostUserMemRegMsg
;
151 typedef struct VhostUserLog
{
153 uint64_t mmap_offset
;
156 typedef struct VhostUserConfig
{
160 uint8_t region
[VHOST_USER_MAX_CONFIG_SIZE
];
163 static VhostUserConfig c
__attribute__ ((unused
));
164 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
168 typedef struct VhostUserVringArea
{
172 } VhostUserVringArea
;
174 typedef struct VhostUserInflight
{
176 uint64_t mmap_offset
;
183 typedef struct VhostUserShared
{
184 unsigned char uuid
[UUID_LEN
];
187 #if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
188 # define VU_PACKED __attribute__((gcc_struct, packed))
190 # define VU_PACKED __attribute__((packed))
193 typedef struct VhostUserMsg
{
196 #define VHOST_USER_VERSION_MASK (0x3)
197 #define VHOST_USER_REPLY_MASK (0x1 << 2)
198 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
200 uint32_t size
; /* the following payload size */
203 #define VHOST_USER_VRING_IDX_MASK (0xff)
204 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
206 struct vhost_vring_state state
;
207 struct vhost_vring_addr addr
;
208 VhostUserMemory memory
;
209 VhostUserMemRegMsg memreg
;
211 VhostUserConfig config
;
212 VhostUserVringArea area
;
213 VhostUserInflight inflight
;
214 VhostUserShared object
;
217 int fds
[VHOST_MEMORY_BASELINE_NREGIONS
];
220 } VU_PACKED VhostUserMsg
;
222 typedef struct VuDevRegion
{
223 /* Guest Physical address. */
225 /* Memory region size. */
227 /* QEMU virtual address (userspace). */
229 /* Starting offset in our mmaped space. */
230 uint64_t mmap_offset
;
231 /* Start address of mmaped space. */
/* Forward declaration so the callback types below can take a VuDev pointer. */
typedef struct VuDev VuDev;

/* Callback returning a 64-bit feature bitmask for @dev. */
typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
/* Callback receiving the 64-bit feature bitmask @features for @dev. */
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
239 typedef int (*vu_process_msg_cb
) (VuDev
*dev
, VhostUserMsg
*vmsg
,
241 typedef bool (*vu_read_msg_cb
) (VuDev
*dev
, int sock
, VhostUserMsg
*vmsg
);
242 typedef void (*vu_queue_set_started_cb
) (VuDev
*dev
, int qidx
, bool started
);
243 typedef bool (*vu_queue_is_processed_in_order_cb
) (VuDev
*dev
, int qidx
);
244 typedef int (*vu_get_config_cb
) (VuDev
*dev
, uint8_t *config
, uint32_t len
);
245 typedef int (*vu_set_config_cb
) (VuDev
*dev
, const uint8_t *data
,
246 uint32_t offset
, uint32_t size
,
248 typedef int (*vu_get_shared_object_cb
) (VuDev
*dev
, const unsigned char *uuid
);
250 typedef struct VuDevIface
{
251 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
252 vu_get_features_cb get_features
;
253 /* enable vhost implementation features */
254 vu_set_features_cb set_features
;
255 /* get the protocol feature bitmask from the underlying vhost
257 vu_get_features_cb get_protocol_features
;
258 /* enable protocol features in the underlying vhost implementation. */
259 vu_set_features_cb set_protocol_features
;
260 /* process_msg is called for each vhost-user message received */
261 /* skip libvhost-user processing if return value != 0 */
262 vu_process_msg_cb process_msg
;
263 /* tells when queues can be processed */
264 vu_queue_set_started_cb queue_set_started
;
266 * If the queue is processed in order, in which case it will be
267 * resumed to vring.used->idx. This can help to support resuming
268 * on unmanaged exit/crash.
270 vu_queue_is_processed_in_order_cb queue_is_processed_in_order
;
271 /* get the config space of the device */
272 vu_get_config_cb get_config
;
273 /* set the config space of the device */
274 vu_set_config_cb set_config
;
275 /* get virtio shared object from the underlying vhost implementation. */
276 vu_get_shared_object_cb get_shared_object
;
279 typedef void (*vu_queue_handler_cb
) (VuDev
*dev
, int qidx
);
281 typedef struct VuRing
{
283 struct vring_desc
*desc
;
284 struct vring_avail
*avail
;
285 struct vring_used
*used
;
286 uint64_t log_guest_addr
;
290 typedef struct VuDescStateSplit
{
291 /* Indicate whether this descriptor is inflight or not.
292 * Only available for head-descriptor. */
298 /* Maintain a list for the last batch of used descriptors.
299 * Only available when batching is used for submitting */
302 /* Used to preserve the order of fetching available descriptors.
303 * Only available for head-descriptor. */
307 typedef struct VuVirtqInflight
{
308 /* The feature flags of this region. Now it's initialized to 0. */
311 /* The version of this region. It's 1 currently.
312 * Zero value indicates a vm reset happened. */
316 * The size of VuDescStateSplit array. It's equal to the virtqueue size.
317 * Backend could get it from queue size field of VhostUserInflight.
321 /* The head of list that track the last batch of used descriptors. */
322 uint16_t last_batch_head
;
324 /* Storing the idx value of used ring */
327 /* Used to track the state of each descriptor in descriptor table */
328 VuDescStateSplit desc
[];
331 typedef struct VuVirtqInflightDesc
{
334 } VuVirtqInflightDesc
;
336 typedef struct VuVirtq
{
339 VuVirtqInflight
*inflight
;
341 VuVirtqInflightDesc
*resubmit_list
;
343 uint16_t resubmit_num
;
347 /* Next head to pop */
348 uint16_t last_avail_idx
;
350 /* Last avail_idx read from VQ. */
351 uint16_t shadow_avail_idx
;
355 /* Last used index value we have signalled on */
356 uint16_t signalled_used
;
358 /* Presumably: whether signalled_used holds a valid value (the original comment repeated the previous field's) */
359 bool signalled_used_valid
;
361 /* Notification enabled? */
366 vu_queue_handler_cb handler
;
374 /* Guest addresses of our ring */
375 struct vhost_vring_addr vra
;
378 enum VuWatchCondtion
{
379 VU_WATCH_IN
= POLLIN
,
380 VU_WATCH_OUT
= POLLOUT
,
381 VU_WATCH_PRI
= POLLPRI
,
382 VU_WATCH_ERR
= POLLERR
,
383 VU_WATCH_HUP
= POLLHUP
,
386 typedef void (*vu_panic_cb
) (VuDev
*dev
, const char *err
);
387 typedef void (*vu_watch_cb
) (VuDev
*dev
, int condition
, void *data
);
388 typedef void (*vu_set_watch_cb
) (VuDev
*dev
, int fd
, int condition
,
389 vu_watch_cb cb
, void *data
);
390 typedef void (*vu_remove_watch_cb
) (VuDev
*dev
, int fd
);
392 typedef struct VuDevInflightInfo
{
401 VuDevRegion regions
[VHOST_USER_MAX_RAM_SLOTS
];
403 VuDevInflightInfo inflight_info
;
405 /* Must be held while using backend_fd */
406 pthread_mutex_t backend_mutex
;
411 uint64_t protocol_features
;
416 * @read_msg: custom method to read vhost-user message
418 * Read data from vhost_user socket fd and fill up
419 * the passed VhostUserMsg *vmsg struct.
421 * If reading fails, it should close the received set of file
422 * descriptors as socket message's auxiliary data.
424 * For the details, please refer to vu_message_read in libvhost-user.c
425 * which will be used by default if no custom method is provided when
428 * Returns: true if vhost-user message successfully received,
429 * otherwise return false.
432 vu_read_msg_cb read_msg
;
435 * @set_watch: add or update the given fd to the watch set,
436 * call cb when condition is met.
438 vu_set_watch_cb set_watch
;
440 /* @remove_watch: remove the given fd from the watch set */
441 vu_remove_watch_cb remove_watch
;
444 * @panic: encountered an unrecoverable error, you may try to re-initialize
447 const VuDevIface
*iface
;
451 bool postcopy_listening
;
454 typedef struct VuVirtqElement
{
456 unsigned int out_num
;
459 struct iovec
*out_sg
;
464 * @dev: a VuDev context
465 * @max_queues: maximum number of virtqueues
466 * @socket: the socket connected to vhost-user frontend
467 * @panic: a panic callback
468 * @set_watch: a set_watch callback
469 * @remove_watch: a remove_watch callback
470 * @iface: a VuDevIface structure with vhost-user device callbacks
472 * Initializes a VuDev vhost-user context.
474 * Returns: true on success, false on failure.
476 bool vu_init(VuDev
*dev
,
480 vu_read_msg_cb read_msg
,
481 vu_set_watch_cb set_watch
,
482 vu_remove_watch_cb remove_watch
,
483 const VuDevIface
*iface
);
488 * @dev: a VuDev context
490 * Cleans up the VuDev context
492 void vu_deinit(VuDev
*dev
);
/**
 * vu_request_to_string: return string for vhost message request
 * @req: VhostUserMsg request
 *
 * Returns a const string, do not free.
 */
const char *vu_request_to_string(unsigned int req);
505 * @dev: a VuDev context
507 * Process one vhost-user message.
509 * Returns: TRUE on success, FALSE on failure.
511 bool vu_dispatch(VuDev
*dev
);
515 * @dev: a VuDev context
516 * @plen: guest memory size
517 * @guest_addr: guest address
519 * Translate a guest address to a pointer. Returns NULL on failure.
521 void *vu_gpa_to_va(VuDev
*dev
, uint64_t *plen
, uint64_t guest_addr
);
525 * @dev: a VuDev context
528 * Returns the queue number @qidx.
530 VuVirtq
*vu_get_queue(VuDev
*dev
, int qidx
);
533 * vu_set_queue_handler:
534 * @dev: a VuDev context
535 * @vq: a VuVirtq queue
536 * @handler: the queue handler callback
538 * Set the queue handler. This function may be called several times
539 * for the same queue. If called with NULL @handler, the handler is
542 void vu_set_queue_handler(VuDev
*dev
, VuVirtq
*vq
,
543 vu_queue_handler_cb handler
);
546 * vu_set_queue_host_notifier:
547 * @dev: a VuDev context
548 * @vq: a VuVirtq queue
549 * @fd: a file descriptor
550 * @size: host page size
551 * @offset: notifier offset in @fd file
553 * Set queue's host notifier. This function may be called several
554 * times for the same queue. If called with -1 @fd, the notifier
557 bool vu_set_queue_host_notifier(VuDev
*dev
, VuVirtq
*vq
, int fd
,
558 int size
, int offset
);
561 * vu_lookup_shared_object:
562 * @dev: a VuDev context
563 * @uuid: UUID of the shared object
564 * @dmabuf_fd: output dma-buf file descriptor
566 * Look up a virtio shared object (i.e., dma-buf fd) associated with the
567 * received UUID. Result, if found, is stored in the dmabuf_fd argument.
569 * Returns: whether the virtio object was found.
571 bool vu_lookup_shared_object(VuDev
*dev
, unsigned char uuid
[UUID_LEN
],
575 * vu_add_shared_object:
576 * @dev: a VuDev context
577 * @uuid: UUID of the shared object
579 * Registers this back-end as the exporter for the object associated with
582 * Returns: TRUE on success, FALSE on failure.
584 bool vu_add_shared_object(VuDev
*dev
, unsigned char uuid
[UUID_LEN
]);
587 * vu_rm_shared_object:
588 * @dev: a VuDev context
589 * @uuid: UUID of the shared object
591 * Removes a shared object entry (i.e., back-end entry) associated with the
592 * received UUID key from the hash table.
594 * Returns: TRUE on success, FALSE on failure.
596 bool vu_rm_shared_object(VuDev
*dev
, unsigned char uuid
[UUID_LEN
]);
599 * vu_queue_set_notification:
600 * @dev: a VuDev context
601 * @vq: a VuVirtq queue
604 * Set whether the queue notifies (via event index or interrupt)
606 void vu_queue_set_notification(VuDev
*dev
, VuVirtq
*vq
, int enable
);
610 * @dev: a VuDev context
611 * @vq: a VuVirtq queue
613 * Returns: whether the queue is enabled.
615 bool vu_queue_enabled(VuDev
*dev
, VuVirtq
*vq
);
619 * @dev: a VuDev context
620 * @vq: a VuVirtq queue
622 * Returns: whether the queue is started.
624 bool vu_queue_started(const VuDev
*dev
, const VuVirtq
*vq
);
628 * @dev: a VuDev context
629 * @vq: a VuVirtq queue
631 * Returns: true if the queue is empty or not ready.
633 bool vu_queue_empty(VuDev
*dev
, VuVirtq
*vq
);
637 * @dev: a VuDev context
638 * @vq: a VuVirtq queue
640 * Request to notify the queue via callfd (skipped if unnecessary)
642 void vu_queue_notify(VuDev
*dev
, VuVirtq
*vq
);
644 void vu_config_change_msg(VuDev
*dev
);
647 * vu_queue_notify_sync:
648 * @dev: a VuDev context
649 * @vq: a VuVirtq queue
651 * Request to notify the queue via callfd (skipped if unnecessary)
652 * or sync message if possible.
654 void vu_queue_notify_sync(VuDev
*dev
, VuVirtq
*vq
);
658 * @dev: a VuDev context
659 * @vq: a VuVirtq queue
660 * @sz: the size of struct to return (must be >= VuVirtqElement)
662 * Returns: a VuVirtqElement filled from the queue or NULL. The
663 * returned element must be free()-d by the caller.
665 void *vu_queue_pop(VuDev
*dev
, VuVirtq
*vq
, size_t sz
);
670 * @dev: a VuDev context
671 * @vq: a VuVirtq queue
672 * @elem: The #VuVirtqElement
673 * @len: number of bytes written
675 * Pretend the most recent element wasn't popped from the virtqueue. The next
676 * call to vu_queue_pop() will refetch the element.
678 void vu_queue_unpop(VuDev
*dev
, VuVirtq
*vq
, VuVirtqElement
*elem
,
683 * @dev: a VuDev context
684 * @vq: a VuVirtq queue
685 * @num: number of elements to push back
687 * Pretend that elements weren't popped from the virtqueue. The next
688 * virtqueue_pop() will refetch the oldest element.
690 * Returns: true on success, false if @num is greater than the number of in use
693 bool vu_queue_rewind(VuDev
*dev
, VuVirtq
*vq
, unsigned int num
);
697 * @dev: a VuDev context
698 * @vq: a VuVirtq queue
699 * @elem: a VuVirtqElement
700 * @len: length in bytes to write
701 * @idx: optional offset for the used ring index (0 in general)
703 * Fill the used ring with @elem element.
705 void vu_queue_fill(VuDev
*dev
, VuVirtq
*vq
,
706 const VuVirtqElement
*elem
,
707 unsigned int len
, unsigned int idx
);
711 * @dev: a VuDev context
712 * @vq: a VuVirtq queue
713 * @elem: a VuVirtqElement
714 * @len: length in bytes to write
716 * Helper that combines vu_queue_fill() with a vu_queue_flush().
718 void vu_queue_push(VuDev
*dev
, VuVirtq
*vq
,
719 const VuVirtqElement
*elem
, unsigned int len
);
723 * @dev: a VuDev context
724 * @vq: a VuVirtq queue
725 * @num: number of elements to flush
727 * Mark the last number of elements as done (used.idx is updated by
730 void vu_queue_flush(VuDev
*dev
, VuVirtq
*vq
, unsigned int num
);
733 * vu_queue_get_avail_bytes:
734 * @dev: a VuDev context
735 * @vq: a VuVirtq queue
736 * @in_bytes: in bytes
737 * @out_bytes: out bytes
738 * @max_in_bytes: stop counting after max_in_bytes
739 * @max_out_bytes: stop counting after max_out_bytes
741 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
743 void vu_queue_get_avail_bytes(VuDev
*vdev
, VuVirtq
*vq
, unsigned int *in_bytes
,
744 unsigned int *out_bytes
,
745 unsigned max_in_bytes
, unsigned max_out_bytes
);
748 * vu_queue_avail_bytes:
749 * @dev: a VuDev context
750 * @vq: a VuVirtq queue
751 * @in_bytes: expected in bytes
752 * @out_bytes: expected out bytes
754 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
756 bool vu_queue_avail_bytes(VuDev
*dev
, VuVirtq
*vq
, unsigned int in_bytes
,
757 unsigned int out_bytes
);
759 #endif /* LIBVHOST_USER_H */