/*
 * Vhost User library
 *
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */

#ifndef LIBVHOST_USER_H
#define LIBVHOST_USER_H

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/poll.h>
#include <linux/vhost.h>
#include <pthread.h>
#include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096

#define VIRTQUEUE_MAX_SIZE 1024

#define VHOST_MEMORY_MAX_NREGIONS 8

typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,

    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_VRING_KICK = 35,
    VHOST_USER_MAX
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_VRING_CALL = 4,
    VHOST_USER_SLAVE_VRING_ERR = 5,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
typedef struct VhostUserMsg {
    int request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
        VhostUserVringArea area;
        VhostUserInflight inflight;
    } payload;

    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    uint8_t *data;
} VU_PACKED VhostUserMsg;
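
/*
 * Illustrative sketch (not part of the API): the masks above are how a
 * backend typically picks apart a message. VHOST_USER_NEED_REPLY_MASK is
 * checked against the flags field, while the vring index and the "no fd"
 * bit travel in payload.u64 for the vring fd messages. The helper names
 * below are hypothetical.
 *
 *     static bool vmsg_needs_reply(const VhostUserMsg *vmsg)
 *     {
 *         return (vmsg->flags & VHOST_USER_NEED_REPLY_MASK) != 0;
 *     }
 *
 *     static unsigned int vmsg_vring_index(const VhostUserMsg *vmsg)
 *     {
 *         return vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 *     }
 *
 *     static bool vmsg_vring_has_fd(const VhostUserMsg *vmsg)
 *     {
 *         return !(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK);
 *     }
 */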
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;

typedef struct VuDev VuDev;

typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags);
typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received;
     * return a non-zero value to skip libvhost-user's own processing
     * of the message. */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
    /*
     * Returns whether the queue is processed in order, in which case
     * it will be resumed to vring.used->idx. This can help to support
     * resuming on unmanaged exit/crash.
     */
    vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
    /* get the config space of the device */
    vu_get_config_cb get_config;
    /* set the config space of the device */
    vu_set_config_cb set_config;
} VuDevIface;
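
/*
 * Illustrative sketch: a minimal backend fills in the callbacks it needs and
 * leaves the rest NULL (which callbacks must be set depends on the features
 * and protocol features you negotiate). A real get_features implementation
 * returns the device's feature bits; the example_* names are hypothetical
 * (see the queue-handling sketches further below).
 *
 *     static uint64_t example_get_features(VuDev *dev)
 *     {
 *         return 0;
 *     }
 *
 *     static void example_set_features(VuDev *dev, uint64_t features)
 *     {
 *     }
 *
 *     static const VuDevIface example_iface = {
 *         .get_features = example_get_features,
 *         .set_features = example_set_features,
 *     };
 */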
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);

typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;

typedef struct VuDescStateSplit {
    /* Indicate whether this descriptor is inflight or not.
     * Only available for head-descriptor. */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /* Maintain a list for the last batch of used descriptors.
     * Only available when batching is used for submitting */
    uint16_t next;

    /* Used to preserve the order of fetching available descriptors.
     * Only available for head-descriptor. */
    uint64_t counter;
} VuDescStateSplit;
typedef struct VuVirtqInflight {
    /* The feature flags of this region. Now it's initialized to 0. */
    uint64_t features;

    /* The version of this region. It's 1 currently.
     * Zero value indicates a vm reset happened. */
    uint16_t version;

    /* The size of the VuDescStateSplit array. It's equal to the virtqueue
     * size. The slave can get it from the queue_size field of
     * VhostUserInflight. */
    uint16_t desc_num;

    /* The head of the list that tracks the last batch of used descriptors. */
    uint16_t last_batch_head;

    /* Storing the idx value of used ring */
    uint16_t used_idx;

    /* Used to track the state of each descriptor in descriptor table */
    VuDescStateSplit desc[];
} VuVirtqInflight;

typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
typedef struct VuVirtq {
    VuRing vring;

    VuVirtqInflight *inflight;

    VuVirtqInflightDesc *resubmit_list;

    uint16_t resubmit_num;

    uint64_t counter;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    vu_queue_handler_cb handler;

    int call_fd;
    int kick_fd;
    int err_fd;
    unsigned int enable;
    bool started;

    /* Guest addresses of our ring */
    struct vhost_vring_addr vra;
} VuVirtq;
enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};

typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);

typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;
struct VuDev {
    int sock;
    uint32_t nregions;
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VuVirtq *vq;
    VuDevInflightInfo inflight_info;
    int log_call_fd;
    /* Must be held while using slave_fd */
    pthread_mutex_t slave_mutex;
    int slave_fd;
    uint64_t log_size;
    uint8_t *log_table;
    uint64_t features;
    uint64_t protocol_features;
    bool broken;
    uint16_t max_queues;

    /* @set_watch: add or update the given fd in the watch set;
     * call cb when the condition is met. */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    const VuDevIface *iface;

    /* Postcopy data */
    int postcopy_ufd;
    bool postcopy_listening;
};
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
/**
 * vu_init:
 * @dev: a VuDev context
 * @max_queues: maximum number of virtqueues
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 *
 * Returns: true on success, false on failure.
 */
bool vu_init(VuDev *dev,
             uint16_t max_queues,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
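
/*
 * Illustrative sketch: bringing up a device on an already-connected
 * vhost-user socket. conn_fd, example_set_watch, example_remove_watch and
 * example_iface are hypothetical; the watch callbacks are expected to
 * register/unregister the fd with the application's own event loop.
 *
 *     static void example_panic(VuDev *dev, const char *err)
 *     {
 *         fprintf(stderr, "vhost-user: %s\n", err);
 *     }
 *
 *     VuDev dev;
 *
 *     if (!vu_init(&dev, 1, conn_fd, example_panic,
 *                  example_set_watch, example_remove_watch,
 *                  &example_iface)) {
 *         return -1;
 *     }
 */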
/**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
void vu_deinit(VuDev *dev);

/**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: true on success, false on failure.
 */
bool vu_dispatch(VuDev *dev);
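
/*
 * Illustrative sketch: vu_dispatch() is typically called whenever the
 * vhost-user socket becomes readable, e.g. from the watch callback
 * registered through set_watch; example_stop_backend is hypothetical.
 *
 *     static void example_socket_watch(VuDev *dev, int condition, void *data)
 *     {
 *         if (!vu_dispatch(dev)) {
 *             example_stop_backend(dev);
 *         }
 *     }
 */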
/**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @plen: guest memory size
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);

/**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns: the queue at index @qidx.
 */
VuVirtq *vu_get_queue(VuDev *dev, int qidx);
/**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);
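
/*
 * Illustrative sketch: handlers are commonly installed and removed from the
 * queue_set_started callback of the VuDevIface; example_handle_queue is a
 * hypothetical per-queue handler (see the processing sketch further below).
 *
 *     static void example_queue_set_started(VuDev *dev, int qidx, bool started)
 *     {
 *         VuVirtq *vq = vu_get_queue(dev, qidx);
 *
 *         vu_set_queue_handler(dev, vq, started ? example_handle_queue : NULL);
 *     }
 */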
/**
 * vu_set_queue_host_notifier:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @fd: a file descriptor
 * @size: host page size
 * @offset: notifier offset in @fd file
 *
 * Set queue's host notifier. This function may be called several
 * times for the same queue. If called with -1 @fd, the notifier
 * is removed.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset);
/**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: state
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);

/**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_started:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is started.
 */
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);

/**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: true if the queue is empty or not ready.
 */
bool vu_queue_empty(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
void vu_queue_notify(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_notify_sync:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 * or sync message if possible.
 */
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
/**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of struct to return (must be >= sizeof(VuVirtqElement))
 *
 * Returns: a VuVirtqElement filled from the queue or NULL. The
 * returned element must be free()-d by the caller.
 */
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
/**
 * vu_queue_unpop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: The #VuVirtqElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue. The next
 * call to vu_queue_pop() will refetch the element.
 */
void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                    size_t len);

/**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue. The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);

/**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);

/**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);
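
/*
 * Illustrative sketch: a typical queue handler drains the available ring
 * with vu_queue_pop(), processes each element's out_sg/in_sg, completes it
 * with vu_queue_push(), and finally notifies the guest. Passing 0 as @len
 * assumes the device wrote nothing into the in_sg buffers.
 *
 *     static void example_handle_queue(VuDev *dev, int qidx)
 *     {
 *         VuVirtq *vq = vu_get_queue(dev, qidx);
 *         VuVirtqElement *elem;
 *
 *         while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
 *             vu_queue_push(dev, vq, elem, 0);
 *             free(elem);
 *         }
 *
 *         vu_queue_notify(dev, vq);
 *     }
 */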
/**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * num elements).
 */
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);

/**
 * vu_queue_get_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: in bytes
 * @out_bytes: out bytes
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 */
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);

/**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 */
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);

#endif /* LIBVHOST_USER_H */