/*
 * Vhost User library
 *
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Victor Kaplansky <victork@redhat.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */

#ifndef LIBVHOST_USER_H
#define LIBVHOST_USER_H

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/poll.h>
#include <linux/vhost.h>
#include <pthread.h>
#include "standard-headers/linux/virtio_ring.h"

/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096

#define VIRTQUEUE_MAX_SIZE 1024

#define VHOST_MEMORY_MAX_NREGIONS 8

typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,

    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
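
/*
 * Illustrative sketch (not part of the API): once protocol features have
 * been negotiated, an individual bit can be tested directly, e.g.:
 *
 *     if (dev->protocol_features &
 *         (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
 *         // inflight I/O tracking is available
 *     }
 */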

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_VRING_KICK = 35,
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_VRING_CALL = 4,
    VHOST_USER_SLAVE_VRING_ERR = 5,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
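
/*
 * Illustrative note (an assumption mirroring how QEMU builds these
 * messages on the master side): for GET_CONFIG/SET_CONFIG the payload is
 * the three header fields above followed by the config bytes, so the
 * message size works out as
 *
 *     msg.size = VHOST_USER_CONFIG_HDR_SIZE + config_len;
 */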

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif

typedef struct VhostUserMsg {
    int request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
        VhostUserVringArea area;
        VhostUserInflight inflight;
    } payload;

    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    uint8_t *data;
} VU_PACKED VhostUserMsg;
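
/*
 * Illustrative note (not part of this header): on the wire a vhost-user
 * message is the fixed header (request, flags, size) followed by "size"
 * bytes of payload; any file descriptors (memory region fds, kick/call
 * fds, ...) are passed out of band as SCM_RIGHTS ancillary data on the
 * UNIX socket and end up in fds[]/fd_num once the message has been read.
 */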

typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;
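
/*
 * Illustrative sketch (an assumption based on the fields above): a guest
 * physical address gpa that falls inside region r maps to a local pointer
 * as
 *
 *     if (gpa >= r->gpa && gpa < r->gpa + r->size) {
 *         va = (void *)(uintptr_t)(r->mmap_addr + r->mmap_offset +
 *                                  (gpa - r->gpa));
 *     }
 *
 * vu_gpa_to_va(), declared below, performs this kind of lookup across all
 * registered regions.
 */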

typedef struct VuDev VuDev;

typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags);

typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received */
    /* skip libvhost-user processing if return value != 0 */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
    /*
     * Returns true if the queue is processed in order, in which case
     * it will be resumed from vring.used->idx. This can help to
     * support resuming on unmanaged exit/crash.
     */
    vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
    /* get the config space of the device */
    vu_get_config_cb get_config;
    /* set the config space of the device */
    vu_set_config_cb set_config;
} VuDevIface;
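
/*
 * Illustrative sketch of a minimal device interface (all "my_*" names are
 * hypothetical and to be provided by the application, not by
 * libvhost-user):
 *
 *     static uint64_t my_get_features(VuDev *dev)
 *     {
 *         return 1ULL << VIRTIO_F_VERSION_1; // from linux/virtio_config.h
 *     }
 *
 *     static void my_set_features(VuDev *dev, uint64_t features) { }
 *
 *     static void my_queue_set_started(VuDev *dev, int qidx, bool started)
 *     {
 *         vu_set_queue_handler(dev, vu_get_queue(dev, qidx),
 *                              started ? my_queue_handler : NULL);
 *     }
 *
 *     static const VuDevIface my_iface = {
 *         .get_features = my_get_features,
 *         .set_features = my_set_features,
 *         .queue_set_started = my_queue_set_started,
 *     };
 */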

typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);

typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;

typedef struct VuDescStateSplit {
    /* Indicates whether this descriptor is inflight or not.
     * Only available for head descriptors. */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /* Maintains a list of the last batch of used descriptors.
     * Only available when batching is used for submitting. */
    uint16_t next;

    /* Used to preserve the order of fetching available descriptors.
     * Only available for head descriptors. */
    uint64_t counter;
} VuDescStateSplit;

typedef struct VuVirtqInflight {
    /* The feature flags of this region. Currently initialized to 0. */
    uint64_t features;

    /* The version of this region. It's 1 currently.
     * A zero value indicates that a VM reset happened. */
    uint16_t version;

    /* The size of the VuDescStateSplit array. It's equal to the virtqueue
     * size. The slave can get it from the queue_size field of
     * VhostUserInflight. */
    uint16_t desc_num;

    /* The head of the list that tracks the last batch of used descriptors. */
    uint16_t last_batch_head;

    /* Stores the idx value of the used ring. */
    uint16_t used_idx;

    /* Used to track the state of each descriptor in the descriptor table. */
    VuDescStateSplit desc[];
} VuVirtqInflight;

typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;

typedef struct VuVirtq {
    VuRing vring;

    VuVirtqInflight *inflight;

    VuVirtqInflightDesc *resubmit_list;

    uint16_t resubmit_num;

    uint64_t counter;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    vu_queue_handler_cb handler;

    int call_fd;
    int kick_fd;
    int err_fd;
    unsigned int enable;
    bool started;

    /* Guest addresses of our ring */
    struct vhost_vring_addr vra;
} VuVirtq;

enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};

typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);

typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;

struct VuDev {
    int sock;
    uint32_t nregions;
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VuVirtq *vq;
    VuDevInflightInfo inflight_info;
    int log_call_fd;
    /* Must be held while using slave_fd */
    pthread_mutex_t slave_mutex;
    int slave_fd;
    uint64_t log_size;
    uint8_t *log_table;
    uint64_t features;
    uint64_t protocol_features;
    bool broken;
    uint16_t max_queues;

    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    const VuDevIface *iface;

    /* Postcopy data */
    int postcopy_ufd;
    bool postcopy_listening;
};

typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;

/**
 * vu_init:
 * @dev: a VuDev context
 * @max_queues: maximum number of virtqueues
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 *
 * Returns: true on success, false on failure.
 */
bool vu_init(VuDev *dev,
             uint16_t max_queues,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
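
/*
 * Illustrative usage sketch (client_sock, my_panic, my_set_watch,
 * my_remove_watch and my_iface are hypothetical, supplied by the
 * application):
 *
 *     VuDev dev;
 *
 *     if (!vu_init(&dev, 1, client_sock, my_panic, my_set_watch,
 *                  my_remove_watch, &my_iface)) {
 *         fprintf(stderr, "vu_init failed\n");
 *         exit(EXIT_FAILURE);
 *     }
 *
 * The application then watches dev.sock itself and calls vu_dispatch()
 * whenever it becomes readable (see below); set_watch/remove_watch are
 * invoked by the library to watch additional fds such as queue kick fds.
 */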

/**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
void vu_deinit(VuDev *dev);

/**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: true on success, false on failure.
 */
bool vu_dispatch(VuDev *dev);
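
/*
 * Illustrative dispatch loop (a sketch that assumes the application owns
 * the poll loop; error handling trimmed):
 *
 *     struct pollfd pfd = { .fd = dev.sock, .events = POLLIN };
 *
 *     while (poll(&pfd, 1, -1) >= 0) {
 *         if ((pfd.revents & POLLIN) && !vu_dispatch(&dev)) {
 *             break; // disconnected or failed to process the message
 *         }
 *     }
 *     vu_deinit(&dev);
 */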

/**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @plen: guest memory size
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);
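
/*
 * Illustrative usage (a sketch; the descriptor variable is hypothetical):
 *
 *     uint64_t len = desc.len;
 *     void *buf = vu_gpa_to_va(dev, &len, desc.addr);
 *
 *     if (!buf) {
 *         // address not covered by any mapped region
 *     } else if (len != desc.len) {
 *         // assumption: only a prefix could be mapped contiguously,
 *         // e.g. the buffer crosses a memory region boundary
 *     }
 */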

/**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns the queue at index @qidx.
 */
VuVirtq *vu_get_queue(VuDev *dev, int qidx);

/**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);

/**
 * vu_set_queue_host_notifier:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @fd: a file descriptor
 * @size: host page size
 * @offset: notifier offset in @fd file
 *
 * Set queue's host notifier. This function may be called several
 * times for the same queue. If called with -1 @fd, the notifier
 * is removed.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset);

/**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: state
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);

/**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_started:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is started.
 */
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);

/**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: true if the queue is empty or not ready.
 */
bool vu_queue_empty(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
void vu_queue_notify(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_notify_sync:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 * or sync message if possible.
 */
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of struct to return (must be >= sizeof(VuVirtqElement))
 *
 * Returns: a VuVirtqElement filled from the queue or NULL. The
 * returned element must be free()-d by the caller.
 */
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
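
/*
 * Illustrative queue handler (a sketch; a real device would parse the
 * request found in out_sg and produce a reply in in_sg):
 *
 *     static void my_queue_handler(VuDev *dev, int qidx)
 *     {
 *         VuVirtq *vq = vu_get_queue(dev, qidx);
 *         VuVirtqElement *elem;
 *
 *         while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
 *             // out_sg: buffers written by the driver (device reads them)
 *             // in_sg:  buffers the device may write a response into
 *             size_t written = 0;
 *
 *             vu_queue_push(dev, vq, elem, written);
 *             free(elem);
 *         }
 *         vu_queue_notify(dev, vq);
 *     }
 */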

/**
 * vu_queue_unpop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: The #VuVirtqElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue. The next
 * call to vu_queue_pop() will refetch the element.
 */
void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                    size_t len);

/**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue. The next
 * vu_queue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);

/**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);

/**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);

/**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * num elements).
 */
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
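
/*
 * Illustrative batching sketch (an alternative to vu_queue_push() when
 * completing several elements at once; elems[], lens[] and n are
 * hypothetical):
 *
 *     for (unsigned int i = 0; i < n; i++) {
 *         vu_queue_fill(dev, vq, elems[i], lens[i], i);
 *     }
 *     vu_queue_flush(dev, vq, n);
 *     vu_queue_notify(dev, vq);
 */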

/**
 * vu_queue_get_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: returns the number of bytes available in device-writable
 *            ("in") buffers
 * @out_bytes: returns the number of bytes available in device-readable
 *             ("out") buffers
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 */
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);

/**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 */
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);

#endif /* LIBVHOST_USER_H */