/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-virtio.h"
#include "trace.h"
#include "qemu/defer-call.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "hw/core/cpu.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/vhost.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
#include "sysemu/runstate.h"
#include "virtio-qmp.h"

#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/vhost_types.h"
#include "standard-headers/linux/virtio_blk.h"
#include "standard-headers/linux/virtio_console.h"
#include "standard-headers/linux/virtio_gpu.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_scsi.h"
#include "standard-headers/linux/virtio_i2c.h"
#include "standard-headers/linux/virtio_balloon.h"
#include "standard-headers/linux/virtio_iommu.h"
#include "standard-headers/linux/virtio_mem.h"
#include "standard-headers/linux/virtio_vsock.h"
/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
} VRingPackedDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRingMemoryRegionCaches {
    /* The region cache must only be accessed via RCU. */
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
    VRingMemoryRegionCaches *caches;
} VRing;

typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;
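
/*
 * Editor's note (sketch, not part of the upstream sources): the split
 * layout above mirrors the virtio specification.  A driver publishes a
 * buffer by writing a chain head index into VRingAvail.ring[] and
 * bumping VRingAvail.idx; the device consumes it and writes a
 * VRingUsedElem {id, len} into VRingUsed.ring[], bumping VRingUsed.idx.
 * The packed layout replaces all three areas with a single
 * VRingPackedDesc array whose AVAIL/USED flag bits, interpreted against
 * a wrap counter, encode ownership.
 */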
struct VirtQueue
{
    VRing vring;
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};
const char *virtio_device_names[] = {
    [VIRTIO_ID_NET] = "virtio-net",
    [VIRTIO_ID_BLOCK] = "virtio-blk",
    [VIRTIO_ID_CONSOLE] = "virtio-serial",
    [VIRTIO_ID_RNG] = "virtio-rng",
    [VIRTIO_ID_BALLOON] = "virtio-balloon",
    [VIRTIO_ID_IOMEM] = "virtio-iomem",
    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
    [VIRTIO_ID_SCSI] = "virtio-scsi",
    [VIRTIO_ID_9P] = "virtio-9p",
    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
    [VIRTIO_ID_CAIF] = "virtio-caif",
    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
    [VIRTIO_ID_GPU] = "virtio-gpu",
    [VIRTIO_ID_CLOCK] = "virtio-clk",
    [VIRTIO_ID_INPUT] = "virtio-input",
    [VIRTIO_ID_VSOCK] = "vhost-vsock",
    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
    [VIRTIO_ID_PSTORE] = "virtio-pstore",
    [VIRTIO_ID_IOMMU] = "virtio-iommu",
    [VIRTIO_ID_MEM] = "virtio-mem",
    [VIRTIO_ID_SOUND] = "virtio-sound",
    [VIRTIO_ID_FS] = "virtio-user-fs",
    [VIRTIO_ID_PMEM] = "virtio-pmem",
    [VIRTIO_ID_RPMB] = "virtio-rpmb",
    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
    [VIRTIO_ID_SCMI] = "virtio-scmi",
    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
    [VIRTIO_ID_CAN] = "virtio-can",
    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
    [VIRTIO_ID_BT] = "virtio-bluetooth",
    [VIRTIO_ID_GPIO] = "virtio-gpio"
};
static const char *virtio_id_to_name(uint16_t device_id)
{
    assert(device_id < G_N_ELEMENTS(virtio_device_names));
    const char *name = virtio_device_names[device_id];
    assert(name != NULL);
    return name;
}
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
    assert(caches != NULL);
    address_space_cache_destroy(&caches->desc);
    address_space_cache_destroy(&caches->avail);
    address_space_cache_destroy(&caches->used);
    g_free(caches);
}
static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = qatomic_read(&vq->vring.caches);
    qatomic_rcu_set(&vq->vring.caches, NULL);
    if (caches) {
        call_rcu(caches, virtio_free_region_cache, rcu);
    }
}
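
/*
 * Editor's note: the caches pointer follows the classic RCU
 * publish/retire pattern.  Writers swap the pointer with
 * qatomic_rcu_set() and retire the old copy via call_rcu(); readers
 * must therefore hold the RCU read lock for as long as they
 * dereference it.  A minimal reader, sketched here for illustration
 * only:
 *
 *     RCU_READ_LOCK_GUARD();
 *     VRingMemoryRegionCaches *c = vring_get_region_caches(vq);
 *     if (c) {
 *         ... use c->desc / c->avail / c->used ...
 *     }
 */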
void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;

    addr = vq->vring.desc;
    if (!addr) {
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                   true : false;
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}
/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num || !vring->desc || !vring->align) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
    virtio_init_region_cache(vdev, n);
}
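
/*
 * Editor's note: a worked example of the layout computed above, for a
 * split ring with num = 256 and the default 4096-byte alignment.  The
 * descriptor table is 256 * sizeof(VRingDesc) = 4096 bytes, so
 * avail = desc + 4096.  The avail area up to ring[256] occupies
 * 4 + 2 * 256 = 516 bytes, and used is rounded up to the next
 * alignment boundary: used = vring_align(desc + 4612, 4096)
 * = desc + 8192.
 */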
/* Called within rcu_read_lock(). */
static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                                  MemoryRegionCache *cache, int i)
{
    address_space_read_cached(cache, i * sizeof(VRingDesc),
                              desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}
static void vring_packed_event_read(VirtIODevice *vdev,
                                    MemoryRegionCache *cache,
                                    VRingPackedDescEvent *e)
{
    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);

    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
    /* Make sure flags is seen before off_wrap */
    smp_rmb();
    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
    virtio_tswap16s(vdev, &e->flags);
}
static void vring_packed_off_wrap_write(VirtIODevice *vdev,
                                        MemoryRegionCache *cache,
                                        uint16_t off_wrap)
{
    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);

    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
}
static void vring_packed_flags_write(VirtIODevice *vdev,
                                     MemoryRegionCache *cache, uint16_t flags)
{
    hwaddr off = offsetof(VRingPackedDescEvent, flags);

    virtio_stw_phys_cached(vdev, cache, off, flags);
    address_space_cache_invalidate(cache, off, sizeof(flags));
}
/* Called within rcu_read_lock().  */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    return qatomic_rcu_read(&vq->vring.caches);
}
/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, idx);

    if (!caches) {
        return 0;
    }

    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
    return vq->shadow_avail_idx;
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, ring[i]);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
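
/*
 * Editor's note: with VIRTIO_RING_F_EVENT_IDX the "used_event" value
 * read above lives in the trailing slot of the avail ring, i.e. at
 * avail->ring[vring.num]; symmetrically, vring_set_avail_event() below
 * writes "avail_event" into used->ring[vring.num].  The guest uses the
 * former to say "interrupt me once this used index is reached", and the
 * device uses the latter to ask for a kick.
 */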
/* Called within rcu_read_lock().  */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, ring[i]);

    if (!caches) {
        return;
    }

    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_used_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (caches) {
        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
    }

    vq->used_idx = val;
}
/* Called within rcu_read_lock().  */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
/* Called within rcu_read_lock().  */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;

    if (!vq->notification) {
        return;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
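
/*
 * Editor's note: for split rings there are two suppression mechanisms.
 * Without EVENT_IDX the device can only toggle the coarse
 * VRING_USED_F_NO_NOTIFY flag, as done above; with EVENT_IDX it instead
 * publishes the avail index it has seen, and the guest only kicks when
 * new buffers cross that mark.  Either way, the final smp_mb() pairs
 * the flag/event write against the caller's re-check of the avail
 * index, so a kick cannot be lost between "enable" and "check".
 */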
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
bool virtio_queue_get_notification(VirtQueue *vq)
{
    return vq->notification;
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}
static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                         uint16_t *flags,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    *flags = virtio_lduw_phys_cached(vdev, cache, off);
}

static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the rest of the fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}
static void vring_packed_desc_write_data(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off_id = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, id);
    hwaddr off_len = i * sizeof(VRingPackedDesc) +
                     offsetof(VRingPackedDesc, len);

    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->id);
    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
}

static void vring_packed_desc_write_flags(VirtIODevice *vdev,
                                          VRingPackedDesc *desc,
                                          MemoryRegionCache *cache,
                                          int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
}

static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}
static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail, used;

    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
    return (avail != used) && (avail == wrap_counter);
}
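
/*
 * Editor's note: in the packed ring a descriptor is "available" when
 * its AVAIL bit equals the driver's wrap counter and its USED bit does
 * not.  A small truth table for wrap_counter = 1:
 *
 *     AVAIL USED   avail != used   avail == wrap   result
 *       1    0          yes             yes        available
 *       1    1          no               -         not available (used)
 *       0    0          no               -         not available (stale)
 *
 * After the ring wraps, wrap_counter flips to 0 and the encodings swap.
 */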
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 * Called within rcu_read_lock().  */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}
static int virtio_queue_split_empty(VirtQueue *vq)
{
    bool empty;

    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    RCU_READ_LOCK_GUARD();
    empty = vring_avail_idx(vq) == vq->last_avail_idx;
    return empty;
}
/* Called within rcu_read_lock().  */
static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
{
    struct VRingPackedDesc desc;
    VRingMemoryRegionCaches *cache;

    if (unlikely(!vq->vring.desc)) {
        return 1;
    }

    cache = vring_get_region_caches(vq);
    if (!cache) {
        return 1;
    }

    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
                                 vq->last_avail_idx);

    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
}

static int virtio_queue_packed_empty(VirtQueue *vq)
{
    RCU_READ_LOCK_GUARD();
    return virtio_queue_packed_empty_rcu(vq);
}
int virtio_queue_empty(VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_empty(vq);
    } else {
        return virtio_queue_split_empty(vq);
    }
}
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
}
/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse -= elem->ndescs;
    virtqueue_unmap_sg(vq, elem, len);
}
static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
{
    vq->last_avail_idx -= num;
}

static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
{
    if (vq->last_avail_idx < num) {
        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
        vq->last_avail_wrap_counter ^= 1;
    } else {
        vq->last_avail_idx -= num;
    }
}
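
/*
 * Editor's note: the packed variant must rewind the index modulo the
 * ring size, and every time last_avail_idx wraps back past zero the
 * associated wrap counter has to flip as well, otherwise
 * previously-seen descriptors would be misread as fresh ones.
 */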
/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, 1);
    } else {
        virtqueue_split_rewind(vq, 1);
    }

    virtqueue_detach_element(vq, elem, len);
}
/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }

    vq->inuse -= num;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, num);
    } else {
        virtqueue_split_rewind(vq, num);
    }
    return true;
}
/* Called within rcu_read_lock().  */
static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                 unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}
static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                  unsigned int len, unsigned int idx)
{
    vq->used_elems[idx].index = elem->index;
    vq->used_elems[idx].len = len;
    vq->used_elems[idx].ndescs = elem->ndescs;
}
static void virtqueue_packed_fill_desc(VirtQueue *vq,
                                       const VirtQueueElement *elem,
                                       unsigned int idx,
                                       bool strict_order)
{
    uint16_t head;
    VRingMemoryRegionCaches *caches;
    VRingPackedDesc desc = {
        .id = elem->index,
        .len = elem->len,
    };
    bool wrap_counter = vq->used_wrap_counter;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    head = vq->used_idx + idx;
    if (head >= vq->vring.num) {
        head -= vq->vring.num;
        wrap_counter ^= 1;
    }
    if (wrap_counter) {
        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
    } else {
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}
/* Called within rcu_read_lock().  */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (virtio_device_disabled(vq->vdev)) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_fill(vq, elem, len, idx);
    } else {
        virtqueue_split_fill(vq, elem, len, idx);
    }
}
/* Called within rcu_read_lock().  */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}
static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int i, ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    /*
     * For an indirect element, 'ndescs' is 1.
     * For all other elements, 'ndescs' is the number of descriptors
     * chained by NEXT (as set in virtqueue_packed_pop).
     * So when an 'elem' is filled into the descriptor ring, its 'idx'
     * is the value of 'vq->used_idx' plus its 'ndescs'.
     */
    ndescs += vq->used_elems[0].ndescs;
    for (i = 1; i < count; i++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
        ndescs += vq->used_elems[i].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
        vq->signalled_used_valid = false;
    }
}
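
/*
 * Editor's note: the write ordering above is deliberate.  Elements
 * 1..count-1 are written first with relaxed ordering; the head element
 * (index 0) is written last with strict_order = true, which places a
 * write barrier before its flags update.  Since the guest discovers a
 * completed batch by polling the head descriptor's flags, this keeps it
 * from observing the head as used before the rest of the batch is
 * visible.
 */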
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    if (virtio_device_disabled(vq->vdev)) {
        vq->inuse -= count;
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_flush(vq, count);
    } else {
        virtqueue_split_flush(vq, count);
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    RCU_READ_LOCK_GUARD();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
/* Called within rcu_read_lock().  */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t avail_idx, num_heads;

    /* Use shadow index whenever possible. */
    avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
                                              : vring_avail_idx(vq);
    num_heads = avail_idx - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /*
     * On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read.
     *
     * This is necessary even if we are using a shadow index, since
     * the shadow index could have been initialized by calling
     * vring_avail_idx() outside of this function, i.e., by a guest
     * memory read not accompanied by a barrier.
     */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}
/* Called within rcu_read_lock().  */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
/* Reads the 'desc->next' descriptor into '*desc'. */
static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    if (desc->next >= max) {
        virtio_error(vdev, "Desc next is %u", desc->next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, desc->next);
    return VIRTQUEUE_READ_DESC_MORE;
}
/* Called within rcu_read_lock().  */
static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                            unsigned int *in_bytes, unsigned int *out_bytes,
                            unsigned max_in_bytes, unsigned max_out_bytes,
                            VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache;
    int64_t len = 0;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;
        unsigned int max = vq->vring.num;

        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_split_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
static int virtqueue_packed_read_next_desc(VirtQueue *vq,
                                           VRingPackedDesc *desc,
                                           MemoryRegionCache *desc_cache,
                                           unsigned int max,
                                           unsigned int *next,
                                           bool indirect)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    ++*next;
    if (*next == max) {
        if (indirect) {
            return VIRTQUEUE_READ_DESC_DONE;
        } else {
            (*next) -= vq->vring.num;
        }
    }

    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
    return VIRTQUEUE_READ_DESC_MORE;
}
/* Called within rcu_read_lock().  */
static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
                                             unsigned int *in_bytes,
                                             unsigned int *out_bytes,
                                             unsigned max_in_bytes,
                                             unsigned max_out_bytes,
                                             VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len = 0;
    VRingPackedDesc desc;
    bool wrap_counter;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    idx = vq->last_avail_idx;
    wrap_counter = vq->last_avail_wrap_counter;
    total_bufs = in_total = out_total = 0;

    for (;;) {
        unsigned int num_bufs = total_bufs;
        unsigned int i = idx;
        unsigned int max = vq->vring.num;

        desc_cache = &caches->desc;

        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
        if (!is_desc_avail(desc.flags, wrap_counter)) {
            break;
        }

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingPackedDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingPackedDesc);
            num_bufs = i = 0;
            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
                                                 &i, desc_cache ==
                                                 &indirect_desc_cache);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
            idx++;
        } else {
            idx += num_bufs - total_bufs;
            total_bufs = num_bufs;
        }

        if (idx >= vq->vring.num) {
            idx -= vq->vring.num;
            wrap_counter ^= 1;
        }
    }

    /* Record the index and wrap counter for a kick we want */
    vq->shadow_avail_idx = idx;
    vq->shadow_avail_wrap_counter = wrap_counter;

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    uint16_t desc_size;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();

    if (unlikely(!vq->vring.desc)) {
        goto err;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        goto err;
    }

    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
    if (caches->desc.len < vq->vring.num * desc_size) {
        virtio_error(vq->vdev, "Cannot map descriptor ring");
        goto err;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                         max_in_bytes, max_out_bytes,
                                         caches);
    } else {
        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                        max_in_bytes, max_out_bytes,
                                        caches);
    }

    return;
err:
    if (in_bytes) {
        *in_bytes = 0;
    }
    if (out_bytes) {
        *out_bytes = 0;
    }
}
*vq
, unsigned int in_bytes
,
1380 unsigned int out_bytes
)
1382 unsigned int in_total
, out_total
;
1384 virtqueue_get_avail_bytes(vq
, &in_total
, &out_total
, in_bytes
, out_bytes
);
1385 return in_bytes
<= in_total
&& out_bytes
<= out_total
;
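
/*
 * Editor's note: a hypothetical caller, for illustration only (the
 * buffer sizes are made up).  A device that needs, say, a 12-byte
 * request header from the guest and room for a 512-byte response can
 * test for both before committing to a pop:
 *
 *     if (virtqueue_avail_bytes(vq, 512, 12)) {
 *         VirtQueueElement *elem = virtqueue_pop(vq, sizeof(*elem));
 *         ... process elem, then virtqueue_push(vq, elem, written) ...
 *     }
 *
 * Note the argument order: in_bytes is space the device writes,
 * out_bytes is data the driver wrote.
 */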
static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE,
                                              MEMTXATTRS_UNSPECIFIED);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}
/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}
static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int num_sg,
                                bool is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE,
                                        MEMTXATTRS_UNSPECIFIED);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
                        false);
}
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
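
/*
 * Editor's note: the element is a single allocation with the four
 * variable-size arrays packed behind the VirtQueueElement header:
 *
 *     [VirtQueueElement][in_addr[]][out_addr[]][in_sg[]][out_sg[]]
 *
 * Each section is aligned with QEMU_ALIGN_UP to the natural alignment
 * of its element type, so one g_malloc()/g_free() pair covers the whole
 * object, and a caller may embed VirtQueueElement at the head of a
 * larger device-specific struct (hence the sz parameter).
 */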
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingPackedDesc desc;
    uint16_t id;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_packed_empty_rcu(vq)) {
        goto done;
    }

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    i = vq->last_avail_idx;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
    id = desc.id;
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingPackedDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingPackedDesc);
        i = 0;
        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
                                             desc_cache ==
                                             &indirect_desc_cache);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    elem->index = id;
    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
    vq->last_avail_idx += elem->ndescs;
    vq->inuse += elem->ndescs;

    if (vq->last_avail_idx >= vq->vring.num) {
        vq->last_avail_idx -= vq->vring.num;
        vq->last_avail_wrap_counter ^= 1;
    }

    vq->shadow_avail_idx = vq->last_avail_idx;
    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    if (virtio_device_disabled(vq->vdev)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtqueue_packed_pop(vq, sz);
    } else {
        return virtqueue_split_pop(vq, sz);
    }
}
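
/*
 * Editor's note: a minimal device-side consumption loop, sketched for
 * illustration only (the handler name and process() are hypothetical):
 *
 *     static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *             size_t written = process(elem->out_sg, elem->out_num,
 *                                      elem->in_sg, elem->in_num);
 *             virtqueue_push(vq, elem, written);
 *             g_free(elem);
 *         }
 *         virtio_notify(vdev, vq);
 *     }
 *
 * Error paths would use virtqueue_detach_element() instead of pushing.
 */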
static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
{
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    VRingPackedDesc desc;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache *desc_cache;

    RCU_READ_LOCK_GUARD();

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return 0;
    }

    desc_cache = &caches->desc;

    virtio_queue_set_notification(vq, 0);

    while (vq->inuse < vq->vring.num) {
        unsigned int idx = vq->last_avail_idx;
        /*
         * works similar to virtqueue_pop but does not map buffers
         * and does not allocate any memory.
         */
        vring_packed_desc_read(vdev, &desc, desc_cache,
                               vq->last_avail_idx, true);
        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
            break;
        }
        elem.index = desc.id;
        elem.ndescs = 1;
        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
                                               vq->vring.num, &idx, false)) {
            ++elem.ndescs;
        }
        /*
         * immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0.
         */
        virtqueue_push(vq, &elem, 0);
        dropped++;
        vq->last_avail_idx += elem.ndescs;
        if (vq->last_avail_idx >= vq->vring.num) {
            vq->last_avail_idx -= vq->vring.num;
            vq->last_avail_wrap_counter ^= 1;
        }
    }

    return dropped;
}
static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
{
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
        /* works similar to virtqueue_pop but does not map buffers
         * and does not allocate any memory */
        smp_rmb();
        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
            break;
        }
        vq->inuse++;
        vq->last_avail_idx++;
        if (fEventIdx) {
            vring_set_avail_event(vq, vq->last_avail_idx);
        }
        /* immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0 */
        virtqueue_push(vq, &elem, 0);
        dropped++;
    }

    return dropped;
}
/* virtqueue_drop_all:
 * @vq: The #VirtQueue
 * Drops all queued buffers and indicates them to the guest
 * as if they are done. Useful when buffers can not be
 * processed but must be returned to the guest.
 */
unsigned int virtqueue_drop_all(VirtQueue *vq)
{
    struct VirtIODevice *vdev = vq->vdev;

    if (virtio_device_disabled(vq->vdev)) {
        return 0;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtqueue_packed_drop_all(vq);
    } else {
        return virtqueue_split_drop_all(vq);
    }
}
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * This is just one thing (there are probably more) that must be
     * fixed before we can allow NDEBUG compilation.
     */
    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        qemu_get_be32s(f, &elem->ndescs);
    }

    virtqueue_map(vdev, elem);
    return elem;
}
void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
                                VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout. */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }

    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        qemu_put_be32s(f, &elem->ndescs);
    }

    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (virtio_device_disabled(vdev)) {
        return;
    }

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
        return -EFAULT;
    }

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }

    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
    }

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;

    return 0;
}
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    if (cpu_virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}
static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
{
    vdev->vq[i].vring.desc = 0;
    vdev->vq[i].vring.avail = 0;
    vdev->vq[i].vring.used = 0;
    vdev->vq[i].last_avail_idx = 0;
    vdev->vq[i].shadow_avail_idx = 0;
    vdev->vq[i].used_idx = 0;
    vdev->vq[i].last_avail_wrap_counter = true;
    vdev->vq[i].shadow_avail_wrap_counter = true;
    vdev->vq[i].used_wrap_counter = true;
    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
    vdev->vq[i].signalled_used = 0;
    vdev->vq[i].signalled_used_valid = false;
    vdev->vq[i].notification = true;
    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    vdev->vq[i].inuse = 0;
    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
}
void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->queue_reset) {
        k->queue_reset(vdev, queue_index);
    }

    __virtio_queue_reset(vdev, queue_index);
}
void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * TODO: Seabios is currently out of spec and triggering this error.
     * So this needs to be fixed in Seabios, then this can
     * be re-enabled for new machine types only, and also after
     * being converted to LOG_GUEST_ERROR.
     *
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("queue_enable is only supported in devices of virtio "
                     "1.0 or later.");
    }
    */

    if (k->queue_enable) {
        k->queue_enable(vdev, queue_index);
    }
}
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (vdev->vhost_started && k->get_vhost) {
        vhost_reset_device(k->get_vhost(vdev));
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->disabled = false;
    qatomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        __virtio_queue_reset(vdev, i);
    }
}
*vdev
, int n
, hwaddr addr
)
2174 if (!vdev
->vq
[n
].vring
.num
) {
2177 vdev
->vq
[n
].vring
.desc
= addr
;
2178 virtio_queue_update_rings(vdev
, n
);
2181 hwaddr
virtio_queue_get_addr(VirtIODevice
*vdev
, int n
)
2183 return vdev
->vq
[n
].vring
.desc
;
2186 void virtio_queue_set_rings(VirtIODevice
*vdev
, int n
, hwaddr desc
,
2187 hwaddr avail
, hwaddr used
)
2189 if (!vdev
->vq
[n
].vring
.num
) {
2192 vdev
->vq
[n
].vring
.desc
= desc
;
2193 vdev
->vq
[n
].vring
.avail
= avail
;
2194 vdev
->vq
[n
].vring
.used
= used
;
2195 virtio_init_region_cache(vdev
, n
);
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num_default;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    if (align) {
        vdev->vq[n].vring.align = align;
        virtio_queue_update_rings(vdev, n);
    }
}
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);

    return &vdev->vq[i];
}
void virtio_delete_queue(VirtQueue *vq)
{
    vq->vring.num = 0;
    vq->vring.num_default = 0;
    vq->handle_output = NULL;
    g_free(vq->used_elems);
    vq->used_elems = NULL;
    virtio_virtqueue_reset_region_cache(vq);
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    virtio_delete_queue(&vdev->vq[n]);
}
static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = qatomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        qatomic_or(&vdev->isr, value);
    }
}
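
/*
 * With VIRTIO_RING_F_EVENT_IDX the driver publishes a "used event" index
 * and the device only needs to interrupt once the used index passes it.
 * vring_need_event(event, new, old) implements the wrap-safe comparison
 * (uint16_t)(new - event - 1) < (uint16_t)(new - old) from the virtio spec.
 */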
/* Called within rcu_read_lock(). */
static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
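
/*
 * For packed rings the driver's event suppression area encodes the event
 * offset in bits 0..14 of off_wrap and the expected ring wrap counter in
 * bit 15; the offset has to be unwrapped before the comparison.
 */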
static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
                                    uint16_t off_wrap, uint16_t new,
                                    uint16_t old)
{
    int off = off_wrap & ~(1 << 15);

    if (wrap != off_wrap >> 15) {
        off -= vq->vring.num;
    }

    return vring_need_event(off, new, old);
}

/* Called within rcu_read_lock(). */
static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    VRingPackedDescEvent e;
    uint16_t old, new;
    bool v;
    VRingMemoryRegionCaches *caches;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return false;
    }

    vring_packed_event_read(vdev, &caches->avail, &e);

    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;

    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
        return false;
    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
        return true;
    }

    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
                                         e.off_wrap, new, old);
}

/* Called within rcu_read_lock(). */
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_packed_should_notify(vdev, vq);
    } else {
        return virtio_split_should_notify(vdev, vq);
    }
}
/* Batch irqs while inside a defer_call_begin()/defer_call_end() section */
static void virtio_notify_irqfd_deferred_fn(void *opaque)
{
    EventNotifier *notifier = opaque;
    VirtQueue *vq = container_of(notifier, VirtQueue, guest_notifier);

    trace_virtio_notify_irqfd_deferred_fn(vq->vdev, vq);
    event_notifier_set(notifier);
}

void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown with
     * MSI enabled.
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
}

static void virtio_irq(VirtQueue *vq)
{
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vq->vdev, vq->vector);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify(vdev, vq);
    virtio_irq(vq);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}
static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_packed_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}

static bool virtio_started_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->started;
}

static bool virtio_disabled_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->disabled;
}
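
/*
 * Optional pieces of device state are migrated as vmstate subsections;
 * each subsection is only put on the wire when its .needed callback
 * returns true, which keeps the stream compatible with older QEMUs that
 * do not know about that piece of state.
 */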
static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_packed_virtqueue = {
    .name = "packed_virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
        VMSTATE_UINT16(used_idx, struct VirtQueue),
        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
        VMSTATE_UINT32(inuse, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_packed_virtqueues = {
    .name = "virtio/packed_virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_packed_virtqueue_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size,
                           const VMStateField *field)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static int put_extra_state(QEMUFile *f, void *pv, size_t size,
                           const VMStateField *field, JSONWriter *vmdesc)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
    return 0;
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (const VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_started = {
    .name = "virtio/started",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_started_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(started, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_disabled = {
    .name = "virtio/disabled",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_disabled_needed,
    .fields = (const VMStateField[]) {
        VMSTATE_BOOL(disabled, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * const []) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        &vmstate_virtio_started,
        &vmstate_virtio_packed_virtqueues,
        &vmstate_virtio_disabled,
        NULL
    }
};
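
/*
 * Serialize device state: transport config first, then the core fields
 * (status, ISR, queue_sel, low 32 feature bits, config space), the
 * per-queue state, the child device's own state, and finally the
 * vmstate_virtio subsections above.
 */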
int virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /*
         * Save desc now, the rest of the ring addresses are saved in
         * subsections for VIRTIO-1 devices.
         */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    if (vdc->vmsd) {
        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
/* A wrapper for use as a VMState .put function */
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
                             const VMStateField *field, JSONWriter *vmdesc)
{
    return virtio_save(VIRTIO_DEVICE(opaque), f);
}

/* A wrapper for use as a VMState .get function */
static int coroutine_mixed_fn
virtio_device_get(QEMUFile *f, void *opaque, size_t size,
                  const VMStateField *field)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    return virtio_load(vdev, f, dc->vmsd->version_id);
}

const VMStateInfo virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

typedef struct VirtioSetFeaturesNocheckData {
    Coroutine *co;
    VirtIODevice *vdev;
    uint64_t val;
    int ret;
} VirtioSetFeaturesNocheckData;

static void virtio_set_features_nocheck_bh(void *opaque)
{
    VirtioSetFeaturesNocheckData *data = opaque;

    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
    aio_co_wake(data->co);
}
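
/*
 * Setting features can invoke transport and device callbacks that are
 * presumably not safe in coroutine context, so when called from a
 * coroutine we bounce through a one-shot bottom half in the current
 * AioContext and yield until it has run.
 */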
static int coroutine_mixed_fn
virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
{
    if (qemu_in_coroutine()) {
        VirtioSetFeaturesNocheckData data = {
            .co = qemu_coroutine_self(),
            .vdev = vdev,
            .val = val,
        };
        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
                                virtio_set_features_nocheck_bh, &data);
        qemu_coroutine_yield();
        return data.ret;
    } else {
        return virtio_set_features_nocheck(vdev, val);
    }
}
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    int ret;
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }

    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
                      __func__, vdev->name);
    }

    ret = virtio_set_features_nocheck(vdev, val);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
        int i;
        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
            if (vdev->vq[i].vring.num != 0) {
                virtio_init_region_cache(vdev, i);
            }
        }
    }
    if (!ret) {
        if (!virtio_device_started(vdev, vdev->status) &&
            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
            vdev->start_on_kick = true;
        }
    }
    return ret;
}
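
/*
 * Compute the visible config space size from the negotiated features.
 * A device declares its layout once, e.g. (illustrative, modeled on
 * virtio-net):
 *
 *     static const VirtIOFeature feature_sizes[] = {
 *         {.flags = 1ULL << VIRTIO_NET_F_MQ,
 *          .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 *         {}
 *     };
 */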
size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
                              uint64_t host_features)
{
    size_t config_size = params->min_size;
    const VirtIOFeature *feature_sizes = params->feature_sizes;
    size_t i;

    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }

    assert(config_size <= params->max_size);
    return config_size;
}
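
/*
 * Counterpart of virtio_save(): restore the transport config, core fields,
 * per-queue state and subsections in the same order, then sanity-check the
 * loaded indices against the rings before resuming the device.
 */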
int coroutine_mixed_fn
virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    if (!virtio_device_started(vdev, vdev->status) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->start_on_kick = true;
    }

    RCU_READ_LOCK_GUARD();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up.  Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
                vdev->vq[i].shadow_avail_wrap_counter =
                                        vdev->vq[i].last_avail_wrap_counter;
                continue;
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                vdev->vq[i].used_idx = 0;
                vdev->vq[i].shadow_avail_idx = 0;
                vdev->vq[i].inuse = 0;
                continue;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    if (vdc->post_load) {
        ret = vdc->post_load(vdev);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}

static void virtio_vmstate_change(void *opaque, bool running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && virtio_device_started(vdev, vdev->status);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
                                       vdev_size, vdev_name, &error_abort,
                                       NULL);
    qdev_alias_all_properties(vdev, proxy_obj);
}
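
/*
 * Common initialization for all virtio devices.  A transport-specific
 * proxy realizes the child device, which in turn is expected to call
 * virtio_init() followed by virtio_add_queue() for each virtqueue it
 * needs.
 */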
void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->vhost_started = false;
    vdev->device_id = device_id;
    vdev->status = 0;
    qatomic_set(&vdev->isr, 0);
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

    vdev->name = virtio_id_to_name(device_id);
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
            virtio_vmstate_change, vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}
/*
 * Only devices that have already been around prior to defining the virtio
 * standard support legacy mode; this includes devices not specified in the
 * standard.  All newer devices conform to the virtio standard only.
 */
bool virtio_legacy_allowed(VirtIODevice *vdev)
{
    switch (vdev->device_id) {
    case VIRTIO_ID_NET:
    case VIRTIO_ID_BLOCK:
    case VIRTIO_ID_CONSOLE:
    case VIRTIO_ID_RNG:
    case VIRTIO_ID_BALLOON:
    case VIRTIO_ID_RPMSG:
    case VIRTIO_ID_SCSI:
    case VIRTIO_ID_9P:
    case VIRTIO_ID_RPROC_SERIAL:
    case VIRTIO_ID_CAIF:
        return true;
    default:
        return false;
    }
}

bool virtio_legacy_check_disabled(VirtIODevice *vdev)
{
    return vdev->disable_legacy_check;
}
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
{
    return virtio_queue_get_desc_addr(vdev, n) != 0;
}

bool virtio_queue_enabled(VirtIODevice *vdev, int n)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->queue_enabled) {
        return k->queue_enabled(qbus->parent, n);
    }
    return virtio_queue_enabled_legacy(vdev, n);
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    int s;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    int s;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
}
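
/*
 * For packed rings the "last avail" state exchanged with transports packs
 * four values into 32 bits: bits 0..14 hold last_avail_idx, bit 15 its
 * wrap counter, bits 16..30 used_idx and bit 31 the used wrap counter.
 */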
static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
                                                           int n)
{
    unsigned int avail, used;

    avail = vdev->vq[n].last_avail_idx;
    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;

    used = vdev->vq[n].used_idx;
    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;

    return avail | used << 16;
}

static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    return vdev->vq[n].last_avail_idx;
}

unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_get_last_avail_idx(vdev, n);
    } else {
        return virtio_queue_split_get_last_avail_idx(vdev, n);
    }
}

static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
                                                   int n, unsigned int idx)
{
    struct VirtQueue *vq = &vdev->vq[n];

    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
    vq->last_avail_wrap_counter =
        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
    idx >>= 16;
    vq->used_idx = idx & 0x7fff;
    vq->used_wrap_counter = !!(idx & 0x8000);
}

static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
                                                  int n, unsigned int idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
                                     unsigned int idx)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
    } else {
        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
    }
}

static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
                                                       int n)
{
    /* We don't have a reference like avail idx in shared memory */
    return;
}

static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
    }
}

void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_restore_last_avail_idx(vdev, n);
    } else {
        virtio_queue_split_restore_last_avail_idx(vdev, n);
    }
}
static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    /* used idx was updated through set_last_avail_idx() */
    return;
}

static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
    }
}

void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_update_used_idx(vdev, n);
    } else {
        return virtio_split_packed_update_used_idx(vdev, n);
    }
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

static void virtio_config_guest_notifier_read(EventNotifier *n)
{
    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_notify_config(vdev);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
                                                 bool assign, bool with_irqfd)
{
    EventNotifier *n;
    n = &vdev->config_notifier;
    if (assign && !with_irqfd) {
        event_notifier_set_handler(n, virtio_config_guest_notifier_read);
    } else {
        event_notifier_set_handler(n, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_config_guest_notifier_read(n);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
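
/*
 * While the AioContext poll loop is actively spinning on a virtqueue we
 * can suppress guest->host notifications entirely: poll_begin() disables
 * them, poll() inspects the ring directly, and poll_end() re-enables them
 * so a sleeping event loop still gets kicked.
 */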
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}

static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    return vq->vring.desc && !virtio_queue_empty(vq);
}

static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_notify_vq(vq);
}

static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}

void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    /*
     * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
     * Re-enable them.  (And if detach has not been used before, notifications
     * being enabled is still the default state while a notifier is attached;
     * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
     * notifications enabled once the polling section is left.)
     */
    if (!virtio_queue_get_notification(vq)) {
        virtio_queue_set_notification(vq, 1);
    }

    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           virtio_queue_host_notifier_aio_poll,
                           virtio_queue_host_notifier_aio_poll_ready);
    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                virtio_queue_host_notifier_aio_poll_begin,
                                virtio_queue_host_notifier_aio_poll_end);

    /*
     * We will have ignored notifications about new requests from the guest
     * while no notifiers were attached, so "kick" the virt queue to process
     * those requests now.
     */
    event_notifier_set(&vq->host_notifier);
}

/*
 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
 * this for rx virtqueues and similar cases where the virtqueue handler
 * function does not pop all elements. When the virtqueue is left non-empty
 * polling consumes CPU cycles and should not be used.
 */
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
    /* See virtio_queue_aio_attach_host_notifier() */
    if (!virtio_queue_get_notification(vq)) {
        virtio_queue_set_notification(vq, 1);
    }

    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           NULL, NULL);

    /*
     * See virtio_queue_aio_attach_host_notifier().
     * Note that this may be unnecessary for the type of virtqueues this
     * function is used for.  Still, it will not hurt to have a quick look into
     * whether we can/should process any of the virtqueue elements.
     */
    event_notifier_set(&vq->host_notifier);
}
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);

    /*
     * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
     * will run after io_poll_begin(), so by removing the notifier, we do not
     * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
     * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
     * notifications are enabled or disabled.  It does not really matter anyway;
     * we just removed the notifier, so we do not care about notifications until
     * we potentially re-attach it.  The attach_host_notifier functions will
     * ensure that notifications are enabled again when they are needed.
     */
}

void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
{
    return &vdev->config_notifier;
}

void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}
int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
                                      MemoryRegion *mr, bool assign)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->set_host_notifier_mr) {
        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
    }

    return -1;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}
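
/*
 * Mark the device broken after a fatal guest-triggered error.  For virtio-1
 * devices we can also tell the guest by setting NEEDS_RESET and raising a
 * config interrupt; legacy devices have no such channel, so the device
 * simply stops processing rings until it is reset.
 */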
void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
        virtio_notify_config(vdev);
    }

    vdev->broken = true;
}
static void virtio_memory_listener_commit(MemoryListener *listener)
{
    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_init_region_cache(vdev, i);
    }
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        vdc->unrealize(dev);
        return;
    }

    vdev->listener.commit = virtio_memory_listener_commit;
    vdev->listener.name = "virtio";
    memory_listener_register(&vdev->listener, vdev->dma_as);
}

static void virtio_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    memory_listener_unregister(&vdev->listener);
    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev);
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}
static void virtio_device_free_virtqueues(VirtIODevice *vdev)
{
    int i;

    if (!vdev->vq) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
    }
    g_free(vdev->vq);
}

static void virtio_device_instance_finalize(Object *obj)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(obj);

    virtio_device_free_virtqueues(vdev);

    g_free(vdev->config);
    g_free(vdev->vector_queues);
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
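
/*
 * With ioeventfd, the guest's MMIO/PIO queue kicks are turned into eventfd
 * writes by the hypervisor, so the main loop or an iothread can consume
 * them without stopping the vCPU thread.
 */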
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    i = n; /* save n for a second iteration after transaction is committed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}
int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_start_ioeventfd(vbus);
}

static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, n);
    }
}

int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_grab_ioeventfd(vbus);
}

void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_release_ioeventfd(vbus);
}
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    device_class_set_props(dc, virtio_properties);
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}

bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_ioeventfd_enabled(vbus);
}
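
/*
 * QMP debug helper behind x-query-virtio-queue-status: snapshot one
 * virtqueue's bookkeeping.  When a vhost backend owns the queue,
 * last_avail_idx lives in the backend and is fetched through the vhost ops.
 */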
VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
                                                 uint16_t queue,
                                                 Error **errp)
{
    VirtIODevice *vdev;
    VirtQueueStatus *status;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIODevice", path);
        return NULL;
    }

    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }

    status = g_new0(VirtQueueStatus, 1);
    status->name = g_strdup(vdev->name);
    status->queue_index = vdev->vq[queue].queue_index;
    status->inuse = vdev->vq[queue].inuse;
    status->vring_num = vdev->vq[queue].vring.num;
    status->vring_num_default = vdev->vq[queue].vring.num_default;
    status->vring_align = vdev->vq[queue].vring.align;
    status->vring_desc = vdev->vq[queue].vring.desc;
    status->vring_avail = vdev->vq[queue].vring.avail;
    status->vring_used = vdev->vq[queue].vring.used;
    status->used_idx = vdev->vq[queue].used_idx;
    status->signalled_used = vdev->vq[queue].signalled_used;
    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;

    if (vdev->vhost_started) {
        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
        struct vhost_dev *hdev = vdc->get_vhost(vdev);

        /* check if vq index exists for vhost as well */
        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
            status->has_last_avail_idx = true;

            int vhost_vq_index =
                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
            struct vhost_vring_state state = {
                .index = vhost_vq_index,
            };

            status->last_avail_idx =
                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
        }
    } else {
        status->has_shadow_avail_idx = true;
        status->has_last_avail_idx = true;
        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
    }

    return status;
}
static strList *qmp_decode_vring_desc_flags(uint16_t flags)
{
    strList *list = NULL;
    strList *node;
    int i;

    struct {
        uint16_t flag;
        const char *value;
    } map[] = {
        { VRING_DESC_F_NEXT, "next" },
        { VRING_DESC_F_WRITE, "write" },
        { VRING_DESC_F_INDIRECT, "indirect" },
        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
        { 1 << VRING_PACKED_DESC_F_USED, "used" },
        { 0, "" }
    };

    for (i = 0; map[i].flag; i++) {
        if ((map[i].flag & flags) == 0) {
            continue;
        }

        node = g_malloc0(sizeof(strList));
        node->value = g_strdup(map[i].value);
        node->next = list;
        list = node;
    }

    return list;
}
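
/*
 * QMP debug helper behind x-query-virtio-queue-element: decode the
 * descriptor chain at the queue's head (or at an explicit index) without
 * popping it.  Only split rings are supported here.
 */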
VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
                                                     uint16_t queue,
                                                     bool has_index,
                                                     uint16_t index,
                                                     Error **errp)
{
    VirtIODevice *vdev;
    VirtQueue *vq;
    VirtioQueueElement *element = NULL;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIO device", path);
        return NULL;
    }

    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }
    vq = &vdev->vq[queue];

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        error_setg(errp, "Packed ring not supported");
        return NULL;
    } else {
        unsigned int head, i, max;
        VRingMemoryRegionCaches *caches;
        MemoryRegionCache indirect_desc_cache;
        MemoryRegionCache *desc_cache;
        VRingDesc desc;
        VirtioRingDescList *list = NULL;
        VirtioRingDescList *node;
        int rc; int ndescs;

        address_space_cache_init_empty(&indirect_desc_cache);

        RCU_READ_LOCK_GUARD();

        max = vq->vring.num;

        if (!has_index) {
            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
        } else {
            head = vring_avail_ring(vq, index % vq->vring.num);
        }
        i = head;

        caches = vring_get_region_caches(vq);
        if (!caches) {
            error_setg(errp, "Region caches not initialized");
            return NULL;
        }
        if (caches->desc.len < max * sizeof(VRingDesc)) {
            error_setg(errp, "Cannot map descriptor ring");
            return NULL;
        }

        desc_cache = &caches->desc;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
        if (desc.flags & VRING_DESC_F_INDIRECT) {
            int64_t len;
            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                error_setg(errp, "Cannot map indirect buffer");
                goto done;
            }

            max = desc.len / sizeof(VRingDesc);
            i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        element = g_new0(VirtioQueueElement, 1);
        element->avail = g_new0(VirtioRingAvail, 1);
        element->used = g_new0(VirtioRingUsed, 1);
        element->name = g_strdup(vdev->name);
        element->index = head;
        element->avail->flags = vring_avail_flags(vq);
        element->avail->idx = vring_avail_idx(vq);
        element->avail->ring = head;
        element->used->flags = vring_used_flags(vq);
        element->used->idx = vring_used_idx(vq);
        ndescs = 0;

        do {
            /* A buggy driver may produce an infinite loop */
            if (ndescs >= max) {
                break;
            }
            node = g_new0(VirtioRingDescList, 1);
            node->value = g_new0(VirtioRingDesc, 1);
            node->value->addr = desc.addr;
            node->value->len = desc.len;
            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
            node->next = list;
            list = node;

            ndescs++;
            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);
        element->descs = list;
done:
        address_space_cache_destroy(&indirect_desc_cache);
    }

    return element;
}
static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .instance_finalize = virtio_device_instance_finalize,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)
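
/*
 * Bottom halves created here are armed with the transport's memory
 * re-entrancy guard, so a guest that re-enters the device through an MMIO
 * handler cannot run the bottom half recursively.
 */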
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
                                   QEMUBHFunc *cb, void *opaque,
                                   const char *name)
{
    DeviceState *transport = qdev_get_parent_bus(dev)->parent;

    return qemu_bh_new_full(cb, opaque, name,
                            &transport->mem_reentrancy_guard);
}