/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-virtio.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "hw/core/cpu.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/vhost.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
#include "sysemu/runstate.h"
#include "virtio-qmp.h"

#include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/vhost_types.h"
#include "standard-headers/linux/virtio_blk.h"
#include "standard-headers/linux/virtio_console.h"
#include "standard-headers/linux/virtio_gpu.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_scsi.h"
#include "standard-headers/linux/virtio_i2c.h"
#include "standard-headers/linux/virtio_balloon.h"
#include "standard-headers/linux/virtio_iommu.h"
#include "standard-headers/linux/virtio_mem.h"
#include "standard-headers/linux/virtio_vsock.h"
/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
} VRingPackedDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
    VRingMemoryRegionCaches *caches;
} VRing;

typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;
struct VirtQueue
{
    VRing vring;
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};
const char *virtio_device_names[] = {
    [VIRTIO_ID_NET] = "virtio-net",
    [VIRTIO_ID_BLOCK] = "virtio-blk",
    [VIRTIO_ID_CONSOLE] = "virtio-serial",
    [VIRTIO_ID_RNG] = "virtio-rng",
    [VIRTIO_ID_BALLOON] = "virtio-balloon",
    [VIRTIO_ID_IOMEM] = "virtio-iomem",
    [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
    [VIRTIO_ID_SCSI] = "virtio-scsi",
    [VIRTIO_ID_9P] = "virtio-9p",
    [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
    [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
    [VIRTIO_ID_CAIF] = "virtio-caif",
    [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
    [VIRTIO_ID_GPU] = "virtio-gpu",
    [VIRTIO_ID_CLOCK] = "virtio-clk",
    [VIRTIO_ID_INPUT] = "virtio-input",
    [VIRTIO_ID_VSOCK] = "vhost-vsock",
    [VIRTIO_ID_CRYPTO] = "virtio-crypto",
    [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
    [VIRTIO_ID_PSTORE] = "virtio-pstore",
    [VIRTIO_ID_IOMMU] = "virtio-iommu",
    [VIRTIO_ID_MEM] = "virtio-mem",
    [VIRTIO_ID_SOUND] = "virtio-sound",
    [VIRTIO_ID_FS] = "virtio-user-fs",
    [VIRTIO_ID_PMEM] = "virtio-pmem",
    [VIRTIO_ID_RPMB] = "virtio-rpmb",
    [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
    [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
    [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
    [VIRTIO_ID_SCMI] = "virtio-scmi",
    [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
    [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
    [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
    [VIRTIO_ID_CAN] = "virtio-can",
    [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
    [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
    [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
    [VIRTIO_ID_BT] = "virtio-bluetooth",
    [VIRTIO_ID_GPIO] = "virtio-gpio"
};
static const char *virtio_id_to_name(uint16_t device_id)
{
    assert(device_id < G_N_ELEMENTS(virtio_device_names));
    const char *name = virtio_device_names[device_id];
    assert(name != NULL);
    return name;
}
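/*
 * For example, virtio_id_to_name(VIRTIO_ID_NET) yields "virtio-net"; an
 * ID with no entry in virtio_device_names[] fails the assertions above.
 */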
/* Called within call_rcu().  */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
    assert(caches != NULL);
    address_space_cache_destroy(&caches->desc);
    address_space_cache_destroy(&caches->avail);
    address_space_cache_destroy(&caches->used);
    g_free(caches);
}

static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = qatomic_read(&vq->vring.caches);
    qatomic_rcu_set(&vq->vring.caches, NULL);
    if (caches) {
        call_rcu(caches, virtio_free_region_cache, rcu);
    }
}
void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;

    addr = vq->vring.desc;
    if (!addr) {
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                   true : false;
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}
/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num || !vring->desc || !vring->align) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
    virtio_init_region_cache(vdev, n);
}
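/*
 * Worked example for the layout computed above: with the default
 * VIRTIO_PCI_VRING_ALIGN of 4096 and num = 256 descriptors,
 *
 *   avail = desc + 256 * sizeof(VRingDesc)               = desc + 4096
 *   used  = vring_align(avail + offsetof(VRingAvail, ring[256]), 4096)
 *         = vring_align(avail + 4 + 2 * 256, 4096)       = avail + 4096
 *
 * so each part of the split ring starts on its own page boundary.
 */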
/* Called within rcu_read_lock().  */
static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                                  MemoryRegionCache *cache, int i)
{
    address_space_read_cached(cache, i * sizeof(VRingDesc),
                              desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static void vring_packed_event_read(VirtIODevice *vdev,
                                    MemoryRegionCache *cache,
                                    VRingPackedDescEvent *e)
{
    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);

    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
    /* Make sure flags is seen before off_wrap */
    smp_rmb();
    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
    virtio_tswap16s(vdev, &e->flags);
}

static void vring_packed_off_wrap_write(VirtIODevice *vdev,
                                        MemoryRegionCache *cache,
                                        uint16_t off_wrap)
{
    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);

    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
}

static void vring_packed_flags_write(VirtIODevice *vdev,
                                     MemoryRegionCache *cache, uint16_t flags)
{
    hwaddr off = offsetof(VRingPackedDescEvent, flags);

    virtio_stw_phys_cached(vdev, cache, off, flags);
    address_space_cache_invalidate(cache, off, sizeof(flags));
}
/* Called within rcu_read_lock().  */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    return qatomic_rcu_read(&vq->vring.caches);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, idx);

    if (!caches) {
        return 0;
    }

    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
    return vq->shadow_avail_idx;
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, ring[i]);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, ring[i]);

    if (!caches) {
        return;
    }

    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_used_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}
/* Called within rcu_read_lock().  */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (caches) {
        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
    }

    vq->used_idx = val;
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;

    if (!vq->notification) {
        return;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
bool virtio_queue_get_notification(VirtQueue *vq)
{
    return vq->notification;
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}
static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                         uint16_t *flags,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    *flags = virtio_lduw_phys_cached(vdev, cache, off);
}

static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the rest of the fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}
static void vring_packed_desc_write_data(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off_id = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, id);
    hwaddr off_len = i * sizeof(VRingPackedDesc) +
                     offsetof(VRingPackedDesc, len);

    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->id);
    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
}

static void vring_packed_desc_write_flags(VirtIODevice *vdev,
                                          VRingPackedDesc *desc,
                                          MemoryRegionCache *cache,
                                          int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
}

static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}
static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail, used;

    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
    return (avail != used) && (avail == wrap_counter);
}
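/*
 * In the packed layout there is no separate used ring; availability is
 * encoded in the descriptor itself.  A descriptor is "avail" when its
 * AVAIL bit differs from its USED bit and the AVAIL bit equals the
 * driver's current wrap counter.  For example, on the first lap
 * (wrap_counter == true) the driver publishes avail=1/used=0; once the
 * device marks the entry used, the two bits match again and
 * is_desc_avail() returns false.
 */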
/* Fetch avail_idx from VQ memory only when we really need to know if
 * the guest has added some buffers.
 * Called within rcu_read_lock().  */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static int virtio_queue_split_empty(VirtQueue *vq)
{
    bool empty;

    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    RCU_READ_LOCK_GUARD();
    empty = vring_avail_idx(vq) == vq->last_avail_idx;
    return empty;
}

/* Called within rcu_read_lock().  */
static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
{
    struct VRingPackedDesc desc;
    VRingMemoryRegionCaches *cache;

    if (unlikely(!vq->vring.desc)) {
        return 1;
    }

    cache = vring_get_region_caches(vq);
    if (!cache) {
        return 1;
    }

    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
                                 vq->last_avail_idx);

    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
}

static int virtio_queue_packed_empty(VirtQueue *vq)
{
    RCU_READ_LOCK_GUARD();
    return virtio_queue_packed_empty_rcu(vq);
}

int virtio_queue_empty(VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_empty(vq);
    } else {
        return virtio_queue_split_empty(vq);
    }
}
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++)
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
}
/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse -= elem->ndescs;
    virtqueue_unmap_sg(vq, elem, len);
}

static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
{
    vq->last_avail_idx -= num;
}

static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
{
    if (vq->last_avail_idx < num) {
        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
        vq->last_avail_wrap_counter ^= 1;
    } else {
        vq->last_avail_idx -= num;
    }
}
/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, 1);
    } else {
        virtqueue_split_rewind(vq, 1);
    }

    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }

    vq->inuse -= num;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, num);
    } else {
        virtqueue_split_rewind(vq, num);
    }
    return true;
}
static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                 unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                  unsigned int len, unsigned int idx)
{
    vq->used_elems[idx].index = elem->index;
    vq->used_elems[idx].len = len;
    vq->used_elems[idx].ndescs = elem->ndescs;
}
static void virtqueue_packed_fill_desc(VirtQueue *vq,
                                       const VirtQueueElement *elem,
                                       unsigned int idx,
                                       bool strict_order)
{
    uint16_t head;
    VRingMemoryRegionCaches *caches;
    VRingPackedDesc desc = {
        .id = elem->index,
        .len = elem->len,
    };
    bool wrap_counter = vq->used_wrap_counter;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    head = vq->used_idx + idx;
    if (head >= vq->vring.num) {
        head -= vq->vring.num;
        wrap_counter ^= 1;
    }
    if (wrap_counter) {
        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
    } else {
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}
/* Called within rcu_read_lock().  */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (virtio_device_disabled(vq->vdev)) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_fill(vq, elem, len, idx);
    } else {
        virtqueue_split_fill(vq, elem, len, idx);
    }
}
/* Called within rcu_read_lock().  */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}
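/*
 * The (int16_t)/(uint16_t) casts above keep the signalled_used check
 * correct across 16-bit index wraparound: e.g. old = 0xfffe and
 * count = 4 give new = 0x0002, yet new - old still evaluates to 4.
 */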
static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int i, ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    for (i = 1; i < count; i++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
        ndescs += vq->used_elems[i].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
    ndescs += vq->used_elems[0].ndescs;

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
        vq->signalled_used_valid = false;
    }
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    if (virtio_device_disabled(vq->vdev)) {
        vq->inuse -= count;
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_flush(vq, count);
    } else {
        virtqueue_split_flush(vq, count);
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    RCU_READ_LOCK_GUARD();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
/* Called within rcu_read_lock().  */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t avail_idx, num_heads;

    /* Use shadow index whenever possible. */
    avail_idx = (vq->shadow_avail_idx != idx) ? vq->shadow_avail_idx
                                              : vring_avail_idx(vq);
    num_heads = avail_idx - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /*
     * On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read.
     *
     * This is necessary even if we are using a shadow index, since
     * the shadow index could have been initialized by calling
     * vring_avail_idx() outside of this function, i.e., by a guest
     * memory read not accompanied by a barrier.
     */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

/* Called within rcu_read_lock().  */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
/* Reads the 'desc->next' descriptor into '*desc'. */
static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    if (desc->next >= max) {
        virtio_error(vdev, "Desc next is %u", desc->next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, desc->next);
    return VIRTQUEUE_READ_DESC_MORE;
}
/* Called within rcu_read_lock().  */
static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                            unsigned int *in_bytes, unsigned int *out_bytes,
                            unsigned max_in_bytes, unsigned max_out_bytes,
                            VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache;
    int64_t len = 0;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;
        unsigned int max = vq->vring.num;

        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_split_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
static int virtqueue_packed_read_next_desc(VirtQueue *vq,
                                           VRingPackedDesc *desc,
                                           MemoryRegionCache *desc_cache,
                                           unsigned int max,
                                           unsigned int *next,
                                           bool indirect)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    ++*next;
    if (*next == max) {
        if (indirect) {
            return VIRTQUEUE_READ_DESC_DONE;
        } else {
            (*next) -= vq->vring.num;
        }
    }

    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
    return VIRTQUEUE_READ_DESC_MORE;
}
/* Called within rcu_read_lock().  */
static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
                                             unsigned int *in_bytes,
                                             unsigned int *out_bytes,
                                             unsigned max_in_bytes,
                                             unsigned max_out_bytes,
                                             VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len = 0;
    VRingPackedDesc desc;
    bool wrap_counter;

    address_space_cache_init_empty(&indirect_desc_cache);

    idx = vq->last_avail_idx;
    wrap_counter = vq->last_avail_wrap_counter;
    total_bufs = in_total = out_total = 0;

    for (;;) {
        unsigned int num_bufs = total_bufs;
        unsigned int i = idx;
        unsigned int max = vq->vring.num;
        int rc;

        desc_cache = &caches->desc;

        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
        if (!is_desc_avail(desc.flags, wrap_counter)) {
            break;
        }

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingPackedDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingPackedDesc);
            num_bufs = i = 0;
            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
                                                 &i, desc_cache ==
                                                 &indirect_desc_cache);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
            idx++;
        } else {
            idx += num_bufs - total_bufs;
            total_bufs = num_bufs;
        }

        if (idx >= vq->vring.num) {
            idx -= vq->vring.num;
            wrap_counter ^= 1;
        }
    }

    /* Record the index and wrap counter for a kick we want */
    vq->shadow_avail_idx = idx;
    vq->shadow_avail_wrap_counter = wrap_counter;

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    uint16_t desc_size;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();

    if (unlikely(!vq->vring.desc)) {
        goto err;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        goto err;
    }

    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
    if (caches->desc.len < vq->vring.num * desc_size) {
        virtio_error(vq->vdev, "Cannot map descriptor ring");
        goto err;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                         max_in_bytes, max_out_bytes,
                                         caches);
    } else {
        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                        max_in_bytes, max_out_bytes,
                                        caches);
    }

    return;
err:
    if (in_bytes) {
        *in_bytes = 0;
    }
    if (out_bytes) {
        *out_bytes = 0;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}
static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE,
                                              MEMTXATTRS_UNSPECIFIED);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * guest memory. */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}
static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int num_sg,
                                bool is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE,
                                        MEMTXATTRS_UNSPECIFIED);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
                        false);
}
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
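/*
 * The element and its four variable-length arrays live in a single
 * allocation, laid out roughly as:
 *
 *   | VirtQueueElement (sz) | in_addr[] | out_addr[] | in_sg[] | out_sg[] |
 *
 * with each array placed at an alignment computed by QEMU_ALIGN_UP, so
 * one g_malloc()/g_free() pair covers the whole object.
 */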
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingPackedDesc desc;
    uint16_t id;
    int rc;

    address_space_cache_init_empty(&indirect_desc_cache);

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_packed_empty_rcu(vq)) {
        goto done;
    }

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    i = vq->last_avail_idx;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
    id = desc.id;
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingPackedDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingPackedDesc);
        i = 0;
        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
                                             desc_cache ==
                                             &indirect_desc_cache);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    elem->index = id;
    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
    vq->last_avail_idx += elem->ndescs;
    vq->inuse += elem->ndescs;

    if (vq->last_avail_idx >= vq->vring.num) {
        vq->last_avail_idx -= vq->vring.num;
        vq->last_avail_wrap_counter ^= 1;
    }

    vq->shadow_avail_idx = vq->last_avail_idx;
    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    if (virtio_device_disabled(vq->vdev)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtqueue_packed_pop(vq, sz);
    } else {
        return virtqueue_split_pop(vq, sz);
    }
}
static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache *desc_cache;
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    VRingPackedDesc desc;

    RCU_READ_LOCK_GUARD();

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return 0;
    }

    desc_cache = &caches->desc;

    virtio_queue_set_notification(vq, 0);

    while (vq->inuse < vq->vring.num) {
        unsigned int idx = vq->last_avail_idx;
        /*
         * works similar to virtqueue_pop but does not map buffers
         * and does not allocate any memory.
         */
        vring_packed_desc_read(vdev, &desc, desc_cache,
                               vq->last_avail_idx, true);
        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
            break;
        }
        elem.index = desc.id;
        elem.ndescs = 1;
        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
                                               vq->vring.num, &idx, false)) {
            ++elem.ndescs;
        }
        /*
         * immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0.
         */
        virtqueue_push(vq, &elem, 0);
        dropped++;
        vq->last_avail_idx += elem.ndescs;
        if (vq->last_avail_idx >= vq->vring.num) {
            vq->last_avail_idx -= vq->vring.num;
            vq->last_avail_wrap_counter ^= 1;
        }
    }

    return dropped;
}

static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
{
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
        /* works similar to virtqueue_pop but does not map buffers
         * and does not allocate any memory */
        smp_rmb();
        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
            break;
        }
        vq->inuse++;
        vq->last_avail_idx++;
        if (fEventIdx) {
            vring_set_avail_event(vq, vq->last_avail_idx);
        }
        /* immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0 */
        virtqueue_push(vq, &elem, 0);
        dropped++;
    }

    return dropped;
}
/* virtqueue_drop_all:
 * @vq: The #VirtQueue
 * Drops all queued buffers and indicates them to the guest
 * as if they are done. Useful when buffers can not be
 * processed but must be returned to the guest.
 */
unsigned int virtqueue_drop_all(VirtQueue *vq)
{
    struct VirtIODevice *vdev = vq->vdev;

    if (virtio_device_disabled(vq->vdev)) {
        return 0;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtqueue_packed_drop_all(vq);
    } else {
        return virtqueue_split_drop_all(vq);
    }
}
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;
void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * This is just one thing (there are probably more) that must be
     * fixed before we can allow NDEBUG compilation.
     */
    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        qemu_get_be32s(f, &elem->ndescs);
    }

    virtqueue_map(vdev, elem);
    return elem;
}

void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
                                VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout. */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }

    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        qemu_put_be32s(f, &elem->ndescs);
    }

    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}
/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (virtio_device_disabled(vdev)) {
        return;
    }

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
        return -EFAULT;
    }

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }

    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
    }

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;

    return 0;
}
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    if (cpu_virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}
static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
{
    vdev->vq[i].vring.desc = 0;
    vdev->vq[i].vring.avail = 0;
    vdev->vq[i].vring.used = 0;
    vdev->vq[i].last_avail_idx = 0;
    vdev->vq[i].shadow_avail_idx = 0;
    vdev->vq[i].used_idx = 0;
    vdev->vq[i].last_avail_wrap_counter = true;
    vdev->vq[i].shadow_avail_wrap_counter = true;
    vdev->vq[i].used_wrap_counter = true;
    virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
    vdev->vq[i].signalled_used = 0;
    vdev->vq[i].signalled_used_valid = false;
    vdev->vq[i].notification = true;
    vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
    vdev->vq[i].inuse = 0;
    virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
}

void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (k->queue_reset) {
        k->queue_reset(vdev, queue_index);
    }

    __virtio_queue_reset(vdev, queue_index);
}
void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * TODO: Seabios is currently out of spec and triggering this error.
     * So this needs to be fixed in Seabios, then this can
     * be re-enabled for new machine types only, and also after
     * being converted to LOG_GUEST_ERROR.
     *
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("queue_enable is only supported in devices of virtio "
                     "1.0 or later.");
    }
    */

    if (k->queue_enable) {
        k->queue_enable(vdev, queue_index);
    }
}
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->disabled = false;
    qatomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        __virtio_queue_reset(vdev, i);
    }
}
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    if (!vdev->vq[n].vring.num) {
        return;
    }
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    if (!vdev->vq[n].vring.num) {
        return;
    }
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
    virtio_init_region_cache(vdev, n);
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num_default;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    if (align) {
        vdev->vq[n].vring.align = align;
        virtio_queue_update_rings(vdev, n);
    }
}
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}
*vdev
, int n
)
2293 return n
< VIRTIO_QUEUE_MAX
? vdev
->vq
[n
].vector
:
2297 void virtio_queue_set_vector(VirtIODevice
*vdev
, int n
, uint16_t vector
)
2299 VirtQueue
*vq
= &vdev
->vq
[n
];
2301 if (n
< VIRTIO_QUEUE_MAX
) {
2302 if (vdev
->vector_queues
&&
2303 vdev
->vq
[n
].vector
!= VIRTIO_NO_VECTOR
) {
2304 QLIST_REMOVE(vq
, node
);
2306 vdev
->vq
[n
].vector
= vector
;
2307 if (vdev
->vector_queues
&&
2308 vector
!= VIRTIO_NO_VECTOR
) {
2309 QLIST_INSERT_HEAD(&vdev
->vector_queues
[vector
], vq
, node
);
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].used_elems = g_new0(VirtQueueElement, queue_size);

    return &vdev->vq[i];
}
void virtio_delete_queue(VirtQueue *vq)
{
    vq->vring.num = 0;
    vq->vring.num_default = 0;
    vq->handle_output = NULL;
    g_free(vq->used_elems);
    vq->used_elems = NULL;
    virtio_virtqueue_reset_region_cache(vq);
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    virtio_delete_queue(&vdev->vq[n]);
}

static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = qatomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        qatomic_or(&vdev->isr, value);
    }
}
2367 /* Called within rcu_read_lock(). */
2368 static bool virtio_split_should_notify(VirtIODevice
*vdev
, VirtQueue
*vq
)
2372 /* We need to expose used array entries before checking used event. */
2374 /* Always notify when queue is empty (when feature acknowledge) */
2375 if (virtio_vdev_has_feature(vdev
, VIRTIO_F_NOTIFY_ON_EMPTY
) &&
2376 !vq
->inuse
&& virtio_queue_empty(vq
)) {
2380 if (!virtio_vdev_has_feature(vdev
, VIRTIO_RING_F_EVENT_IDX
)) {
2381 return !(vring_avail_flags(vq
) & VRING_AVAIL_F_NO_INTERRUPT
);
2384 v
= vq
->signalled_used_valid
;
2385 vq
->signalled_used_valid
= true;
2386 old
= vq
->signalled_used
;
2387 new = vq
->signalled_used
= vq
->used_idx
;
2388 return !v
|| vring_need_event(vring_get_used_event(vq
), new, old
);
2391 static bool vring_packed_need_event(VirtQueue
*vq
, bool wrap
,
2392 uint16_t off_wrap
, uint16_t new,
2395 int off
= off_wrap
& ~(1 << 15);
2397 if (wrap
!= off_wrap
>> 15) {
2398 off
-= vq
->vring
.num
;
2401 return vring_need_event(off
, new, old
);
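/*
 * For reference, vring_need_event() from the standard virtio headers
 * computes:
 *
 *     (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old)
 *
 * with all arithmetic modulo 2^16, i.e. notify exactly when the used index
 * crossed the driver's requested wakeup point: for old = 10 and new = 12 it
 * fires for event_idx 10 or 11, but not 12.
 */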
/* Called within rcu_read_lock(). */
static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    VRingPackedDescEvent e;
    uint16_t old, new;
    bool v;
    VRingMemoryRegionCaches *caches;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return false;
    }

    vring_packed_event_read(vdev, &caches->avail, &e);

    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;

    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
        return false;
    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
        return true;
    }

    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
                                         e.off_wrap, new, old);
}

/* Called within rcu_read_lock(). */
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_packed_should_notify(vdev, vq);
    } else {
        return virtio_split_should_notify(vdev, vq);
    }
}
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown with
     * the new behavior.
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    event_notifier_set(&vq->guest_notifier);
}
static void virtio_irq(VirtQueue *vq)
{
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vq->vdev, vq->vector);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    WITH_RCU_READ_LOCK_GUARD() {
        if (!virtio_should_notify(vdev, vq)) {
            return;
        }
    }

    trace_virtio_notify(vdev, vq);
    virtio_irq(vq);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_packed_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}

static bool virtio_started_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->started;
}

static bool virtio_disabled_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->disabled;
}
static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_packed_virtqueue = {
    .name = "packed_virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
        VMSTATE_UINT16(used_idx, struct VirtQueue),
        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
        VMSTATE_UINT32(inuse, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_packed_virtqueues = {
    .name = "virtio/packed_virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_packed_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size,
                           const VMStateField *field)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static int put_extra_state(QEMUFile *f, void *pv, size_t size,
                           const VMStateField *field, JSONWriter *vmdesc)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
    return 0;
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_started = {
    .name = "virtio/started",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_started_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(started, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_disabled = {
    .name = "virtio/disabled",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_disabled_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(disabled, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        &vmstate_virtio_started,
        &vmstate_virtio_packed_virtqueues,
        &vmstate_virtio_disabled,
        NULL
    }
};
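/*
 * Every subsection above is guarded by its .needed callback: it is only
 * written to the stream when the callback returns true, so a device that
 * never negotiated e.g. VIRTIO_F_RING_PACKED emits no
 * "virtio/packed_virtqueues" subsection and its state remains loadable by
 * older QEMU versions that do not know about it.
 */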
int virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /*
         * Save desc now, the rest of the ring addresses are saved in
         * subsections for VIRTIO-1 devices.
         */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    if (vdc->vmsd) {
        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

/* A wrapper for use as a VMState .put function */
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
                             const VMStateField *field, JSONWriter *vmdesc)
{
    return virtio_save(VIRTIO_DEVICE(opaque), f);
}

/* A wrapper for use as a VMState .get function */
static int coroutine_mixed_fn
virtio_device_get(QEMUFile *f, void *opaque, size_t size,
                  const VMStateField *field)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    return virtio_load(vdev, f, dc->vmsd->version_id);
}

const VMStateInfo virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

typedef struct VirtioSetFeaturesNocheckData {
    Coroutine *co;
    VirtIODevice *vdev;
    uint64_t val;
    int ret;
} VirtioSetFeaturesNocheckData;

static void virtio_set_features_nocheck_bh(void *opaque)
{
    VirtioSetFeaturesNocheckData *data = opaque;

    data->ret = virtio_set_features_nocheck(data->vdev, data->val);
    aio_co_wake(data->co);
}

static int coroutine_mixed_fn
virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
{
    if (qemu_in_coroutine()) {
        VirtioSetFeaturesNocheckData data = {
            .co = qemu_coroutine_self(),
            .vdev = vdev,
            .val = val,
        };
        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
                                virtio_set_features_nocheck_bh, &data);
        qemu_coroutine_yield();
        return data.ret;
    } else {
        return virtio_set_features_nocheck(vdev, val);
    }
}
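/*
 * virtio_load() can run in coroutine context (e.g. during snapshot load),
 * while ->set_features implementations are not guaranteed to be
 * coroutine-safe. The helper above therefore bounces the call to a bottom
 * half on the current AioContext and yields until aio_co_wake() resumes
 * the coroutine with the result.
 */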
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    int ret;
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }

    if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
                      __func__, vdev->name);
    }

    ret = virtio_set_features_nocheck(vdev, val);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
        int i;
        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
            if (vdev->vq[i].vring.num != 0) {
                virtio_init_region_cache(vdev, i);
            }
        }
    }
    if (!ret) {
        if (!virtio_device_started(vdev, vdev->status) &&
            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
            vdev->start_on_kick = true;
        }
    }
    return ret;
}
size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
                              uint64_t host_features)
{
    size_t config_size = params->min_size;
    const VirtIOFeature *feature_sizes = params->feature_sizes;
    size_t i;

    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }

    assert(config_size <= params->max_size);
    return config_size;
}
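/*
 * Illustrative sketch (hypothetical names, not from this file): a device
 * describes feature-dependent config fields with a VirtIOFeature table and
 * lets virtio_get_config_size() pick the largest size any offered feature
 * requires:
 *
 *     static const VirtIOFeature my_feature_sizes[] = {
 *         { .flags = 1ULL << MY_F_EXTRA,
 *           .end = endof(struct my_config, extra) },
 *         {}
 *     };
 *     static const VirtIOConfigSizeParams my_cfg_params = {
 *         .min_size = MY_CONFIG_MIN_SIZE,
 *         .max_size = sizeof(struct my_config),
 *         .feature_sizes = my_feature_sizes,
 *     };
 *     ...
 *     config_size = virtio_get_config_size(&my_cfg_params, host_features);
 */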
int coroutine_mixed_fn
virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    if (!virtio_device_started(vdev, vdev->status) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->start_on_kick = true;
    }

    RCU_READ_LOCK_GUARD();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up.  Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
                vdev->vq[i].shadow_avail_wrap_counter =
                                        vdev->vq[i].last_avail_wrap_counter;
                continue;
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                vdev->vq[i].used_idx = 0;
                vdev->vq[i].shadow_avail_idx = 0;
                vdev->vq[i].inuse = 0;
                continue;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    if (vdc->post_load) {
        ret = vdc->post_load(vdev);
        if (ret) {
            return ret;
        }
    }

    return 0;
}
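/*
 * Worked example for the inuse calculation above: last_avail_idx = 0x0003
 * and used_idx = 0xfffe give (uint16_t)(0x0003 - 0xfffe) = 5 elements in
 * flight. The free-running 16-bit indices may wrap, but the modulo
 * subtraction still yields the right count as long as the ring size stays
 * below UINT16_MAX.
 */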
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}

static void virtio_vmstate_change(void *opaque, bool running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && virtio_device_started(vdev, vdev->status);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
                                       vdev_size, vdev_name, &error_abort,
                                       NULL);
    qdev_alias_all_properties(vdev, proxy_obj);
}

void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->start_on_kick = false;
    vdev->started = false;
    vdev->vhost_started = false;
    vdev->device_id = device_id;
    vdev->status = 0;
    qatomic_set(&vdev->isr, 0);
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_new0(VirtQueue, VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
        vdev->vq[i].host_notifier_enabled = false;
    }

    vdev->name = virtio_id_to_name(device_id);
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
            virtio_vmstate_change, vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}
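/*
 * Illustrative sketch (hypothetical device, not part of this file): the
 * teardown counterpart of virtio_init() in a device's unrealize path
 * deletes each queue it added and then drops the vmstate handler:
 *
 *     virtio_del_queue(vdev, 0);
 *     virtio_cleanup(vdev);
 */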
/*
 * Only devices that have already been around prior to defining the virtio
 * standard support legacy mode; this includes devices not specified in the
 * standard. All newer devices conform to the virtio standard only.
 */
bool virtio_legacy_allowed(VirtIODevice *vdev)
{
    switch (vdev->device_id) {
    case VIRTIO_ID_NET:
    case VIRTIO_ID_BLOCK:
    case VIRTIO_ID_CONSOLE:
    case VIRTIO_ID_RNG:
    case VIRTIO_ID_BALLOON:
    case VIRTIO_ID_RPMSG:
    case VIRTIO_ID_SCSI:
    case VIRTIO_ID_9P:
    case VIRTIO_ID_RPROC_SERIAL:
    case VIRTIO_ID_CAIF:
        return true;
    default:
        return false;
    }
}

bool virtio_legacy_check_disabled(VirtIODevice *vdev)
{
    return vdev->disable_legacy_check;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
{
    return virtio_queue_get_desc_addr(vdev, n) != 0;
}

bool virtio_queue_enabled(VirtIODevice *vdev, int n)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->queue_enabled) {
        return k->queue_enabled(qbus->parent, n);
    }
    return virtio_queue_enabled_legacy(vdev, n);
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    int s;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    int s;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return sizeof(struct VRingPackedDescEvent);
    }

    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
}
static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
                                                           int n)
{
    unsigned int avail, used;

    avail = vdev->vq[n].last_avail_idx;
    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;

    used = vdev->vq[n].used_idx;
    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;

    return avail | used << 16;
}
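/*
 * Layout of the value returned above (and consumed by
 * virtio_queue_packed_set_last_avail_idx() below): bits 0-14 hold
 * last_avail_idx with its wrap counter in bit 15; bits 16-30 hold used_idx
 * with its wrap counter in bit 31.
 */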
static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    return vdev->vq[n].last_avail_idx;
}

unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_get_last_avail_idx(vdev, n);
    } else {
        return virtio_queue_split_get_last_avail_idx(vdev, n);
    }
}

static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
                                                   int n, unsigned int idx)
{
    struct VirtQueue *vq = &vdev->vq[n];

    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
    vq->last_avail_wrap_counter =
        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
    idx >>= 16;
    vq->used_idx = idx & 0x7fff;
    vq->used_wrap_counter = !!(idx & 0x8000);
}

static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
                                                  int n, unsigned int idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
                                     unsigned int idx)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
    } else {
        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
    }
}

static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
                                                       int n)
{
    /* We don't have a reference like avail idx in shared memory */
    return;
}

static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
                                                      int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
    }
}

void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_restore_last_avail_idx(vdev, n);
    } else {
        virtio_queue_split_restore_last_avail_idx(vdev, n);
    }
}

static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    /* used idx was updated through set_last_avail_idx() */
    return;
}

static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
    RCU_READ_LOCK_GUARD();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
    }
}

void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_update_used_idx(vdev, n);
    } else {
        return virtio_split_packed_update_used_idx(vdev, n);
    }
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

static void virtio_config_guest_notifier_read(EventNotifier *n)
{
    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_notify_config(vdev);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
                                                 bool assign, bool with_irqfd)
{
    EventNotifier *n;
    n = &vdev->config_notifier;
    if (assign && !with_irqfd) {
        event_notifier_set_handler(n, virtio_config_guest_notifier_read);
    } else {
        event_notifier_set_handler(n, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,*/
        /* in case poll callback didn't have time to run. */
        virtio_config_guest_notifier_read(n);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}

static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    return vq->vring.desc && !virtio_queue_empty(vq);
}

static void virtio_queue_host_notifier_aio_poll_ready(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_notify_vq(vq);
}

static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}

void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           virtio_queue_host_notifier_aio_poll,
                           virtio_queue_host_notifier_aio_poll_ready);
    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                virtio_queue_host_notifier_aio_poll_begin,
                                virtio_queue_host_notifier_aio_poll_end);
}
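/*
 * While the AioContext poll loop is active, the poll_begin/poll_end hooks
 * above suppress guest kicks: the guest keeps adding buffers without
 * notifying, virtio_queue_host_notifier_aio_poll() spots new work by
 * inspecting the ring directly, and poll_ready runs the handler, saving an
 * eventfd round trip per request.
 */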
/*
 * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
 * this for rx virtqueues and similar cases where the virtqueue handler
 * function does not pop all elements. When the virtqueue is left non-empty
 * polling consumes CPU cycles and should not be used.
 */
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           NULL, NULL);
}

void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
{
    aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
}

void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
{
    return &vdev->config_notifier;
}

void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
{
    vq->host_notifier_enabled = enabled;
}

int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
                                      MemoryRegion *mr, bool assign)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->set_host_notifier_mr) {
        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
    }

    return -1;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

void G_GNUC_PRINTF(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
        virtio_notify_config(vdev);
    }

    vdev->broken = true;
}
static void virtio_memory_listener_commit(MemoryListener *listener)
{
    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_init_region_cache(vdev, i);
    }
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        vdc->unrealize(dev);
        return;
    }

    vdev->listener.commit = virtio_memory_listener_commit;
    vdev->listener.name = "virtio";
    memory_listener_register(&vdev->listener, vdev->dma_as);
}

static void virtio_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    memory_listener_unregister(&vdev->listener);
    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev);
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static void virtio_device_free_virtqueues(VirtIODevice *vdev)
{
    int i;

    if (!vdev->vq) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
    }
    g_free(vdev->vq);
}

static void virtio_device_instance_finalize(Object *obj)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(obj);

    virtio_device_free_virtqueues(vdev);

    g_free(vdev->config);
    g_free(vdev->vector_queues);
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
                     disable_legacy_check, false),
    DEFINE_PROP_END_OF_LIST(),
};
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int i, n, r, err;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    memory_region_transaction_commit();
    return 0;

assign_error:
    i = n; /* save n for a second iteration after transaction is committed. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    while (--i >= 0) {
        if (!virtio_queue_get_num(vdev, i)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, i);
    }
    return err;
}

int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_start_ioeventfd(vbus);
}

static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    /*
     * Batch all the host notifiers in a single transaction to avoid
     * quadratic time complexity in address_space_update_ioeventfds().
     */
    memory_region_transaction_begin();
    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    /*
     * The transaction expects the ioeventfds to be open when it
     * commits. Do it now, before the cleanup loop.
     */
    memory_region_transaction_commit();

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        virtio_bus_cleanup_host_notifier(qbus, n);
    }
}

int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_grab_ioeventfd(vbus);
}

void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_release_ioeventfd(vbus);
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    device_class_set_props(dc, virtio_properties);
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}

bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_ioeventfd_enabled(vbus);
}
VirtQueueStatus *qmp_x_query_virtio_queue_status(const char *path,
                                                 uint16_t queue,
                                                 Error **errp)
{
    VirtIODevice *vdev;
    VirtQueueStatus *status;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIODevice", path);
        return NULL;
    }

    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }

    status = g_new0(VirtQueueStatus, 1);
    status->name = g_strdup(vdev->name);
    status->queue_index = vdev->vq[queue].queue_index;
    status->inuse = vdev->vq[queue].inuse;
    status->vring_num = vdev->vq[queue].vring.num;
    status->vring_num_default = vdev->vq[queue].vring.num_default;
    status->vring_align = vdev->vq[queue].vring.align;
    status->vring_desc = vdev->vq[queue].vring.desc;
    status->vring_avail = vdev->vq[queue].vring.avail;
    status->vring_used = vdev->vq[queue].vring.used;
    status->used_idx = vdev->vq[queue].used_idx;
    status->signalled_used = vdev->vq[queue].signalled_used;
    status->signalled_used_valid = vdev->vq[queue].signalled_used_valid;

    if (vdev->vhost_started) {
        VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
        struct vhost_dev *hdev = vdc->get_vhost(vdev);

        /* check if vq index exists for vhost as well */
        if (queue >= hdev->vq_index && queue < hdev->vq_index + hdev->nvqs) {
            status->has_last_avail_idx = true;

            int vhost_vq_index =
                hdev->vhost_ops->vhost_get_vq_index(hdev, queue);
            struct vhost_vring_state state = {
                .index = vhost_vq_index,
            };

            status->last_avail_idx =
                hdev->vhost_ops->vhost_get_vring_base(hdev, &state);
        }
    } else {
        status->has_shadow_avail_idx = true;
        status->has_last_avail_idx = true;
        status->last_avail_idx = vdev->vq[queue].last_avail_idx;
        status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;
    }

    return status;
}
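/*
 * Example QMP use (the device path below is illustrative only; real paths
 * can be listed with the x-query-virtio command):
 *
 *     { "execute": "x-query-virtio-queue-status",
 *       "arguments": {
 *           "path": "/machine/peripheral-anon/device[1]/virtio-backend",
 *           "queue": 0 } }
 */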
static strList *qmp_decode_vring_desc_flags(uint16_t flags)
{
    strList *list = NULL;
    strList *node;
    int i;

    struct {
        uint16_t flag;
        const char *value;
    } map[] = {
        { VRING_DESC_F_NEXT, "next" },
        { VRING_DESC_F_WRITE, "write" },
        { VRING_DESC_F_INDIRECT, "indirect" },
        { 1 << VRING_PACKED_DESC_F_AVAIL, "avail" },
        { 1 << VRING_PACKED_DESC_F_USED, "used" },
        { 0, "" }
    };

    for (i = 0; map[i].flag; i++) {
        if ((map[i].flag & flags) == 0) {
            continue;
        }

        node = g_malloc0(sizeof(strList));
        node->value = g_strdup(map[i].value);
        node->next = list;
        list = node;
    }

    return list;
}

VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
                                                     uint16_t queue,
                                                     bool has_index,
                                                     uint16_t index,
                                                     Error **errp)
{
    VirtIODevice *vdev;
    VirtQueue *vq;
    VirtioQueueElement *element = NULL;

    vdev = qmp_find_virtio_device(path);
    if (vdev == NULL) {
        error_setg(errp, "Path %s is not a VirtIO device", path);
        return NULL;
    }

    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, queue)) {
        error_setg(errp, "Invalid virtqueue number %d", queue);
        return NULL;
    }
    vq = &vdev->vq[queue];

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
        error_setg(errp, "Packed ring not supported");
        return NULL;
    } else {
        unsigned int head, i, max;
        VRingMemoryRegionCaches *caches;
        MemoryRegionCache indirect_desc_cache;
        MemoryRegionCache *desc_cache;
        VRingDesc desc;
        VirtioRingDescList *list = NULL;
        VirtioRingDescList *node;
        int rc; int ndescs;

        address_space_cache_init_empty(&indirect_desc_cache);

        RCU_READ_LOCK_GUARD();

        max = vq->vring.num;

        if (!has_index) {
            head = vring_avail_ring(vq, vq->last_avail_idx % vq->vring.num);
        } else {
            head = vring_avail_ring(vq, index % vq->vring.num);
        }
        i = head;

        caches = vring_get_region_caches(vq);
        if (!caches) {
            error_setg(errp, "Region caches not initialized");
            return NULL;
        }
        if (caches->desc.len < max * sizeof(VRingDesc)) {
            error_setg(errp, "Cannot map descriptor ring");
            return NULL;
        }

        desc_cache = &caches->desc;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
        if (desc.flags & VRING_DESC_F_INDIRECT) {
            int64_t len;
            len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                error_setg(errp, "Cannot map indirect buffer");
                goto done;
            }

            max = desc.len / sizeof(VRingDesc);
            i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        element = g_new0(VirtioQueueElement, 1);
        element->avail = g_new0(VirtioRingAvail, 1);
        element->used = g_new0(VirtioRingUsed, 1);
        element->name = g_strdup(vdev->name);
        element->index = head;
        element->avail->flags = vring_avail_flags(vq);
        element->avail->idx = vring_avail_idx(vq);
        element->avail->ring = head;
        element->used->flags = vring_used_flags(vq);
        element->used->idx = vring_used_idx(vq);
        ndescs = 0;

        do {
            /* A buggy driver may produce an infinite loop */
            if (ndescs >= max) {
                break;
            }
            node = g_new0(VirtioRingDescList, 1);
            node->value = g_new0(VirtioRingDesc, 1);
            node->value->addr = desc.addr;
            node->value->len = desc.len;
            node->value->flags = qmp_decode_vring_desc_flags(desc.flags);
            node->next = list;
            list = node;

            ndescs++;
            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);
        element->descs = list;
done:
        address_space_cache_destroy(&indirect_desc_cache);
    }

    return element;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .instance_finalize = virtio_device_instance_finalize,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)