/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/range.h"
#include "qemu/reserved-region.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512
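/*
 * Note on the data model: domains and endpoints are guest-managed objects.
 * A domain owns a GTree of mappings keyed by VirtIOIOMMUInterval and a list
 * of attached endpoints; an endpoint (identified by its PCI BDF) points back
 * to the domain it is attached to, if any.
 */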
typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev)
{
    uint32_t sid;
    bool bypassed;
    VirtIOIOMMU *s = sdev->viommu;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);
    /* need to check bypass before system reset */
    if (!s->endpoints) {
        bypassed = s->config.bypass;
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        bypassed = s->config.bypass;
    } else {
        bypassed = ep->domain->bypass;
    }

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return bypassed;
}
/* Return whether the device is using IOMMU translation. */
static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev)
{
    bool use_remapping;

    assert(sdev);

    use_remapping = !virtio_iommu_device_bypassed(sdev);

    trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus),
                                            PCI_SLOT(sdev->devfn),
                                            PCI_FUNC(sdev->devfn),
                                            use_remapping);

    /* Turn the old region off before turning the other one on */
    if (use_remapping) {
        memory_region_set_enabled(&sdev->bypass_mr, false);
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false);
        memory_region_set_enabled(&sdev->bypass_mr, true);
    }

    return use_remapping;
}
static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s)
{
    GHashTableIter iter;
    IOMMUPciBus *iommu_pci_bus;
    int i;

    g_hash_table_iter_init(&iter, s->as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!iommu_pci_bus->pbdev[i]) {
                continue;
            }
            virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]);
        }
    }
}
/*
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not all be initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}
static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}
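/*
 * GTree comparator for interval keys: two intervals compare as equal as soon
 * as they overlap, so a lookup with an [addr, addr + 1] key returns any
 * mapping that contains the address.
 */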
static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}
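/*
 * IOMMU notifiers require addr_mask to be a contiguous power-of-2 mask, so a
 * range that is not naturally aligned is notified as a sequence of aligned
 * power-of-2 chunks computed by dma_aligned_pow2_mask().
 */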
static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
                                          IOMMUTLBEvent *event,
                                          hwaddr virt_start, hwaddr virt_end)
{
    uint64_t delta = virt_end - virt_start;

    event->entry.iova = virt_start;
    event->entry.addr_mask = delta;

    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, *event);
    }

    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event->entry.addr_mask = mask;
        event->entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, *event);
        virt_start += mask + 1;
        if (event->entry.perm != IOMMU_NONE) {
            event->entry.translated_addr += mask + 1;
        }
    }
}
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;

    virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
}
static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}

static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;
    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);

    if (!ep->domain) {
        return;
    }
    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
    virtio_iommu_switch_address_space(sdev);
}
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}
static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}
static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}
static void add_prop_resv_regions(IOMMUDevice *sdev)
{
    VirtIOIOMMU *s = sdev->viommu;
    int i;

    for (i = 0; i < s->nr_prop_resv_regions; i++) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);

        *reg = s->prop_resv_regions[i];
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
    }
}
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
        add_prop_resv_regions(sdev);

        /*
         * Build the IOMMU disabled container with aliases to the
         * shared MRs. Aliasing to a shared memory region helps the
         * memory API detect identical FlatViews, so devices can share
         * the same FlatView when in bypass mode (either because the
         * virtio-iommu driver is not configured or because of
         * "iommu=pt"). This greatly reduces the total number of
         * FlatViews in the system, and the VM runs faster.
         */
        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
                                 "system", get_system_memory(), 0,
                                 memory_region_size(get_system_memory()));

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);

        /*
         * Hook both containers under the root container; we switch
         * between the iommu and bypass MRs by enabling/disabling the
         * corresponding sub-containers.
         */
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            MEMORY_REGION(&sdev->iommu_mr),
                                            0);
        memory_region_add_subregion_overlap(&sdev->root, 0,
                                            &sdev->bypass_mr, 0);

        virtio_iommu_switch_address_space(sdev);
        g_free(name);
    }
    return &sdev->as;
}
static const PCIIOMMUOps virtio_iommu_ops = {
    .get_address_space = virtio_iommu_find_add_as,
};
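/*
 * Handlers for the virtio-iommu request types. Each returns a
 * VIRTIO_IOMMU_S_* status that is written back into the request tail.
 */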
static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    IOMMUDevice *sdev;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;
    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
    virtio_iommu_switch_address_space(sdev);

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void **)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}
static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    GList *l;

    total = size * g_list_length(sdev->resv_regions);
    if (total > free) {
        return -ENOSPC;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;
        unsigned subtype = reg->type;
        Range *range = &reg->range;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(range_lob(range));
        prop.end = cpu_to_le64(range_upb(range));

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}
/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
    size_t free = VIOMMU_PROBE_SIZE;
    IOMMUDevice *sdev;
    ssize_t count;

    if (!iommu_mr) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);

    count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;
    sdev->probe_done = true;

    return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t payload_sz)
{
    size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);

    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}
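/*
 * Generate the request handlers for attach/detach/map/unmap: each one copies
 * the request payload (minus the tail) out of the iovec and dispatches to the
 * corresponding virtio_iommu_<req>() implementation above.
 */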
#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req,               \
                    sizeof(req) - sizeof(struct virtio_iommu_req_tail));\
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)
static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;
    size_t sz;

    for (;;) {
        size_t output_size = sizeof(tail);

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_rec_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = buf + s->config.probe_size;
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_rec_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
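/*
 * Translation entry point, called on each DMA access through the IOMMU MR.
 * The lookup order is: known endpoint, reserved regions (MSI windows bypass,
 * other reserved ranges fault), attached/bypass domain, then the domain's
 * mapping tree and its permission flags.
 */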
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    int granule;
    bool found;
    GList *l;

    interval.low = addr;
    interval.high = addr + 1;
    granule = ctz64(s->config.page_size_mask);

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = BIT_ULL(granule) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_rec_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));

    if (bypass_allowed)
        assert(ep && ep->domain && !ep->domain->bypass);

    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (l = sdev->resv_regions; l; l = l->next) {
        ReservedRegion *reg = l->data;

        if (range_contains(&reg->range, addr)) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
    return entry;
}
static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}
static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
        virtio_iommu_switch_address_space_all(dev);
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}
static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}
static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}
static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_rec_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_rec_mutex_unlock(&s->mutex);
}
static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}
/*
 * The default mask depends on the "granule" property. For example, with
 * 4k granule, it is -(4 * KiB). When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64
                   " incompatible with currently supported mask 0x%"PRIx64,
                   mr->parent_obj.name, new_mask, cur_mask);
        return -EINVAL;
    }

    /*
     * Once the granule is frozen we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it.
     */
    if (s->granule_frozen) {
        int cur_granule = ctz64(cur_mask);

        if (!(BIT_ULL(cur_granule) & new_mask)) {
            error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx",
                       mr->parent_obj.name, BIT_ULL(cur_granule));
            return -EINVAL;
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}
/**
 * rebuild_resv_regions: rebuild resv regions with both the
 * info of host resv ranges and property set resv ranges
 */
static int rebuild_resv_regions(IOMMUDevice *sdev)
{
    GList *l;
    int i = 0;

    /* free the existing list and rebuild it from scratch */
    g_list_free_full(sdev->resv_regions, g_free);
    sdev->resv_regions = NULL;

    /* First add host reserved regions if any, all tagged as RESERVED */
    for (l = sdev->host_resv_ranges; l; l = l->next) {
        ReservedRegion *reg = g_new0(ReservedRegion, 1);
        Range *r = (Range *)l->data;

        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
                                             range_lob(&reg->range),
                                             range_upb(&reg->range));
        i++;
    }
    /*
     * then add higher priority reserved regions set by the machine
     * through properties
     */
    add_prop_resv_regions(sdev);
    return 0;
}
/**
 * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
 *
 * The function turns those into reserved ranges. Once some
 * reserved ranges have been set, new reserved regions cannot be
 * added outside of the original ones.
 *
 * @mr: IOMMU memory region
 * @iova_ranges: list of usable IOVA ranges
 * @errp: error pointer
 */
static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
                                        GList *iova_ranges,
                                        Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    GList *current_ranges = sdev->host_resv_ranges;
    GList *l, *tmp, *new_ranges = NULL;
    int ret = -EINVAL;

    /* check that each new resv region is included in an existing one */
    if (sdev->host_resv_ranges) {
        range_inverse_array(iova_ranges,
                            &new_ranges,
                            0, UINT64_MAX);

        for (tmp = new_ranges; tmp; tmp = tmp->next) {
            Range *newr = (Range *)tmp->data;
            bool included = false;

            for (l = current_ranges; l; l = l->next) {
                Range *r = (Range *)l->data;

                if (range_contains_range(r, newr)) {
                    included = true;
                    break;
                }
            }
            if (!included) {
                goto error;
            }
        }
        /* all new reserved ranges are included in existing ones */
        ret = 0;
        goto out;
    }

    if (sdev->probe_done) {
        warn_report("%s: Notified about new host reserved regions after probe",
                    mr->parent_obj.name);
    }

    range_inverse_array(iova_ranges,
                        &sdev->host_resv_ranges,
                        0, UINT64_MAX);
    rebuild_resv_regions(sdev);

    return 0;
error:
    error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
               mr->parent_obj.name);
out:
    g_list_free_full(new_ranges, g_free);
    return ret;
}
static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
    virtio_iommu_switch_address_space_all(s);
}
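/*
 * Machine-init-done notifier: after cold-plugged devices (e.g. VFIO) have had
 * a chance to restrict config.page_size_mask, freeze the granule so that
 * hotplugged devices can no longer change it.
 */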
static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
{
    VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
    int granule;

    if (likely(s->config.bypass)) {
        /*
         * Transient IOMMU MR enable to collect page_size_mask requirements
         * through memory_region_iommu_set_page_size_mask() called by
         * VFIO region_add() callback
         */
        s->config.bypass = false;
        virtio_iommu_switch_address_space_all(s);
        /* restore default */
        s->config.bypass = true;
        virtio_iommu_switch_address_space_all(s);
    }
    s->granule_frozen = true;
    granule = ctz64(s->config.page_size_mask);
    trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
}
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    /*
     * config.bypass is needed to get initial address space early, such as
     * in vfio realize
     */
    s->config.bypass = s->boot_bypass;
    if (s->aw_bits < 32 || s->aw_bits > 64) {
        error_setg(errp, "aw-bits must be within [32,64]");
        return;
    }
    s->config.input_range.end =
        s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;

    switch (s->granule_mode) {
    case GRANULE_MODE_4K:
        s->config.page_size_mask = -(4 * KiB);
        break;
    case GRANULE_MODE_8K:
        s->config.page_size_mask = -(8 * KiB);
        break;
    case GRANULE_MODE_16K:
        s->config.page_size_mask = -(16 * KiB);
        break;
    case GRANULE_MODE_64K:
        s->config.page_size_mask = -(64 * KiB);
        break;
    case GRANULE_MODE_HOST:
        s->config.page_size_mask = qemu_real_host_page_mask();
        break;
    default:
        error_setg(errp, "Unsupported granule mode");
    }
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_rec_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    s->machine_done.notify = virtio_iommu_freeze_granule;
    qemu_add_machine_init_done_notifier(&s->machine_done);

    qemu_register_reset(virtio_iommu_system_reset, s);
}
static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);
    qemu_remove_machine_init_done_notifier(&s->machine_done);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    qemu_rec_mutex_destroy(&s->mutex);

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}
static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}
#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (const VMStateField[]) {                 \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                                \
{                                                      \
    .name = "mapping",                                 \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (const VMStateField[]) {                 \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping), \
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key */
};
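/*
 * Allocate the mappings tree before VMSTATE_GTREE_V repopulates it during
 * migration; the GTree loader inserts into an existing tree rather than
 * creating one.
 */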
static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}
static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};
static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};
static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}
static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);

    /*
     * Memory regions are dynamically turned on/off depending on
     * 'config.bypass' and the attached domain type, if any. After
     * migration, we need to make sure the memory regions are
     * still correct.
     */
    virtio_iommu_switch_address_space_all(s);
    return 0;
}
static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .minimum_version_id = 2,
    .version_id = 2,
    .post_load = iommu_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};
static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .version_id = 2,
    .fields = (const VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
                     TYPE_PCI_BUS, PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode,
                             GRANULE_MODE_HOST),
    DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
    DEFINE_PROP_END_OF_LIST(),
};
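/*
 * A minimal usage sketch (the exact machine options vary): the device is
 * typically instantiated as "-device virtio-iommu-pci" on a machine whose
 * primary PCI bus is linked through the "primary-bus" property, e.g.:
 *
 *   qemu-system-aarch64 -M virt ... -device virtio-iommu-pci
 *
 * with optional "granule", "aw-bits" and "boot-bypass" properties as
 * declared above.
 */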
static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}
static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
    imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
}
static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)