2 * QEMU sPAPR IOMMU (TCE) code
4 * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qemu/error-report.h"
23 #include "qemu/module.h"
24 #include "sysemu/kvm.h"
26 #include "migration/vmstate.h"
27 #include "sysemu/dma.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/ppc/spapr_vio.h"
/*
 * IOMMU_PAGE_SIZE(shift): size in bytes of an IOMMU page, i.e. 2^shift.
 * IOMMU_PAGE_MASK(shift): mask that keeps the page-number bits and clears
 * the in-page offset bits for that page size.
 */
42 #define IOMMU_PAGE_SIZE(shift) (1ULL << (shift))
43 #define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1))
/*
 * File-scope list of SpaprTceTable objects. Entries are inserted in
 * spapr_tce_table_realize(), removed in spapr_tce_table_unrealize() and
 * walked by spapr_tce_find_by_liobn().
 */
45 static QLIST_HEAD(, SpaprTceTable
) spapr_tce_tables
;
/*
 * Look up a TCE table by its LIOBN.
 *
 * A LIOBN with any of the upper 32 bits set is reported through
 * hcall_dprintf() as out-of-bounds. Otherwise spapr_tce_tables is walked
 * for an entry whose liobn equals the low 32 bits of the argument.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers such as h_put_tce() use the result as a SpaprTceTable pointer.
 */
47 SpaprTceTable
*spapr_tce_find_by_liobn(target_ulong liobn
)
/* Reject values that do not fit in the 32-bit liobn field. */
51 if (liobn
& 0xFFFFFFFF00000000ULL
) {
52 hcall_dprintf("Request for out-of-bounds LIOBN 0x" TARGET_FMT_lx
"\n",
57 QLIST_FOREACH(tcet
, &spapr_tce_tables
, list
) {
58 if (tcet
->liobn
== (uint32_t)liobn
) {
/*
 * Convert the permission bits of a TCE into an IOMMUAccessFlags value.
 * The switch selects on (tce & SPAPR_TCE_RW); the default label is
 * annotated as the SPAPR_TCE_RW case. The individual case results are
 * not visible in this excerpt.
 */
66 static IOMMUAccessFlags
spapr_tce_iommu_access_flags(uint64_t tce
)
68 switch (tce
& SPAPR_TCE_RW
) {
75 default: /* SPAPR_TCE_RW */
/*
 * Obtain backing storage for a TCE table of nb_table 64-bit entries.
 *
 * Two sources are visible: kvmppc_create_spapr_tce(), which is given the
 * liobn, page shift, bus offset and entry count, and g_new0(), which
 * returns zero-initialised heap memory. The conditions that choose
 * between them are not visible in this excerpt. The resulting pointer and
 * *fd are reported through trace_spapr_iommu_new_table().
 */
80 static uint64_t *spapr_tce_alloc_table(uint32_t liobn
,
87 uint64_t *table
= NULL
;
90 table
= kvmppc_create_spapr_tce(liobn
, page_shift
, bus_offset
, nb_table
,
/* Plain zeroed allocation of nb_table entries. */
96 table
= g_new0(uint64_t, nb_table
);
99 trace_spapr_iommu_new_table(liobn
, table
, *fd
);
/*
 * Release a table previously obtained for a TCE table.
 *
 * The guarded branch is taken when KVM is not enabled, or when
 * kvmppc_remove_spapr_tce(table, fd, nb_table) returns non-zero. The body
 * of that branch is not visible in this excerpt.
 */
104 static void spapr_tce_free_table(uint64_t *table
, int fd
, uint32_t nb_table
)
106 if (!kvm_enabled() ||
107 (kvmppc_remove_spapr_tce(table
, fd
, nb_table
) != 0)) {
/*
 * Translate callback for the sPAPR IOMMU memory region (installed as
 * imrc->translate in spapr_iommu_memory_region_class_init()).
 *
 * The result starts out targeting address_space_memory with
 * translated_addr 0 and an all-ones addr_mask. When the page index of
 * addr lies inside the table, the entry is read and the result is filled
 * with the page-aligned iova, the page-aligned translated address, the
 * in-page offset mask and the permissions derived from the TCE.
 */
112 /* Called from RCU critical section */
113 static IOMMUTLBEntry
spapr_tce_translate_iommu(IOMMUMemoryRegion
*iommu
,
115 IOMMUAccessFlags flag
,
/* Recover the owning table from the embedded iommu member. */
118 SpaprTceTable
*tcet
= container_of(iommu
, SpaprTceTable
, iommu
);
120 IOMMUTLBEntry ret
= {
121 .target_as
= &address_space_memory
,
123 .translated_addr
= 0,
124 .addr_mask
= ~(hwaddr
)0,
128 if ((addr
>> tcet
->page_shift
) < tcet
->nb_table
) {
129 /* Check if we are in bound */
130 hwaddr page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
/* The table is indexed by page number. */
132 tce
= tcet
->table
[addr
>> tcet
->page_shift
];
133 ret
.iova
= addr
& page_mask
;
134 ret
.translated_addr
= tce
& page_mask
;
/* addr_mask covers the offset bits inside one IOMMU page. */
135 ret
.addr_mask
= ~page_mask
;
136 ret
.perm
= spapr_tce_iommu_access_flags(tce
);
138 trace_spapr_iommu_xlate(tcet
->liobn
, addr
, ret
.translated_addr
, ret
.perm
,
/*
 * Replay callback (installed as imrc->replay): walks the whole region in
 * steps of the minimum IOMMU page size, translates each address with
 * IOMMU_NONE as the access flag and calls the notifier for every entry
 * whose permission is not IOMMU_NONE.
 *
 * The skipping_replay flag of the table is tested first; the body of that
 * branch is not visible in this excerpt. The loop also contains a check
 * for addr + granularity wrapping around, explained by the remaining
 * comment text near the end of the block.
 */
144 static void spapr_tce_replay(IOMMUMemoryRegion
*iommu_mr
, IOMMUNotifier
*n
)
146 MemoryRegion
*mr
= MEMORY_REGION(iommu_mr
);
147 IOMMUMemoryRegionClass
*imrc
= IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr
);
148 hwaddr addr
, granularity
;
150 SpaprTceTable
*tcet
= container_of(iommu_mr
, SpaprTceTable
, iommu
);
152 if (tcet
->skipping_replay
) {
156 granularity
= memory_region_iommu_get_min_page_size(iommu_mr
);
158 for (addr
= 0; addr
< memory_region_size(mr
); addr
+= granularity
) {
159 iotlb
= imrc
->translate(iommu_mr
, addr
, IOMMU_NONE
, n
->iommu_idx
);
/* Only mapped entries are reported to the notifier. */
160 if (iotlb
.perm
!= IOMMU_NONE
) {
161 n
->notify(n
, &iotlb
);
165 * if (2^64 - MR size) < granularity, it's possible to get an
166 * infinite loop here. This should catch such a wraparound.
168 if ((addr
+ granularity
) < addr
) {
/*
 * VMState pre_save hook: point the migration fields at the live table so
 * that mig_table / mig_nb_table (the fields listed in
 * vmstate_spapr_tce_table) describe the current contents, then trace the
 * liobn, entry count, bus offset and page shift.
 */
174 static int spapr_tce_table_pre_save(void *opaque
)
176 SpaprTceTable
*tcet
= SPAPR_TCE_TABLE(opaque
);
/* mig_table aliases the live table here; no copy is made. */
178 tcet
->mig_table
= tcet
->table
;
179 tcet
->mig_nb_table
= tcet
->nb_table
;
181 trace_spapr_iommu_pre_save(tcet
->liobn
, tcet
->mig_nb_table
,
182 tcet
->bus_offset
, tcet
->page_shift
);
/*
 * get_min_page_size callback: returns the IOMMU page size of the table
 * that embeds this region, computed as 2^page_shift.
 */
187 static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion
*iommu
)
189 SpaprTceTable
*tcet
= container_of(iommu
, SpaprTceTable
, iommu
);
191 return 1ULL << tcet
->page_shift
;
/*
 * get_attr callback. For IOMMU_ATTR_SPAPR_TCE_FD, and only when
 * kvmppc_has_cap_spapr_vfio() is true, the fd of the table is stored
 * through data, which is written as an int pointer. Return values and the
 * handling of other attributes are not visible in this excerpt.
 */
194 static int spapr_tce_get_attr(IOMMUMemoryRegion
*iommu
,
195 enum IOMMUMemoryRegionAttr attr
, void *data
)
197 SpaprTceTable
*tcet
= container_of(iommu
, SpaprTceTable
, iommu
);
199 if (attr
== IOMMU_ATTR_SPAPR_TCE_FD
&& kvmppc_has_cap_spapr_vfio()) {
200 *(int *) data
= tcet
->fd
;
/*
 * notify_flag_changed callback.
 *
 * A request that includes IOMMU_NOTIFIER_DEVIOTLB_UNMAP sets an error
 * through errp. Otherwise the transition between no notifiers and some
 * notifiers toggles the need_vfio state of the table: none -> some calls
 * spapr_tce_set_need_vfio(tbl, true), some -> none calls it with false.
 *
 * NOTE(review): the error text spells the prefix spart_tce where
 * spapr_tce looks intended; it is a runtime string and is left untouched
 * here.
 */
207 static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion
*iommu
,
208 IOMMUNotifierFlag old
,
209 IOMMUNotifierFlag
new,
212 struct SpaprTceTable
*tbl
= container_of(iommu
, SpaprTceTable
, iommu
);
214 if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP
) {
215 error_setg(errp
, "spart_tce does not support dev-iotlb yet");
/* First notifier registered. */
219 if (old
== IOMMU_NOTIFIER_NONE
&& new != IOMMU_NOTIFIER_NONE
) {
220 spapr_tce_set_need_vfio(tbl
, true);
/* Last notifier removed. */
221 } else if (old
!= IOMMU_NOTIFIER_NONE
&& new == IOMMU_NOTIFIER_NONE
) {
222 spapr_tce_set_need_vfio(tbl
, false);
/*
 * VMState post_load hook: reconcile the live table with the migrated one.
 *
 * Visible steps: the bypass setting is pushed to the VIO device; a table
 * whose entry count differs from mig_nb_table is disabled; a table with
 * no entries is enabled again using the saved page shift and bus offset;
 * the migrated entries are copied into the live table; the migration
 * buffer is freed and its pointer cleared; the result is traced.
 *
 * NOTE(review): several lines, including the exact nesting of the
 * conditions and the guard around spapr_vio_set_bypass(), are not visible
 * in this excerpt.
 */
227 static int spapr_tce_table_post_load(void *opaque
, int version_id
)
229 SpaprTceTable
*tcet
= SPAPR_TCE_TABLE(opaque
);
/*
 * Captured up front: spapr_tce_table_disable() resets bus_offset and
 * page_shift to 0.
 */
230 uint32_t old_nb_table
= tcet
->nb_table
;
231 uint64_t old_bus_offset
= tcet
->bus_offset
;
232 uint32_t old_page_shift
= tcet
->page_shift
;
235 spapr_vio_set_bypass(tcet
->vdev
, tcet
->bypass
);
238 if (tcet
->mig_nb_table
!= tcet
->nb_table
) {
239 spapr_tce_table_disable(tcet
);
242 if (tcet
->mig_nb_table
) {
243 if (!tcet
->nb_table
) {
244 spapr_tce_table_enable(tcet
, old_page_shift
, old_bus_offset
,
/* Copy nb_table entries from the migration buffer into the live table. */
248 memcpy(tcet
->table
, tcet
->mig_table
,
249 tcet
->nb_table
* sizeof(tcet
->table
[0]));
251 g_free(tcet
->mig_table
);
252 tcet
->mig_table
= NULL
;
255 trace_spapr_iommu_post_load(tcet
->liobn
, old_nb_table
, tcet
->nb_table
,
256 tcet
->bus_offset
, tcet
->page_shift
);
/*
 * Predicate for the spapr_iommu_ex subsection: true when the table has a
 * non-zero bus offset or a page shift other than 0xC (2^12 byte pages).
 */
261 static bool spapr_tce_table_ex_needed(void *opaque
)
263 SpaprTceTable
*tcet
= opaque
;
265 return tcet
->bus_offset
|| tcet
->page_shift
!= 0xC;
/*
 * Migration subsection carrying bus_offset and page_shift. It is
 * referenced from the subsections list of vmstate_spapr_tce_table and
 * gated by spapr_tce_table_ex_needed().
 */
268 static const VMStateDescription vmstate_spapr_tce_table_ex
= {
269 .name
= "spapr_iommu_ex",
271 .minimum_version_id
= 1,
272 .needed
= spapr_tce_table_ex_needed
,
273 .fields
= (VMStateField
[]) {
274 VMSTATE_UINT64(bus_offset
, SpaprTceTable
),
275 VMSTATE_UINT32(page_shift
, SpaprTceTable
),
276 VMSTATE_END_OF_LIST()
/*
 * Main migration description for a TCE table.
 *
 * Visible fields: liobn (checked for equality), mig_nb_table, bypass, the
 * variable-length mig_table array sized by mig_nb_table, and def_win,
 * which is tagged with version 3. pre_save and post_load translate
 * between the live table and the mig_* fields.
 *
 * NOTE(review): the version_id line is not visible in this excerpt.
 */
280 static const VMStateDescription vmstate_spapr_tce_table
= {
281 .name
= "spapr_iommu",
283 .minimum_version_id
= 2,
284 .pre_save
= spapr_tce_table_pre_save
,
285 .post_load
= spapr_tce_table_post_load
,
286 .fields
= (VMStateField
[]) {
288 VMSTATE_UINT32_EQUAL(liobn
, SpaprTceTable
, NULL
),
291 VMSTATE_UINT32(mig_nb_table
, SpaprTceTable
),
292 VMSTATE_BOOL(bypass
, SpaprTceTable
),
293 VMSTATE_VARRAY_UINT32_ALLOC(mig_table
, SpaprTceTable
, mig_nb_table
, 0,
294 vmstate_info_uint64
, uint64_t),
295 VMSTATE_BOOL_V(def_win
, SpaprTceTable
, 3),
297 VMSTATE_END_OF_LIST()
299 .subsections
= (const VMStateDescription
*[]) {
300 &vmstate_spapr_tce_table_ex
,
/*
 * Device realize hook for a TCE table.
 *
 * Visible steps: clear need_vfio, create the root memory region named
 * tce-root-<liobn> with size UINT64_MAX, create the IOMMU memory region
 * named tce-iommu-<liobn> of type TYPE_SPAPR_IOMMU_MEMORY_REGION, add the
 * table to spapr_tce_tables and register its migration state with the
 * liobn as instance id.
 *
 * NOTE(review): tmp is assigned twice from g_strdup_printf(); the
 * matching frees are not visible in this excerpt - confirm they exist.
 */
305 static void spapr_tce_table_realize(DeviceState
*dev
, Error
**errp
)
307 SpaprTceTable
*tcet
= SPAPR_TCE_TABLE(dev
);
308 Object
*tcetobj
= OBJECT(tcet
);
312 tcet
->need_vfio
= false;
313 tmp
= g_strdup_printf("tce-root-%x", tcet
->liobn
);
314 memory_region_init(&tcet
->root
, tcetobj
, tmp
, UINT64_MAX
);
317 tmp
= g_strdup_printf("tce-iommu-%x", tcet
->liobn
);
318 memory_region_init_iommu(&tcet
->iommu
, sizeof(tcet
->iommu
),
319 TYPE_SPAPR_IOMMU_MEMORY_REGION
,
/* Make the table discoverable through spapr_tce_find_by_liobn(). */
323 QLIST_INSERT_HEAD(&spapr_tce_tables
, tcet
, list
);
325 vmstate_register(VMSTATE_IF(tcet
), tcet
->liobn
, &vmstate_spapr_tce_table
,
/*
 * Switch the need_vfio state of a table.
 *
 * The new value must differ from the current one (asserted). When VFIO is
 * no longer needed, or when the table has a valid fd and
 * kvmppc_has_cap_spapr_vfio() is true, the guarded branch is taken; its
 * body is not visible in this excerpt. Otherwise a new table is obtained
 * from spapr_tce_alloc_table(), the old contents are copied into it and
 * the old table is released.
 */
329 void spapr_tce_set_need_vfio(SpaprTceTable
*tcet
, bool need_vfio
)
331 size_t table_size
= tcet
->nb_table
* sizeof(uint64_t);
/* Callers must only report real transitions. */
335 g_assert(need_vfio
!= tcet
->need_vfio
);
337 tcet
->need_vfio
= need_vfio
;
339 if (!need_vfio
|| (tcet
->fd
!= -1 && kvmppc_has_cap_spapr_vfio())) {
/* Keep the old table so its entries can be carried over. */
343 oldtable
= tcet
->table
;
345 tcet
->table
= spapr_tce_alloc_table(tcet
->liobn
,
351 memcpy(tcet
->table
, oldtable
, table_size
);
353 spapr_tce_free_table(oldtable
, tcet
->fd
, tcet
->nb_table
);
/*
 * Create a new TCE table object for the given LIOBN and attach it to
 * owner as a child property named tce-table-<liobn>.
 *
 * A LIOBN that is already present in spapr_tce_tables is reported with
 * error_report(). The new object is unreferenced after being added as a
 * child and is then realized with qdev_realize(), passing NULL for both
 * the bus and the error pointer.
 *
 * NOTE(review): return statements and the handling of tmp are not
 * visible in this excerpt.
 */
358 SpaprTceTable
*spapr_tce_new_table(DeviceState
*owner
, uint32_t liobn
)
363 if (spapr_tce_find_by_liobn(liobn
)) {
364 error_report("Attempted to create TCE table with duplicate"
365 " LIOBN 0x%x", liobn
);
369 tcet
= SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE
));
372 tmp
= g_strdup_printf("tce-table-%x", liobn
);
373 object_property_add_child(OBJECT(owner
), tmp
, OBJECT(tcet
));
375 object_unref(OBJECT(tcet
));
377 qdev_realize(DEVICE(tcet
), NULL
, NULL
);
/*
 * Enable a TCE table with the given page shift, bus offset and entry
 * count.
 *
 * Enabling a table that already has entries only produces a warning. The
 * geometry is stored in the table, backing storage is obtained from
 * spapr_tce_alloc_table(), the IOMMU region is resized to
 * nb_table << page_shift bytes and mapped into the root region at
 * bus_offset.
 */
382 void spapr_tce_table_enable(SpaprTceTable
*tcet
,
383 uint32_t page_shift
, uint64_t bus_offset
,
386 if (tcet
->nb_table
) {
387 warn_report("trying to enable already enabled TCE table");
391 tcet
->bus_offset
= bus_offset
;
392 tcet
->page_shift
= page_shift
;
393 tcet
->nb_table
= nb_table
;
394 tcet
->table
= spapr_tce_alloc_table(tcet
->liobn
,
/* The count is widened to 64 bits before shifting. */
401 memory_region_set_size(MEMORY_REGION(&tcet
->iommu
),
402 (uint64_t)tcet
->nb_table
<< tcet
->page_shift
);
403 memory_region_add_subregion(&tcet
->root
, tcet
->bus_offset
,
404 MEMORY_REGION(&tcet
->iommu
));
/*
 * Disable a TCE table: unmap the IOMMU region from the root region,
 * shrink it to size 0, release the backing table and reset bus_offset and
 * page_shift to 0.
 *
 * A table without entries takes the guarded branch at the top; its body
 * and the statements between the free and the resets are not visible in
 * this excerpt.
 */
407 void spapr_tce_table_disable(SpaprTceTable
*tcet
)
409 if (!tcet
->nb_table
) {
413 memory_region_del_subregion(&tcet
->root
, MEMORY_REGION(&tcet
->iommu
));
414 memory_region_set_size(MEMORY_REGION(&tcet
->iommu
), 0);
416 spapr_tce_free_table(tcet
->table
, tcet
->fd
, tcet
->nb_table
);
419 tcet
->bus_offset
= 0;
420 tcet
->page_shift
= 0;
/*
 * Device unrealize hook: unregister the migration state, remove the table
 * from spapr_tce_tables and disable it.
 */
424 static void spapr_tce_table_unrealize(DeviceState
*dev
)
426 SpaprTceTable
*tcet
= SPAPR_TCE_TABLE(dev
);
428 vmstate_unregister(VMSTATE_IF(tcet
), &vmstate_spapr_tce_table
, tcet
);
430 QLIST_REMOVE(tcet
, list
);
432 spapr_tce_table_disable(tcet
);
/*
 * Accessor that takes a SpaprTceTable and returns a MemoryRegion pointer.
 * NOTE(review): the body is not visible in this excerpt, so which region
 * is returned cannot be confirmed from here.
 */
435 MemoryRegion
*spapr_tce_get_iommu(SpaprTceTable
*tcet
)
/*
 * Device reset hook: when the table has entries, zero all of them
 * (nb_table entries of sizeof(uint64_t) bytes each).
 */
440 static void spapr_tce_reset(DeviceState
*dev
)
442 SpaprTceTable
*tcet
= SPAPR_TCE_TABLE(dev
);
443 size_t table_size
= tcet
->nb_table
* sizeof(uint64_t);
445 if (tcet
->nb_table
) {
446 memset(tcet
->table
, 0, table_size
);
/*
 * Store one TCE in the emulated table and notify IOMMU listeners.
 *
 * The table index is the page number of ioba relative to the bus offset
 * of the table. An index outside the table is reported with
 * hcall_dprintf(). Otherwise the entry is written and an event is built
 * from it: page-aligned iova and translated address, in-page offset mask,
 * permissions derived from the TCE, and type MAP when any permission is
 * set or UNMAP when none is.
 *
 * NOTE(review): return values are not visible in this excerpt.
 */
450 static target_ulong
put_tce_emu(SpaprTceTable
*tcet
, target_ulong ioba
,
454 hwaddr page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
455 unsigned long index
= (ioba
- tcet
->bus_offset
) >> tcet
->page_shift
;
457 if (index
>= tcet
->nb_table
) {
458 hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
459 TARGET_FMT_lx
"\n", ioba
);
463 tcet
->table
[index
] = tce
;
/*
 * NOTE(review): this assignment ends with a comma rather than a
 * semicolon, so it is chained to the next one by the comma operator.
 * The effect looks the same, but it reads like a typo.
 */
465 event
.entry
.target_as
= &address_space_memory
,
466 event
.entry
.iova
= (ioba
- tcet
->bus_offset
) & page_mask
;
467 event
.entry
.translated_addr
= tce
& page_mask
;
468 event
.entry
.addr_mask
= ~page_mask
;
469 event
.entry
.perm
= spapr_tce_iommu_access_flags(tce
);
/* A TCE without permissions is reported as an unmap. */
470 event
.type
= event
.entry
.perm
? IOMMU_NOTIFIER_MAP
: IOMMU_NOTIFIER_UNMAP
;
471 memory_region_notify_iommu(&tcet
->iommu
, 0, event
);
/*
 * Hypercall handler registered for H_PUT_TCE_INDIRECT.
 *
 * Arguments: args[0] liobn, args[1] starting ioba, args[2] guest address
 * of the TCE list, args[3] number of pages. Requests with more than 512
 * pages, or with a list address that has SPAPR_TCE_PAGE_MASK bits set,
 * take the guarded branch. Each TCE is read big-endian from guest memory
 * and stored with put_tce_emu() at consecutive IOMMU pages.
 *
 * NOTE(review): the NULL check on tcet, the early returns and the loop
 * exit on error are not visible in this excerpt.
 */
476 static target_ulong
h_put_tce_indirect(PowerPCCPU
*cpu
,
477 SpaprMachineState
*spapr
,
478 target_ulong opcode
, target_ulong
*args
)
481 target_ulong liobn
= args
[0];
482 target_ulong ioba
= args
[1];
/* Starting ioba is kept for tracing; ioba itself advances in the loop. */
483 target_ulong ioba1
= ioba
;
484 target_ulong tce_list
= args
[2];
485 target_ulong npages
= args
[3];
486 target_ulong ret
= H_PARAMETER
, tce
= 0;
487 SpaprTceTable
*tcet
= spapr_tce_find_by_liobn(liobn
);
488 CPUState
*cs
= CPU(cpu
);
489 hwaddr page_mask
, page_size
;
495 if ((npages
> 512) || (tce_list
& SPAPR_TCE_PAGE_MASK
)) {
499 page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
500 page_size
= IOMMU_PAGE_SIZE(tcet
->page_shift
);
503 for (i
= 0; i
< npages
; ++i
, ioba
+= page_size
) {
504 tce
= ldq_be_phys(cs
->as
, tce_list
+ i
* sizeof(target_ulong
));
506 ret
= put_tce_emu(tcet
, ioba
, tce
);
512 /* Trace last successful or the first problematic entry */
514 if (SPAPR_IS_PCI_LIOBN(liobn
)) {
515 trace_spapr_iommu_pci_indirect(liobn
, ioba1
, tce_list
, i
, tce
, ret
);
517 trace_spapr_iommu_indirect(liobn
, ioba1
, tce_list
, i
, tce
, ret
);
/*
 * Hypercall handler registered for H_STUFF_TCE.
 *
 * Arguments: args[0] liobn, args[1] starting ioba, args[2] the TCE value
 * to store, args[3] number of pages. A page count larger than the number
 * of table entries takes the guarded branch. The same value is stored
 * with put_tce_emu() at npages consecutive IOMMU pages.
 *
 * NOTE(review): the NULL check on tcet, the early returns and the loop
 * exit on error are not visible in this excerpt.
 */
522 static target_ulong
h_stuff_tce(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
523 target_ulong opcode
, target_ulong
*args
)
526 target_ulong liobn
= args
[0];
527 target_ulong ioba
= args
[1];
528 target_ulong tce_value
= args
[2];
529 target_ulong npages
= args
[3];
530 target_ulong ret
= H_PARAMETER
;
531 SpaprTceTable
*tcet
= spapr_tce_find_by_liobn(liobn
);
532 hwaddr page_mask
, page_size
;
538 if (npages
> tcet
->nb_table
) {
542 page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
543 page_size
= IOMMU_PAGE_SIZE(tcet
->page_shift
);
546 for (i
= 0; i
< npages
; ++i
, ioba
+= page_size
) {
547 ret
= put_tce_emu(tcet
, ioba
, tce_value
);
/*
 * NOTE(review): unlike h_put_tce_indirect(), which keeps the starting
 * address in ioba1, the trace calls below receive ioba after the loop
 * has advanced it. Confirm this is the intended value to trace.
 */
552 if (SPAPR_IS_PCI_LIOBN(liobn
)) {
553 trace_spapr_iommu_pci_stuff(liobn
, ioba
, tce_value
, npages
, ret
);
555 trace_spapr_iommu_stuff(liobn
, ioba
, tce_value
, npages
, ret
);
/*
 * Hypercall handler registered for H_PUT_TCE.
 *
 * Arguments: args[0] liobn, args[1] ioba, args[2] the TCE to store. The
 * table is looked up by liobn and the entry is written with
 * put_tce_emu(); the outcome is traced through the PCI or the generic
 * trace point depending on SPAPR_IS_PCI_LIOBN(liobn). ret starts out as
 * H_PARAMETER.
 *
 * NOTE(review): the NULL check on tcet and the use of page_mask are not
 * visible in this excerpt.
 */
561 static target_ulong
h_put_tce(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
562 target_ulong opcode
, target_ulong
*args
)
564 target_ulong liobn
= args
[0];
565 target_ulong ioba
= args
[1];
566 target_ulong tce
= args
[2];
567 target_ulong ret
= H_PARAMETER
;
568 SpaprTceTable
*tcet
= spapr_tce_find_by_liobn(liobn
);
571 hwaddr page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
575 ret
= put_tce_emu(tcet
, ioba
, tce
);
577 if (SPAPR_IS_PCI_LIOBN(liobn
)) {
578 trace_spapr_iommu_pci_put(liobn
, ioba
, tce
, ret
);
580 trace_spapr_iommu_put(liobn
, ioba
, tce
, ret
);
/*
 * Read one TCE from the emulated table.
 *
 * The table index is the page number of ioba relative to the bus offset
 * of the table. An index outside the table is reported with
 * hcall_dprintf(); otherwise the entry is stored through tce.
 *
 * NOTE(review): return values are not visible in this excerpt.
 */
586 static target_ulong
get_tce_emu(SpaprTceTable
*tcet
, target_ulong ioba
,
589 unsigned long index
= (ioba
- tcet
->bus_offset
) >> tcet
->page_shift
;
591 if (index
>= tcet
->nb_table
) {
592 hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
593 TARGET_FMT_lx
"\n", ioba
);
597 *tce
= tcet
->table
[index
];
/*
 * Hypercall handler registered for H_GET_TCE.
 *
 * Arguments: args[0] liobn, args[1] ioba. The table is looked up by liobn
 * and the entry is fetched with get_tce_emu() into a local that starts at
 * 0; the outcome is traced through the PCI or the generic trace point
 * depending on SPAPR_IS_PCI_LIOBN(liobn). ret starts out as H_PARAMETER.
 *
 * NOTE(review): the NULL check on tcet, the use of page_mask and the way
 * the TCE is handed back to the guest are not visible in this excerpt.
 */
602 static target_ulong
h_get_tce(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
603 target_ulong opcode
, target_ulong
*args
)
605 target_ulong liobn
= args
[0];
606 target_ulong ioba
= args
[1];
607 target_ulong tce
= 0;
608 target_ulong ret
= H_PARAMETER
;
609 SpaprTceTable
*tcet
= spapr_tce_find_by_liobn(liobn
);
612 hwaddr page_mask
= IOMMU_PAGE_MASK(tcet
->page_shift
);
616 ret
= get_tce_emu(tcet
, ioba
, &tce
);
621 if (SPAPR_IS_PCI_LIOBN(liobn
)) {
622 trace_spapr_iommu_pci_get(liobn
, ioba
, ret
, tce
);
624 trace_spapr_iommu_get(liobn
, ioba
, ret
, tce
);
/*
 * Add DMA window properties to a device tree node.
 *
 * Sets ibm,#dma-address-cells and ibm,#dma-size-cells to 2 and then
 * writes propname as five big-endian 32-bit cells: the liobn, the high
 * and low halves of the 64-bit window address, and the size as a zero
 * high cell followed by the 32-bit size.
 *
 * NOTE(review): the checks on ret after each fdt call and the final
 * return are not visible in this excerpt.
 */
630 int spapr_dma_dt(void *fdt
, int node_off
, const char *propname
,
631 uint32_t liobn
, uint64_t window
, uint32_t size
)
633 uint32_t dma_prop
[5];
636 dma_prop
[0] = cpu_to_be32(liobn
);
637 dma_prop
[1] = cpu_to_be32(window
>> 32);
638 dma_prop
[2] = cpu_to_be32(window
& 0xFFFFFFFF);
639 dma_prop
[3] = 0; /* window size is 32 bits */
640 dma_prop
[4] = cpu_to_be32(size
);
642 ret
= fdt_setprop_cell(fdt
, node_off
, "ibm,#dma-address-cells", 2);
647 ret
= fdt_setprop_cell(fdt
, node_off
, "ibm,#dma-size-cells", 2);
652 ret
= fdt_setprop(fdt
, node_off
, propname
, dma_prop
, sizeof(dma_prop
));
/*
 * Convenience wrapper around spapr_dma_dt() that describes the window of
 * a TCE table: its liobn, window address 0 and a size of
 * nb_table << page_shift.
 *
 * NOTE(review): the shift here is not widened with a (uint64_t) cast as
 * it is in spapr_tce_table_enable(), and the result is passed as the
 * uint32_t size parameter of spapr_dma_dt(). Confirm that tables used
 * with this helper cannot exceed 32 bits of window size. The handling of
 * a NULL tcet is not visible in this excerpt.
 */
660 int spapr_tcet_dma_dt(void *fdt
, int node_off
, const char *propname
,
667 return spapr_dma_dt(fdt
, node_off
, propname
,
668 tcet
->liobn
, 0, tcet
->nb_table
<< tcet
->page_shift
);
/*
 * Class initialiser for the TCE table device type: installs the realize,
 * reset and unrealize hooks, marks the device as not user creatable,
 * initialises the spapr_tce_tables list and registers the four TCE
 * hypercall handlers defined in this file.
 */
671 static void spapr_tce_table_class_init(ObjectClass
*klass
, void *data
)
673 DeviceClass
*dc
= DEVICE_CLASS(klass
);
674 dc
->realize
= spapr_tce_table_realize
;
675 dc
->reset
= spapr_tce_reset
;
676 dc
->unrealize
= spapr_tce_table_unrealize
;
677 /* Reason: This is just an internal device for handling the hypercalls */
678 dc
->user_creatable
= false;
680 QLIST_INIT(&spapr_tce_tables
);
683 spapr_register_hypercall(H_PUT_TCE
, h_put_tce
);
684 spapr_register_hypercall(H_GET_TCE
, h_get_tce
);
685 spapr_register_hypercall(H_PUT_TCE_INDIRECT
, h_put_tce_indirect
);
686 spapr_register_hypercall(H_STUFF_TCE
, h_stuff_tce
);
/*
 * Type description for TYPE_SPAPR_TCE_TABLE: a TYPE_DEVICE subclass whose
 * instances are SpaprTceTable structures.
 */
689 static const TypeInfo spapr_tce_table_info
= {
690 .name
= TYPE_SPAPR_TCE_TABLE
,
691 .parent
= TYPE_DEVICE
,
692 .instance_size
= sizeof(SpaprTceTable
),
693 .class_init
= spapr_tce_table_class_init
,
/*
 * Class initialiser for the sPAPR IOMMU memory region type: wires the
 * translate, replay, get_min_page_size, notify_flag_changed and get_attr
 * callbacks to the implementations in this file.
 */
696 static void spapr_iommu_memory_region_class_init(ObjectClass
*klass
, void *data
)
698 IOMMUMemoryRegionClass
*imrc
= IOMMU_MEMORY_REGION_CLASS(klass
);
700 imrc
->translate
= spapr_tce_translate_iommu
;
701 imrc
->replay
= spapr_tce_replay
;
702 imrc
->get_min_page_size
= spapr_tce_get_min_page_size
;
703 imrc
->notify_flag_changed
= spapr_tce_notify_flag_changed
;
704 imrc
->get_attr
= spapr_tce_get_attr
;
/*
 * Type description for TYPE_SPAPR_IOMMU_MEMORY_REGION, a subclass of
 * TYPE_IOMMU_MEMORY_REGION.
 */
707 static const TypeInfo spapr_iommu_memory_region_info
= {
708 .parent
= TYPE_IOMMU_MEMORY_REGION
,
709 .name
= TYPE_SPAPR_IOMMU_MEMORY_REGION
,
710 .class_init
= spapr_iommu_memory_region_class_init
,
/* Register the two types defined in this file with the type system. */
713 static void register_types(void)
715 type_register_static(&spapr_tce_table_info
);
716 type_register_static(&spapr_iommu_memory_region_info
);
/* Hook register_types() into the type_init() registration mechanism. */
719 type_init(register_types
);