/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "io-pgtable.h"
#include "arm-smmu-regs.h"
#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif

/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
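
/*
 * Illustrative sketch (not part of the driver): how the page-indexed
 * register macros above resolve for a hypothetical SMMU mapped at
 * 0x40000000 with 4K register pages (pgshift == 12). The base address
 * and page size are example assumptions, not architectural requirements:
 *
 *	GR0  = base                      = 0x40000000
 *	GR1  = base + (1 << pgshift)     = 0x40001000
 *	CB n = cb_base + (n << pgshift)  = cb_base + n * 0x1000
 *
 * So context bank 3 of this example instance starts three register
 * pages above cb_base.
 */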
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};

struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
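
/*
 * Usage sketch (illustration only, assuming a populated iommu_fwspec):
 * the helper walks every stream ID owned by a master and yields the
 * stream-map entry index recorded for it; entries that have not been
 * allocated yet read back as INVALID_SMENDX.
 *
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;	// no SME claimed for this ID yet
 *		arm_smmu_write_sme(smmu, idx);
 *	}
 */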
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};

enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}
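
/*
 * Usage sketch (illustration only): register accesses in this driver are
 * bracketed by the two helpers above so the SMMU's power domain and
 * clocks are up while it is being poked, e.g.
 *
 *	ret = arm_smmu_rpm_get(smmu);
 *	if (ret < 0)
 *		return ret;
 *	... access SMMU registers ...
 *	arm_smmu_rpm_put(smmu);
 *
 * When runtime PM is not enabled for the device, both calls collapse to
 * no-ops.
 */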
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}

static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(0, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}

static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}

/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
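
/*
 * Illustration only: a simplified sketch of how the io-pgtable core is
 * expected to drive the callbacks above on a strict unmap (the real
 * sequencing lives in the io-pgtable code, not here):
 *
 *	// for each block/page removed from the page tables
 *	tlb->tlb_add_flush(iova, size, granule, leaf, cookie);
 *	...
 *	// once the walk has finished, wait for the invalidation to land
 *	tlb->tlb_sync(cookie);
 *
 * tlb_flush_all() is the heavyweight fallback that nukes the whole
 * ASID/VMID in one go.
 */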
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TTBCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
	void __iomem *cb_base, *gr1_base;

	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= cfg->vmid << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= cfg->vmid << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));

	/*
	 * TTBCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	} else {
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}

/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = smmu->streamid_mask << SMR_ID_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = smr >> SMR_ID_SHIFT;

	smr = smmu->streamid_mask << SMR_MASK_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
}
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
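
/*
 * Worked example (illustration only, values invented): an existing SMR
 * with id = 0x400 and mask = 0x00f covers the 16 stream IDs
 * 0x400..0x40f. A request for id = 0x405, mask = 0x003 satisfies
 * (mask & smr.mask) == mask and ((id ^ smr.id) & ~smr.mask) == 0, so it
 * is entirely covered and the existing entry index is reused. A request
 * for id = 0x40f, mask = 0x0f0 is not fully covered (its mask bits
 * extend beyond smr.mask) yet it still overlaps at stream ID 0x40f, and
 * indeed ((id ^ smr.id) & ~(smr.mask | mask)) == 0, so it is rejected
 * as a conflict rather than risking an ambiguous match.
 */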
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}

static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static int arm_smmu_match_node(struct device *dev, void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}

static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);

	arm_smmu_rpm_put(smmu);

	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}

static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= (u16)args->args[0];

	if (args->args_count > 1)
		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= (u16)mask << SMR_MASK_SHIFT;

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
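
/*
 * Worked example (illustration only, numbers invented): a device whose
 * firmware specifier carries the two cells <0x830 0x7c0> ends up with
 * fwid = 0x830 | (0x7c0 << SMR_MASK_SHIFT), i.e. stream ID 0x830 in the
 * low halfword and SMR mask 0x7c0 in the high halfword. The pair is
 * unpacked again with the reverse shifts in arm_smmu_add_device() and
 * arm_smmu_master_alloc_smes().
 */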
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}

static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
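{
	/*
	 * Body reconstructed as a sketch: the ID register size fields are
	 * assumed to use the usual ARMv8 physical-address-size encoding
	 * (0 -> 32 bits, 1 -> 36, 2 -> 40, 3 -> 42, 4 -> 44, 5+ -> 48),
	 * rather than being a verbatim copy of the elided original.
	 */
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
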
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size <<= smmu->pgshift;
	if (smmu->cb_base != gr0_base + size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
			size * 2, (smmu->cb_base - gr0_base) * 2);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}

static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->cb_base = smmu->base + resource_size(res) / 2;

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}

static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);