2 * IOMMU API for ARM architected SMMU implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * Copyright (C) 2013 ARM Limited
19 * Author: Will Deacon <will.deacon@arm.com>
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
30 #define pr_fmt(fmt) "arm-smmu: " fmt
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
54 #include <linux/amba/bus.h>
56 #include "io-pgtable.h"
58 /* Maximum number of context banks per SMMU */
59 #define ARM_SMMU_MAX_CBS 128
61 /* SMMU global address space */
62 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
63 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
66 * SMMU global address space with conditional offset to access secure
67 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
70 #define ARM_SMMU_GR0_NS(smmu) \
72 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
76 * Some 64-bit registers only make sense to write atomically, but in such
77 * cases all the data relevant to AArch32 formats lies within the lower word,
78 * therefore this actually makes more sense than it might first appear.
81 #define smmu_write_atomic_lq writeq_relaxed
83 #define smmu_write_atomic_lq writel_relaxed
86 /* Configuration registers */
87 #define ARM_SMMU_GR0_sCR0 0x0
88 #define sCR0_CLIENTPD (1 << 0)
89 #define sCR0_GFRE (1 << 1)
90 #define sCR0_GFIE (1 << 2)
91 #define sCR0_EXIDENABLE (1 << 3)
92 #define sCR0_GCFGFRE (1 << 4)
93 #define sCR0_GCFGFIE (1 << 5)
94 #define sCR0_USFCFG (1 << 10)
95 #define sCR0_VMIDPNE (1 << 11)
96 #define sCR0_PTM (1 << 12)
97 #define sCR0_FB (1 << 13)
98 #define sCR0_VMID16EN (1 << 31)
99 #define sCR0_BSU_SHIFT 14
100 #define sCR0_BSU_MASK 0x3
102 /* Auxiliary Configuration register */
103 #define ARM_SMMU_GR0_sACR 0x10
105 /* Identification registers */
106 #define ARM_SMMU_GR0_ID0 0x20
107 #define ARM_SMMU_GR0_ID1 0x24
108 #define ARM_SMMU_GR0_ID2 0x28
109 #define ARM_SMMU_GR0_ID3 0x2c
110 #define ARM_SMMU_GR0_ID4 0x30
111 #define ARM_SMMU_GR0_ID5 0x34
112 #define ARM_SMMU_GR0_ID6 0x38
113 #define ARM_SMMU_GR0_ID7 0x3c
114 #define ARM_SMMU_GR0_sGFSR 0x48
115 #define ARM_SMMU_GR0_sGFSYNR0 0x50
116 #define ARM_SMMU_GR0_sGFSYNR1 0x54
117 #define ARM_SMMU_GR0_sGFSYNR2 0x58
119 #define ID0_S1TS (1 << 30)
120 #define ID0_S2TS (1 << 29)
121 #define ID0_NTS (1 << 28)
122 #define ID0_SMS (1 << 27)
123 #define ID0_ATOSNS (1 << 26)
124 #define ID0_PTFS_NO_AARCH32 (1 << 25)
125 #define ID0_PTFS_NO_AARCH32S (1 << 24)
126 #define ID0_CTTW (1 << 14)
127 #define ID0_NUMIRPT_SHIFT 16
128 #define ID0_NUMIRPT_MASK 0xff
129 #define ID0_NUMSIDB_SHIFT 9
130 #define ID0_NUMSIDB_MASK 0xf
131 #define ID0_EXIDS (1 << 8)
132 #define ID0_NUMSMRG_SHIFT 0
133 #define ID0_NUMSMRG_MASK 0xff
135 #define ID1_PAGESIZE (1 << 31)
136 #define ID1_NUMPAGENDXB_SHIFT 28
137 #define ID1_NUMPAGENDXB_MASK 7
138 #define ID1_NUMS2CB_SHIFT 16
139 #define ID1_NUMS2CB_MASK 0xff
140 #define ID1_NUMCB_SHIFT 0
141 #define ID1_NUMCB_MASK 0xff
143 #define ID2_OAS_SHIFT 4
144 #define ID2_OAS_MASK 0xf
145 #define ID2_IAS_SHIFT 0
146 #define ID2_IAS_MASK 0xf
147 #define ID2_UBS_SHIFT 8
148 #define ID2_UBS_MASK 0xf
149 #define ID2_PTFS_4K (1 << 12)
150 #define ID2_PTFS_16K (1 << 13)
151 #define ID2_PTFS_64K (1 << 14)
152 #define ID2_VMID16 (1 << 15)
154 #define ID7_MAJOR_SHIFT 4
155 #define ID7_MAJOR_MASK 0xf
157 /* Global TLB invalidation */
158 #define ARM_SMMU_GR0_TLBIVMID 0x64
159 #define ARM_SMMU_GR0_TLBIALLNSNH 0x68
160 #define ARM_SMMU_GR0_TLBIALLH 0x6c
161 #define ARM_SMMU_GR0_sTLBGSYNC 0x70
162 #define ARM_SMMU_GR0_sTLBGSTATUS 0x74
163 #define sTLBGSTATUS_GSACTIVE (1 << 0)
164 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
165 #define TLB_SPIN_COUNT 10
167 /* Stream mapping registers */
168 #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
169 #define SMR_VALID (1 << 31)
170 #define SMR_MASK_SHIFT 16
171 #define SMR_ID_SHIFT 0
173 #define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
174 #define S2CR_CBNDX_SHIFT 0
175 #define S2CR_CBNDX_MASK 0xff
176 #define S2CR_EXIDVALID (1 << 10)
177 #define S2CR_TYPE_SHIFT 16
178 #define S2CR_TYPE_MASK 0x3
179 enum arm_smmu_s2cr_type
{
185 #define S2CR_PRIVCFG_SHIFT 24
186 #define S2CR_PRIVCFG_MASK 0x3
187 enum arm_smmu_s2cr_privcfg
{
188 S2CR_PRIVCFG_DEFAULT
,
194 /* Context bank attribute registers */
195 #define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
196 #define CBAR_VMID_SHIFT 0
197 #define CBAR_VMID_MASK 0xff
198 #define CBAR_S1_BPSHCFG_SHIFT 8
199 #define CBAR_S1_BPSHCFG_MASK 3
200 #define CBAR_S1_BPSHCFG_NSH 3
201 #define CBAR_S1_MEMATTR_SHIFT 12
202 #define CBAR_S1_MEMATTR_MASK 0xf
203 #define CBAR_S1_MEMATTR_WB 0xf
204 #define CBAR_TYPE_SHIFT 16
205 #define CBAR_TYPE_MASK 0x3
206 #define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT)
207 #define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT)
208 #define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT)
209 #define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT)
210 #define CBAR_IRPTNDX_SHIFT 24
211 #define CBAR_IRPTNDX_MASK 0xff
213 #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
214 #define CBA2R_RW64_32BIT (0 << 0)
215 #define CBA2R_RW64_64BIT (1 << 0)
216 #define CBA2R_VMID_SHIFT 16
217 #define CBA2R_VMID_MASK 0xffff
219 /* Translation context bank */
220 #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
222 #define ARM_SMMU_CB_SCTLR 0x0
223 #define ARM_SMMU_CB_ACTLR 0x4
224 #define ARM_SMMU_CB_RESUME 0x8
225 #define ARM_SMMU_CB_TTBCR2 0x10
226 #define ARM_SMMU_CB_TTBR0 0x20
227 #define ARM_SMMU_CB_TTBR1 0x28
228 #define ARM_SMMU_CB_TTBCR 0x30
229 #define ARM_SMMU_CB_CONTEXTIDR 0x34
230 #define ARM_SMMU_CB_S1_MAIR0 0x38
231 #define ARM_SMMU_CB_S1_MAIR1 0x3c
232 #define ARM_SMMU_CB_PAR 0x50
233 #define ARM_SMMU_CB_FSR 0x58
234 #define ARM_SMMU_CB_FAR 0x60
235 #define ARM_SMMU_CB_FSYNR0 0x68
236 #define ARM_SMMU_CB_S1_TLBIVA 0x600
237 #define ARM_SMMU_CB_S1_TLBIASID 0x610
238 #define ARM_SMMU_CB_S1_TLBIVAL 0x620
239 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
240 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
241 #define ARM_SMMU_CB_TLBSYNC 0x7f0
242 #define ARM_SMMU_CB_TLBSTATUS 0x7f4
243 #define ARM_SMMU_CB_ATS1PR 0x800
244 #define ARM_SMMU_CB_ATSR 0x8f0
246 #define SCTLR_S1_ASIDPNE (1 << 12)
247 #define SCTLR_CFCFG (1 << 7)
248 #define SCTLR_CFIE (1 << 6)
249 #define SCTLR_CFRE (1 << 5)
250 #define SCTLR_E (1 << 4)
251 #define SCTLR_AFE (1 << 2)
252 #define SCTLR_TRE (1 << 1)
253 #define SCTLR_M (1 << 0)
255 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
257 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
258 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
260 #define CB_PAR_F (1 << 0)
262 #define ATSR_ACTIVE (1 << 0)
264 #define RESUME_RETRY (0 << 0)
265 #define RESUME_TERMINATE (1 << 0)
267 #define TTBCR2_SEP_SHIFT 15
268 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
269 #define TTBCR2_AS (1 << 4)
271 #define TTBRn_ASID_SHIFT 48
273 #define FSR_MULTI (1 << 31)
274 #define FSR_SS (1 << 30)
275 #define FSR_UUT (1 << 8)
276 #define FSR_ASF (1 << 7)
277 #define FSR_TLBLKF (1 << 6)
278 #define FSR_TLBMCF (1 << 5)
279 #define FSR_EF (1 << 4)
280 #define FSR_PF (1 << 3)
281 #define FSR_AFF (1 << 2)
282 #define FSR_TF (1 << 1)
284 #define FSR_IGN (FSR_AFF | FSR_ASF | \
285 FSR_TLBMCF | FSR_TLBLKF)
286 #define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
287 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
289 #define FSYNR0_WNR (1 << 4)
291 #define MSI_IOVA_BASE 0x8000000
292 #define MSI_IOVA_LENGTH 0x100000
/*
 * Module parameter "force_stage" (int, registered with S_IRUGO permissions).
 * Per its description string, 1 or 2 forces that translation stage and any
 * other value forces nothing.
 */
294 static int force_stage
;
295 module_param(force_stage
, int, S_IRUGO
);
296 MODULE_PARM_DESC(force_stage
,
297 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/*
 * Module parameter "disable_bypass" (bool, registered with S_IRUGO
 * permissions). It selects the initial S2CR type used by s2cr_init_val:
 * fault when set, bypass otherwise.
 */
298 static bool disable_bypass
;
299 module_param(disable_bypass
, bool, S_IRUGO
);
300 MODULE_PARM_DESC(disable_bypass
,
301 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
/*
 * SMMU architecture version; compared against ARM_SMMU_V1 / ARM_SMMU_V2
 * elsewhere in this file.
 * NOTE(review): the enumerator list is not visible in this extract.
 */
303 enum arm_smmu_arch_version
{
/*
 * Type of the 'model' member of struct arm_smmu_device.
 * NOTE(review): the enumerator list is not visible in this extract.
 */
309 enum arm_smmu_implementation
{
/*
 * Software state for one stream-to-context register (S2CR) entry.
 * arm_smmu_write_s2cr() packs the type, cbndx and privcfg members into the
 * hardware register.
 * NOTE(review): some members used elsewhere in this file (e.g. count, cbndx)
 * are not visible in this extract.
 */
315 struct arm_smmu_s2cr
{
316 struct iommu_group
*group
;
318 enum arm_smmu_s2cr_type type
;
319 enum arm_smmu_s2cr_privcfg privcfg
;
/*
 * Reset value for an S2CR entry: the type is FAULT when the disable_bypass
 * module parameter is set and BYPASS otherwise.
 * NOTE(review): the end of the macro is not visible in this extract.
 */
323 #define s2cr_init_val (struct arm_smmu_s2cr){ \
324 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
/*
 * Software state for one stream match register (SMR) entry; other code in
 * this file accesses its id, mask and valid members.
 * NOTE(review): the member list is not visible in this extract.
 */
327 struct arm_smmu_smr
{
/*
 * Per-master driver data, reached through fwspec->iommu_priv (see
 * __fwspec_cfg()). Holds the owning SMMU; the fwspec_smendx() macro also
 * indexes an smendx[] member that is not visible in this extract.
 */
333 struct arm_smmu_master_cfg
{
334 struct arm_smmu_device
*smmu
;
/*
 * Helpers for walking a master's stream map entries via its iommu_fwspec.
 *
 * __fwspec_cfg(fw)      - fw->iommu_priv cast to struct arm_smmu_master_cfg *.
 * fwspec_smmu(fw)       - the SMMU recorded in that master cfg.
 * fwspec_smendx(fw, i)  - smendx[i], or INVALID_SMENDX once i >= fw->num_ids.
 * for_each_cfg_sme()    - iterates i over [0, fw->num_ids); the comma
 *                         expression assigns idx before the bound check, so
 *                         idx is INVALID_SMENDX when the loop terminates.
 *
 * Note that the macro arguments are not parenthesised, so callers should
 * pass plain identifiers.
 */
337 #define INVALID_SMENDX -1
338 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
339 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
340 #define fwspec_smendx(fw, i) \
341 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
342 #define for_each_cfg_sme(fw, i, idx) \
343 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
/*
 * Per-SMMU-instance state: register mappings, feature and option bit masks,
 * context-bank bookkeeping, stream-mapping tables and address-size limits.
 * NOTE(review): several members referenced elsewhere in this file (e.g. dev,
 * base, features, options, irqs, streamid_mask) are not visible in this
 * extract.
 */
345 struct arm_smmu_device
{
349 void __iomem
*cb_base
;
350 unsigned long pgshift
;
/* Bits for the 'features' mask tested throughout this file. */
352 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
353 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
354 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
355 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
356 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
357 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
358 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
359 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
360 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
361 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
362 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
363 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
364 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
/* Bit for the 'options' mask; set by parse_driver_options(). */
367 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
369 enum arm_smmu_arch_version version
;
370 enum arm_smmu_implementation model
;
372 u32 num_context_banks
;
373 u32 num_s2_context_banks
;
/* Allocation bitmap of context banks, sized for ARM_SMMU_MAX_CBS entries. */
374 DECLARE_BITMAP(context_map
, ARM_SMMU_MAX_CBS
);
377 u32 num_mapping_groups
;
380 struct arm_smmu_smr
*smrs
;
381 struct arm_smmu_s2cr
*s2crs
;
382 struct mutex stream_map_mutex
;
384 unsigned long va_size
;
385 unsigned long ipa_size
;
386 unsigned long pa_size
;
387 unsigned long pgsize_bitmap
;
390 u32 num_context_irqs
;
393 u32 cavium_id_base
; /* Specific to Cavium */
395 /* IOMMU core code handle */
396 struct iommu_device iommu
;
/*
 * Page-table format chosen for a context bank. NONE is the first enumerator,
 * so a zero-initialised arm_smmu_cfg starts with no format selected.
 */
399 enum arm_smmu_context_fmt
{
400 ARM_SMMU_CTX_FMT_NONE
,
401 ARM_SMMU_CTX_FMT_AARCH64
,
402 ARM_SMMU_CTX_FMT_AARCH32_L
,
403 ARM_SMMU_CTX_FMT_AARCH32_S
,
/*
 * Per-domain context-bank configuration.
 * NOTE(review): members used elsewhere in this file (cbndx, irptndx, cbar,
 * asid, vmid) are not visible in this extract.
 */
406 struct arm_smmu_cfg
{
414 enum arm_smmu_context_fmt fmt
;
/* irptndx value meaning "no context interrupt was successfully requested". */
416 #define INVALID_IRPTNDX 0xff
/*
 * Translation stage of a domain. S1 is zero, so it is the value a
 * zero-allocated domain starts with.
 * NOTE(review): one enumerator line (ARM_SMMU_DOMAIN_S2 is used elsewhere in
 * this file) is not visible in this extract.
 */
418 enum arm_smmu_domain_stage
{
419 ARM_SMMU_DOMAIN_S1
= 0,
421 ARM_SMMU_DOMAIN_NESTED
,
422 ARM_SMMU_DOMAIN_BYPASS
,
/*
 * Driver-private domain state wrapping the generic struct iommu_domain.
 * The generic domain is embedded as 'domain', which is what lets
 * to_smmu_domain() recover this structure with container_of().
 */
425 struct arm_smmu_domain
{
426 struct arm_smmu_device
*smmu
;
427 struct io_pgtable_ops
*pgtbl_ops
;
428 spinlock_t pgtbl_lock
;
429 struct arm_smmu_cfg cfg
;
430 enum arm_smmu_domain_stage stage
;
431 struct mutex init_mutex
; /* Protects smmu pointer */
432 struct iommu_domain domain
;
/*
 * One entry of the option table: arm_smmu_options[] initialises each entry
 * with an option bit and a device-tree property name, read back as .opt and
 * .prop by parse_driver_options().
 * NOTE(review): the member declarations are not visible in this extract.
 */
435 struct arm_smmu_option_prop
{
/* Global atomic counter, starting at zero. */
440 static atomic_t cavium_smmu_context_count
= ATOMIC_INIT(0);
/*
 * Binding-style flags. using_legacy_binding is consulted by
 * arm_smmu_domain_alloc() for DMA domains.
 */
442 static bool using_legacy_binding
, using_generic_binding
;
/*
 * Table mapping device-tree boolean properties to option bits.
 * parse_driver_options() walks it until it meets an entry whose .opt is 0.
 * NOTE(review): the terminating entry is not visible in this extract.
 */
444 static struct arm_smmu_option_prop arm_smmu_options
[] = {
445 { ARM_SMMU_OPT_SECURE_CFG_ACCESS
, "calxeda,smmu-secure-config-access" },
/*
 * Convert a generic iommu_domain pointer into the arm_smmu_domain that
 * embeds it as its 'domain' member.
 */
449 static struct arm_smmu_domain
*to_smmu_domain(struct iommu_domain
*dom
)
451 return container_of(dom
, struct arm_smmu_domain
, domain
);
/*
 * Translate device-tree boolean properties into driver option bits.
 *
 * For each arm_smmu_options[] entry whose property is present on the SMMU's
 * of_node, OR the entry's option bit into smmu->options and log the property
 * name. The loop ends at the first entry whose .opt is zero.
 * NOTE(review): the loop header and index initialisation are not visible in
 * this extract.
 */
454 static void parse_driver_options(struct arm_smmu_device
*smmu
)
459 if (of_property_read_bool(smmu
->dev
->of_node
,
460 arm_smmu_options
[i
].prop
)) {
461 smmu
->options
|= arm_smmu_options
[i
].opt
;
462 dev_notice(smmu
->dev
, "option %s\n",
463 arm_smmu_options
[i
].prop
);
465 } while (arm_smmu_options
[++i
].opt
);
/*
 * Return the device-tree node associated with @dev, obtained through
 * of_node_get().
 *
 * For a PCI device the node is that of the parent of the root bus's bridge
 * device; for any other device it is dev->of_node.
 * NOTE(review): the body of the root-bus loop is not visible in this extract.
 */
468 static struct device_node
*dev_get_dev_node(struct device
*dev
)
470 if (dev_is_pci(dev
)) {
471 struct pci_bus
*bus
= to_pci_dev(dev
)->bus
;
473 while (!pci_is_root_bus(bus
))
475 return of_node_get(bus
->bridge
->parent
->of_node
);
478 return of_node_get(dev
->of_node
);
/*
 * Callback passed to pci_for_each_dma_alias() by
 * arm_smmu_register_legacy_master().
 *
 * Stores @alias, converted to big-endian 32-bit, into the __be32 that @data
 * points to. Each invocation overwrites the previous value, and the callback
 * always returns 0 so the alias walk continues.
 */
481 static int __arm_smmu_get_pci_sid(struct pci_dev
*pdev
, u16 alias
, void *data
)
483 *((__be32
*)data
) = cpu_to_be32(alias
);
484 return 0; /* Continue walking */
/*
 * Callback passed to driver_for_each_device() by
 * arm_smmu_register_legacy_master().
 *
 * On entry *@data points to an of_phandle_iterator whose ->node is the
 * master node being looked up. The function walks the "mmu-masters" phandle
 * list (cell count from "#stream-id-cells") of @dev's of_node; on a match it
 * replaces *@data with @dev.
 * An iteration that ends with -ENOENT is reported as 0; any other error is
 * returned unchanged.
 * NOTE(review): the statement that follows the match assignment is not
 * visible in this extract.
 */
487 static int __find_legacy_master_phandle(struct device
*dev
, void *data
)
489 struct of_phandle_iterator
*it
= *(void **)data
;
490 struct device_node
*np
= it
->node
;
493 of_for_each_phandle(it
, err
, dev
->of_node
, "mmu-masters",
494 "#stream-id-cells", 0)
495 if (it
->node
== np
) {
496 *(void **)data
= dev
;
500 return err
== -ENOENT
? 0 : err
;
/*
 * Forward declarations: arm_smmu_driver is referenced by
 * arm_smmu_register_legacy_master() below.
 */
503 static struct platform_driver arm_smmu_driver
;
504 static struct iommu_ops arm_smmu_ops
;
/*
 * Register @dev as a master described by the legacy "mmu-masters" binding.
 *
 * Visible steps:
 *  - resolve the device's node with dev_get_dev_node() and require a
 *    "#stream-id-cells" property on it;
 *  - search the devices bound to arm_smmu_driver with
 *    __find_legacy_master_phandle() for the SMMU listing that node;
 *  - for PCI devices, collect a stream ID from the DMA aliases;
 *  - initialise the device's iommu_fwspec against the SMMU's fwnode;
 *  - return the SMMU's driver data through @smmu and add the stream IDs read
 *    from the phandle arguments to the fwspec.
 *
 * NOTE(review): error handling, several declarations and the release of the
 * sids buffer are not visible in this extract.
 */
506 static int arm_smmu_register_legacy_master(struct device
*dev
,
507 struct arm_smmu_device
**smmu
)
509 struct device
*smmu_dev
;
510 struct device_node
*np
;
511 struct of_phandle_iterator it
;
517 np
= dev_get_dev_node(dev
);
518 if (!np
|| !of_find_property(np
, "#stream-id-cells", NULL
)) {
524 err
= driver_for_each_device(&arm_smmu_driver
.driver
, NULL
, &data
,
525 __find_legacy_master_phandle
);
533 if (dev_is_pci(dev
)) {
534 /* "mmu-masters" assumes Stream ID == Requester ID */
535 pci_for_each_dma_alias(to_pci_dev(dev
), __arm_smmu_get_pci_sid
,
541 err
= iommu_fwspec_init(dev
, &smmu_dev
->of_node
->fwnode
,
546 sids
= kcalloc(it
.cur_count
, sizeof(*sids
), GFP_KERNEL
);
550 *smmu
= dev_get_drvdata(smmu_dev
);
551 of_phandle_iterator_args(&it
, sids
, it
.cur_count
);
552 err
= iommu_fwspec_add_ids(dev
, sids
, it
.cur_count
);
/*
 * Claim a free bit in @map, searching from @start up to (not including) @end.
 *
 * The search repeats for as long as test_and_set_bit() reports that the
 * chosen bit was already set, i.e. when another user claimed it between the
 * find and the set.
 * NOTE(review): the handling of "no free bit" and the return statements are
 * not visible in this extract.
 */
557 static int __arm_smmu_alloc_bitmap(unsigned long *map
, int start
, int end
)
562 idx
= find_next_zero_bit(map
, end
, start
);
565 } while (test_and_set_bit(idx
, map
));
/*
 * Counterpart of __arm_smmu_alloc_bitmap(); called with a context-bank index
 * by arm_smmu_destroy_domain_context().
 * NOTE(review): the body is not visible in this extract.
 */
570 static void __arm_smmu_free_bitmap(unsigned long *map
, int idx
)
575 /* Wait for any pending TLB invalidations to complete */
/*
 * @sync:   register written with 0 to start the sync.
 * @status: register polled for sTLBGSTATUS_GSACTIVE.
 *
 * Polling is two-level: an outer loop whose 'delay' doubles from 1 while it
 * is below TLB_LOOP_TIMEOUT, and an inner loop that reads the status
 * register TLB_SPIN_COUNT times per outer step. If the loops run out, a
 * rate-limited error is logged.
 * NOTE(review): the early-return on completion and the per-step delay call
 * are not visible in this extract.
 */
576 static void __arm_smmu_tlb_sync(struct arm_smmu_device
*smmu
,
577 void __iomem
*sync
, void __iomem
*status
)
579 unsigned int spin_cnt
, delay
;
581 writel_relaxed(0, sync
);
582 for (delay
= 1; delay
< TLB_LOOP_TIMEOUT
; delay
*= 2) {
583 for (spin_cnt
= TLB_SPIN_COUNT
; spin_cnt
> 0; spin_cnt
--) {
584 if (!(readl_relaxed(status
) & sTLBGSTATUS_GSACTIVE
))
590 dev_err_ratelimited(smmu
->dev
,
591 "TLB sync timed out -- SMMU may be deadlocked\n");
/*
 * Run a TLB sync through the global register space: sTLBGSYNC is the trigger
 * register and sTLBGSTATUS the status register, both relative to GR0.
 */
594 static void arm_smmu_tlb_sync_global(struct arm_smmu_device
*smmu
)
596 void __iomem
*base
= ARM_SMMU_GR0(smmu
);
598 __arm_smmu_tlb_sync(smmu
, base
+ ARM_SMMU_GR0_sTLBGSYNC
,
599 base
+ ARM_SMMU_GR0_sTLBGSTATUS
);
/*
 * Run a TLB sync through the domain's own context bank, using that bank's
 * TLBSYNC and TLBSTATUS registers.
 * @cookie is the struct arm_smmu_domain.
 */
602 static void arm_smmu_tlb_sync_context(void *cookie
)
604 struct arm_smmu_domain
*smmu_domain
= cookie
;
605 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
606 void __iomem
*base
= ARM_SMMU_CB(smmu
, smmu_domain
->cfg
.cbndx
);
608 __arm_smmu_tlb_sync(smmu
, base
+ ARM_SMMU_CB_TLBSYNC
,
609 base
+ ARM_SMMU_CB_TLBSTATUS
);
/*
 * .tlb_sync callback of arm_smmu_s2_tlb_ops_v1: performs a global TLB sync
 * on the domain's SMMU. @cookie is the struct arm_smmu_domain.
 */
612 static void arm_smmu_tlb_sync_vmid(void *cookie
)
614 struct arm_smmu_domain
*smmu_domain
= cookie
;
616 arm_smmu_tlb_sync_global(smmu_domain
->smmu
);
/*
 * Stage-1 "flush all": write the domain's ASID to the context bank's
 * TLBIASID register, then wait for completion with a context-bank sync.
 * @cookie is the struct arm_smmu_domain.
 */
619 static void arm_smmu_tlb_inv_context_s1(void *cookie
)
621 struct arm_smmu_domain
*smmu_domain
= cookie
;
622 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
623 void __iomem
*base
= ARM_SMMU_CB(smmu_domain
->smmu
, cfg
->cbndx
);
625 writel_relaxed(cfg
->asid
, base
+ ARM_SMMU_CB_S1_TLBIASID
);
626 arm_smmu_tlb_sync_context(cookie
);
/*
 * Stage-2 "flush all": write the domain's VMID to the global TLBIVMID
 * register, then wait for completion with a global sync.
 * @cookie is the struct arm_smmu_domain.
 */
629 static void arm_smmu_tlb_inv_context_s2(void *cookie
)
631 struct arm_smmu_domain
*smmu_domain
= cookie
;
632 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
633 void __iomem
*base
= ARM_SMMU_GR0(smmu
);
635 writel_relaxed(smmu_domain
->cfg
.vmid
, base
+ ARM_SMMU_GR0_TLBIVMID
);
636 arm_smmu_tlb_sync_global(smmu
);
/*
 * Queue by-address TLB invalidations for [iova, iova + size) in steps of
 * @granule, without waiting for completion.
 *
 * stage1 is derived from cfg->cbar. The register is chosen from the
 * S1 TLBIVA/TLBIVAL or S2 TLBIIPAS2/TLBIIPAS2L pair, the "L" variant being
 * used when @leaf is true. Three write loops are visible:
 *  - a 32-bit write per granule when the format is not AArch64;
 *  - a 64-bit write with the ASID placed in bits [63:48];
 *  - a write through smmu_write_atomic_lq to the stage-2 register.
 * In the last two the address advances by granule >> 12 per step.
 *
 * Each loop ends on "size -= granule" reaching zero, so @size is expected to
 * be a non-zero multiple of @granule.
 * NOTE(review): the branch conditions, address pre-shifts and the 32-bit
 * loop's increment are not visible in this extract.
 */
639 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova
, size_t size
,
640 size_t granule
, bool leaf
, void *cookie
)
642 struct arm_smmu_domain
*smmu_domain
= cookie
;
643 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
644 bool stage1
= cfg
->cbar
!= CBAR_TYPE_S2_TRANS
;
645 void __iomem
*reg
= ARM_SMMU_CB(smmu_domain
->smmu
, cfg
->cbndx
);
648 reg
+= leaf
? ARM_SMMU_CB_S1_TLBIVAL
: ARM_SMMU_CB_S1_TLBIVA
;
650 if (cfg
->fmt
!= ARM_SMMU_CTX_FMT_AARCH64
) {
654 writel_relaxed(iova
, reg
);
656 } while (size
-= granule
);
659 iova
|= (u64
)cfg
->asid
<< 48;
661 writeq_relaxed(iova
, reg
);
662 iova
+= granule
>> 12;
663 } while (size
-= granule
);
666 reg
+= leaf
? ARM_SMMU_CB_S2_TLBIIPAS2L
:
667 ARM_SMMU_CB_S2_TLBIIPAS2
;
670 smmu_write_atomic_lq(iova
, reg
);
671 iova
+= granule
>> 12;
672 } while (size
-= granule
);
677 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
678 * almost negligible, but the benefit of getting the first one in as far ahead
679 * of the sync as possible is significant, hence we don't just make this a
680 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
/*
 * .tlb_add_flush callback of arm_smmu_s2_tlb_ops_v1. It ignores the
 * iova/size/granule/leaf arguments and issues a whole-VMID invalidation by
 * writing the domain's VMID to the global TLBIVMID register; no sync is
 * performed here.
 */
682 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova
, size_t size
,
683 size_t granule
, bool leaf
, void *cookie
)
685 struct arm_smmu_domain
*smmu_domain
= cookie
;
686 void __iomem
*base
= ARM_SMMU_GR0(smmu_domain
->smmu
);
688 writel_relaxed(smmu_domain
->cfg
.vmid
, base
+ ARM_SMMU_GR0_TLBIVMID
);
/*
 * TLB maintenance callbacks selected for stage-1 domains: ASID-wide flush,
 * by-address invalidation, and sync through the context bank.
 */
691 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops
= {
692 .tlb_flush_all
= arm_smmu_tlb_inv_context_s1
,
693 .tlb_add_flush
= arm_smmu_tlb_inv_range_nosync
,
694 .tlb_sync
= arm_smmu_tlb_sync_context
,
/*
 * TLB maintenance callbacks selected for stage-2 domains when the SMMU
 * version is ARM_SMMU_V2: VMID-wide flush, by-address invalidation, and sync
 * through the context bank.
 */
697 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2
= {
698 .tlb_flush_all
= arm_smmu_tlb_inv_context_s2
,
699 .tlb_add_flush
= arm_smmu_tlb_inv_range_nosync
,
700 .tlb_sync
= arm_smmu_tlb_sync_context
,
/*
 * TLB maintenance callbacks selected for stage-2 domains when the SMMU
 * version is not ARM_SMMU_V2. Every operation goes through the global
 * register space: VMID-wide invalidation for both flush callbacks and a
 * global sync.
 */
703 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1
= {
704 .tlb_flush_all
= arm_smmu_tlb_inv_context_s2
,
705 .tlb_add_flush
= arm_smmu_tlb_inv_vmid_nosync
,
706 .tlb_sync
= arm_smmu_tlb_sync_vmid
,
/*
 * Context-fault interrupt handler. @dev is the struct iommu_domain that was
 * passed to devm_request_irq().
 *
 * Reads the context bank's FSR and tests it against FSR_FAULT. It then reads
 * FSYNR0 and the 64-bit FAR, logs them (rate-limited) as an unhandled fault,
 * and writes the FSR value it read back to FSR.
 * NOTE(review): the return statements are not visible in this extract. The
 * FSR write-back is presumably a write-one-to-clear acknowledgement; confirm
 * against the SMMU architecture specification.
 */
709 static irqreturn_t
arm_smmu_context_fault(int irq
, void *dev
)
713 struct iommu_domain
*domain
= dev
;
714 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
715 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
716 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
717 void __iomem
*cb_base
;
719 cb_base
= ARM_SMMU_CB(smmu
, cfg
->cbndx
);
720 fsr
= readl_relaxed(cb_base
+ ARM_SMMU_CB_FSR
);
722 if (!(fsr
& FSR_FAULT
))
725 fsynr
= readl_relaxed(cb_base
+ ARM_SMMU_CB_FSYNR0
);
726 iova
= readq_relaxed(cb_base
+ ARM_SMMU_CB_FAR
);
728 dev_err_ratelimited(smmu
->dev
,
729 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
730 fsr
, iova
, fsynr
, cfg
->cbndx
);
732 writel(fsr
, cb_base
+ ARM_SMMU_CB_FSR
);
/*
 * Global-fault interrupt handler. @dev is the struct arm_smmu_device.
 *
 * Reads sGFSR and sGFSYNR0..2 through the ARM_SMMU_GR0_NS() view of the
 * global register space, logs them (rate-limited), and writes the sGFSR
 * value it read back to sGFSR.
 * NOTE(review): the "no fault pending" check and the return statements are
 * not visible in this extract. The sGFSR write-back is presumably a
 * write-one-to-clear acknowledgement; confirm against the SMMU architecture
 * specification.
 */
736 static irqreturn_t
arm_smmu_global_fault(int irq
, void *dev
)
738 u32 gfsr
, gfsynr0
, gfsynr1
, gfsynr2
;
739 struct arm_smmu_device
*smmu
= dev
;
740 void __iomem
*gr0_base
= ARM_SMMU_GR0_NS(smmu
);
742 gfsr
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_sGFSR
);
743 gfsynr0
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_sGFSYNR0
);
744 gfsynr1
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_sGFSYNR1
);
745 gfsynr2
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_sGFSYNR2
);
750 dev_err_ratelimited(smmu
->dev
,
751 "Unexpected global fault, this could be serious\n");
752 dev_err_ratelimited(smmu
->dev
,
753 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
754 gfsr
, gfsynr0
, gfsynr1
, gfsynr2
);
756 writel(gfsr
, gr0_base
+ ARM_SMMU_GR0_sGFSR
);
/*
 * Program the hardware context bank selected by cfg->cbndx from the domain
 * configuration and the io-pgtable configuration in @pgtbl_cfg.
 *
 * Register writes appear in this order:
 *   CBA2R (only when version > ARM_SMMU_V1), CBAR, TTBCR2 and TTBCR,
 *   TTBR0/TTBR1 (plus CONTEXTIDR for the AArch32 short format),
 *   MAIR0/MAIR1, and SCTLR.
 *
 * NOTE(review): many lines of this function (braces, else arms, the stage1
 * conditions, some assignments) are absent from this extract, so the exact
 * branch structure cannot be confirmed from the text here.
 */
760 static void arm_smmu_init_context_bank(struct arm_smmu_domain
*smmu_domain
,
761 struct io_pgtable_cfg
*pgtbl_cfg
)
766 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
767 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
768 void __iomem
*cb_base
, *gr1_base
;
770 gr1_base
= ARM_SMMU_GR1(smmu
);
771 stage1
= cfg
->cbar
!= CBAR_TYPE_S2_TRANS
;
772 cb_base
= ARM_SMMU_CB(smmu
, cfg
->cbndx
);
774 if (smmu
->version
> ARM_SMMU_V1
) {
775 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH64
)
776 reg
= CBA2R_RW64_64BIT
;
778 reg
= CBA2R_RW64_32BIT
;
779 /* 16-bit VMIDs live in CBA2R */
780 if (smmu
->features
& ARM_SMMU_FEAT_VMID16
)
781 reg
|= cfg
->vmid
<< CBA2R_VMID_SHIFT
;
783 writel_relaxed(reg
, gr1_base
+ ARM_SMMU_GR1_CBA2R(cfg
->cbndx
));
788 if (smmu
->version
< ARM_SMMU_V2
)
789 reg
|= cfg
->irptndx
<< CBAR_IRPTNDX_SHIFT
;
792 * Use the weakest shareability/memory types, so they are
793 * overridden by the ttbcr/pte.
796 reg
|= (CBAR_S1_BPSHCFG_NSH
<< CBAR_S1_BPSHCFG_SHIFT
) |
797 (CBAR_S1_MEMATTR_WB
<< CBAR_S1_MEMATTR_SHIFT
);
798 } else if (!(smmu
->features
& ARM_SMMU_FEAT_VMID16
)) {
799 /* 8-bit VMIDs live in CBAR */
800 reg
|= cfg
->vmid
<< CBAR_VMID_SHIFT
;
802 writel_relaxed(reg
, gr1_base
+ ARM_SMMU_GR1_CBAR(cfg
->cbndx
));
806 * We must write this before the TTBRs, since it determines the
807 * access behaviour of some fields (in particular, ASID[15:8]).
810 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH32_S
) {
811 reg
= pgtbl_cfg
->arm_v7s_cfg
.tcr
;
814 reg
= pgtbl_cfg
->arm_lpae_s1_cfg
.tcr
;
815 reg2
= pgtbl_cfg
->arm_lpae_s1_cfg
.tcr
>> 32;
816 reg2
|= TTBCR2_SEP_UPSTREAM
;
817 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH64
)
820 if (smmu
->version
> ARM_SMMU_V1
)
821 writel_relaxed(reg2
, cb_base
+ ARM_SMMU_CB_TTBCR2
);
823 reg
= pgtbl_cfg
->arm_lpae_s2_cfg
.vtcr
;
825 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_TTBCR
);
/*
 * Translation table base registers: 32-bit writes plus CONTEXTIDR for the
 * AArch32 short format, 64-bit writes carrying the ASID at TTBRn_ASID_SHIFT
 * for the LPAE stage-1 tables, and the VTTBR for stage 2.
 */
829 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH32_S
) {
830 reg
= pgtbl_cfg
->arm_v7s_cfg
.ttbr
[0];
831 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_TTBR0
);
832 reg
= pgtbl_cfg
->arm_v7s_cfg
.ttbr
[1];
833 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_TTBR1
);
834 writel_relaxed(cfg
->asid
, cb_base
+ ARM_SMMU_CB_CONTEXTIDR
);
836 reg64
= pgtbl_cfg
->arm_lpae_s1_cfg
.ttbr
[0];
837 reg64
|= (u64
)cfg
->asid
<< TTBRn_ASID_SHIFT
;
838 writeq_relaxed(reg64
, cb_base
+ ARM_SMMU_CB_TTBR0
);
839 reg64
= pgtbl_cfg
->arm_lpae_s1_cfg
.ttbr
[1];
840 reg64
|= (u64
)cfg
->asid
<< TTBRn_ASID_SHIFT
;
841 writeq_relaxed(reg64
, cb_base
+ ARM_SMMU_CB_TTBR1
);
844 reg64
= pgtbl_cfg
->arm_lpae_s2_cfg
.vttbr
;
845 writeq_relaxed(reg64
, cb_base
+ ARM_SMMU_CB_TTBR0
);
848 /* MAIRs (stage-1 only) */
850 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH32_S
) {
851 reg
= pgtbl_cfg
->arm_v7s_cfg
.prrr
;
852 reg2
= pgtbl_cfg
->arm_v7s_cfg
.nmrr
;
854 reg
= pgtbl_cfg
->arm_lpae_s1_cfg
.mair
[0];
855 reg2
= pgtbl_cfg
->arm_lpae_s1_cfg
.mair
[1];
857 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_S1_MAIR0
);
858 writel_relaxed(reg2
, cb_base
+ ARM_SMMU_CB_S1_MAIR1
);
/* SCTLR is written last; its value includes SCTLR_M and the fault enables. */
862 reg
= SCTLR_CFIE
| SCTLR_CFRE
| SCTLR_AFE
| SCTLR_TRE
| SCTLR_M
;
864 reg
|= SCTLR_S1_ASIDPNE
;
868 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_SCTLR
);
/*
 * Bind @domain to @smmu and set up its translation context, holding
 * smmu_domain->init_mutex.
 *
 * Visible steps:
 *  - identity domains get stage BYPASS and only record the SMMU;
 *  - the stage is narrowed to what the SMMU's feature bits support;
 *  - a context format is chosen from the feature bits and kernel config;
 *  - per stage, the CBAR type, the first context-bank index to search from,
 *    the io-pgtable format, the address-size limits and the TLB ops are set;
 *  - a context bank is allocated from smmu->context_map;
 *  - irptndx is derived (round-robin over num_context_irqs below SMMUv2,
 *    otherwise equal to cbndx), and the VMID/ASID are derived from cbndx and
 *    cavium_id_base;
 *  - io-pgtable ops are allocated, and the domain's pgsize bitmap and
 *    aperture end are updated from the result;
 *  - the context bank is programmed and the context-fault IRQ requested; a
 *    failed request is logged and leaves irptndx = INVALID_IRPTNDX;
 *  - pgtbl_ops is published after the mutex is released.
 * The out_clear_smmu path resets smmu_domain->smmu to NULL before unlocking.
 *
 * NOTE(review): many lines (braces, else arms, error checks, labels, return
 * statements, parts of the pgtbl_cfg initialiser) are absent from this
 * extract, so the exact control flow cannot be confirmed from the text here.
 */
871 static int arm_smmu_init_domain_context(struct iommu_domain
*domain
,
872 struct arm_smmu_device
*smmu
)
874 int irq
, start
, ret
= 0;
875 unsigned long ias
, oas
;
876 struct io_pgtable_ops
*pgtbl_ops
;
877 struct io_pgtable_cfg pgtbl_cfg
;
878 enum io_pgtable_fmt fmt
;
879 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
880 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
881 const struct iommu_gather_ops
*tlb_ops
;
883 mutex_lock(&smmu_domain
->init_mutex
);
884 if (smmu_domain
->smmu
)
887 if (domain
->type
== IOMMU_DOMAIN_IDENTITY
) {
888 smmu_domain
->stage
= ARM_SMMU_DOMAIN_BYPASS
;
889 smmu_domain
->smmu
= smmu
;
894 * Mapping the requested stage onto what we support is surprisingly
895 * complicated, mainly because the spec allows S1+S2 SMMUs without
896 * support for nested translation. That means we end up with the
899 * Requested Supported Actual
909 * Note that you can't actually request stage-2 mappings.
911 if (!(smmu
->features
& ARM_SMMU_FEAT_TRANS_S1
))
912 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S2
;
913 if (!(smmu
->features
& ARM_SMMU_FEAT_TRANS_S2
))
914 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S1
;
917 * Choosing a suitable context format is even more fiddly. Until we
918 * grow some way for the caller to express a preference, and/or move
919 * the decision into the io-pgtable code where it arguably belongs,
920 * just aim for the closest thing to the rest of the system, and hope
921 * that the hardware isn't esoteric enough that we can't assume AArch64
922 * support to be a superset of AArch32 support...
924 if (smmu
->features
& ARM_SMMU_FEAT_FMT_AARCH32_L
)
925 cfg
->fmt
= ARM_SMMU_CTX_FMT_AARCH32_L
;
926 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S
) &&
927 !IS_ENABLED(CONFIG_64BIT
) && !IS_ENABLED(CONFIG_ARM_LPAE
) &&
928 (smmu
->features
& ARM_SMMU_FEAT_FMT_AARCH32_S
) &&
929 (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
))
930 cfg
->fmt
= ARM_SMMU_CTX_FMT_AARCH32_S
;
931 if ((IS_ENABLED(CONFIG_64BIT
) || cfg
->fmt
== ARM_SMMU_CTX_FMT_NONE
) &&
932 (smmu
->features
& (ARM_SMMU_FEAT_FMT_AARCH64_64K
|
933 ARM_SMMU_FEAT_FMT_AARCH64_16K
|
934 ARM_SMMU_FEAT_FMT_AARCH64_4K
)))
935 cfg
->fmt
= ARM_SMMU_CTX_FMT_AARCH64
;
937 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_NONE
) {
942 switch (smmu_domain
->stage
) {
943 case ARM_SMMU_DOMAIN_S1
:
944 cfg
->cbar
= CBAR_TYPE_S1_TRANS_S2_BYPASS
;
945 start
= smmu
->num_s2_context_banks
;
947 oas
= smmu
->ipa_size
;
948 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH64
) {
949 fmt
= ARM_64_LPAE_S1
;
950 } else if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH32_L
) {
951 fmt
= ARM_32_LPAE_S1
;
952 ias
= min(ias
, 32UL);
953 oas
= min(oas
, 40UL);
956 ias
= min(ias
, 32UL);
957 oas
= min(oas
, 32UL);
959 tlb_ops
= &arm_smmu_s1_tlb_ops
;
961 case ARM_SMMU_DOMAIN_NESTED
:
963 * We will likely want to change this if/when KVM gets
966 case ARM_SMMU_DOMAIN_S2
:
967 cfg
->cbar
= CBAR_TYPE_S2_TRANS
;
969 ias
= smmu
->ipa_size
;
971 if (cfg
->fmt
== ARM_SMMU_CTX_FMT_AARCH64
) {
972 fmt
= ARM_64_LPAE_S2
;
974 fmt
= ARM_32_LPAE_S2
;
975 ias
= min(ias
, 40UL);
976 oas
= min(oas
, 40UL);
978 if (smmu
->version
== ARM_SMMU_V2
)
979 tlb_ops
= &arm_smmu_s2_tlb_ops_v2
;
981 tlb_ops
= &arm_smmu_s2_tlb_ops_v1
;
987 ret
= __arm_smmu_alloc_bitmap(smmu
->context_map
, start
,
988 smmu
->num_context_banks
);
993 if (smmu
->version
< ARM_SMMU_V2
) {
994 cfg
->irptndx
= atomic_inc_return(&smmu
->irptndx
);
995 cfg
->irptndx
%= smmu
->num_context_irqs
;
997 cfg
->irptndx
= cfg
->cbndx
;
1000 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_S2
)
1001 cfg
->vmid
= cfg
->cbndx
+ 1 + smmu
->cavium_id_base
;
1003 cfg
->asid
= cfg
->cbndx
+ smmu
->cavium_id_base
;
1005 pgtbl_cfg
= (struct io_pgtable_cfg
) {
1006 .pgsize_bitmap
= smmu
->pgsize_bitmap
,
1010 .iommu_dev
= smmu
->dev
,
/* smmu_domain->smmu is assigned before the page-table ops are allocated. */
1013 smmu_domain
->smmu
= smmu
;
1014 pgtbl_ops
= alloc_io_pgtable_ops(fmt
, &pgtbl_cfg
, smmu_domain
);
1017 goto out_clear_smmu
;
1020 /* Update the domain's page sizes to reflect the page table format */
1021 domain
->pgsize_bitmap
= pgtbl_cfg
.pgsize_bitmap
;
1022 domain
->geometry
.aperture_end
= (1UL << ias
) - 1;
1023 domain
->geometry
.force_aperture
= true;
1025 /* Initialise the context bank with our page table cfg */
1026 arm_smmu_init_context_bank(smmu_domain
, &pgtbl_cfg
);
1029 * Request context fault interrupt. Do this last to avoid the
1030 * handler seeing a half-initialised domain state.
1032 irq
= smmu
->irqs
[smmu
->num_global_irqs
+ cfg
->irptndx
];
1033 ret
= devm_request_irq(smmu
->dev
, irq
, arm_smmu_context_fault
,
1034 IRQF_SHARED
, "arm-smmu-context-fault", domain
);
1036 dev_err(smmu
->dev
, "failed to request context IRQ %d (%u)\n",
1038 cfg
->irptndx
= INVALID_IRPTNDX
;
1041 mutex_unlock(&smmu_domain
->init_mutex
);
1043 /* Publish page table ops for map/unmap */
1044 smmu_domain
->pgtbl_ops
= pgtbl_ops
;
1048 smmu_domain
->smmu
= NULL
;
1050 mutex_unlock(&smmu_domain
->init_mutex
);
/*
 * Tear down the translation context of @domain.
 *
 * Domains that were never bound to an SMMU, and identity domains, are
 * filtered by the first check. For the rest, in order: write 0 to the
 * context bank's SCTLR, free the context-fault IRQ if one was requested
 * (irptndx != INVALID_IRPTNDX), free the io-pgtable ops, and release the
 * context-bank index in context_map.
 * NOTE(review): the statement guarded by the first check (presumably an
 * early return) is not visible in this extract.
 */
1054 static void arm_smmu_destroy_domain_context(struct iommu_domain
*domain
)
1056 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1057 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
1058 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
1059 void __iomem
*cb_base
;
1062 if (!smmu
|| domain
->type
== IOMMU_DOMAIN_IDENTITY
)
1066 * Disable the context bank and free the page tables before freeing
1069 cb_base
= ARM_SMMU_CB(smmu
, cfg
->cbndx
);
1070 writel_relaxed(0, cb_base
+ ARM_SMMU_CB_SCTLR
);
1072 if (cfg
->irptndx
!= INVALID_IRPTNDX
) {
1073 irq
= smmu
->irqs
[smmu
->num_global_irqs
+ cfg
->irptndx
];
1074 devm_free_irq(smmu
->dev
, irq
, domain
);
1077 free_io_pgtable_ops(smmu_domain
->pgtbl_ops
);
1078 __arm_smmu_free_bitmap(smmu
->context_map
, cfg
->cbndx
);
/*
 * Allocate a new domain of the given @type.
 *
 * The first check singles out any type other than UNMANAGED, DMA and
 * IDENTITY. The domain is zero-allocated with kzalloc(). For DMA domains the
 * failure condition is "legacy binding in use, or iommu_get_dma_cookie()
 * returned non-zero". On success the init mutex and page-table spinlock are
 * initialised and the embedded generic domain is returned.
 * NOTE(review): the failure branches (unsupported type, allocation failure,
 * cookie failure) are not visible in this extract.
 */
1081 static struct iommu_domain
*arm_smmu_domain_alloc(unsigned type
)
1083 struct arm_smmu_domain
*smmu_domain
;
1085 if (type
!= IOMMU_DOMAIN_UNMANAGED
&&
1086 type
!= IOMMU_DOMAIN_DMA
&&
1087 type
!= IOMMU_DOMAIN_IDENTITY
)
1090 * Allocate the domain and initialise some of its data structures.
1091 * We can't really do anything meaningful until we've added a
1094 smmu_domain
= kzalloc(sizeof(*smmu_domain
), GFP_KERNEL
);
1098 if (type
== IOMMU_DOMAIN_DMA
&& (using_legacy_binding
||
1099 iommu_get_dma_cookie(&smmu_domain
->domain
))) {
1104 mutex_init(&smmu_domain
->init_mutex
);
1105 spin_lock_init(&smmu_domain
->pgtbl_lock
);
1107 return &smmu_domain
->domain
;
/*
 * Release a domain: drop its DMA cookie, then tear down its translation
 * context.
 * NOTE(review): the release of the smmu_domain allocation itself is not
 * visible in this extract.
 */
1110 static void arm_smmu_domain_free(struct iommu_domain
*domain
)
1112 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1115 * Free the domain resources. We assume that all devices have
1116 * already been detached.
1118 iommu_put_dma_cookie(domain
);
1119 arm_smmu_destroy_domain_context(domain
);
/*
 * Write the software SMR entry @idx to the hardware SMR register.
 *
 * The register value is the entry's id and mask shifted into their fields.
 * The check on "EXIDS feature absent and entry valid" guards a statement
 * that is not visible in this extract (presumably setting SMR_VALID;
 * confirm against the full source).
 */
1123 static void arm_smmu_write_smr(struct arm_smmu_device
*smmu
, int idx
)
1125 struct arm_smmu_smr
*smr
= smmu
->smrs
+ idx
;
1126 u32 reg
= smr
->id
<< SMR_ID_SHIFT
| smr
->mask
<< SMR_MASK_SHIFT
;
1128 if (!(smmu
->features
& ARM_SMMU_FEAT_EXIDS
) && smr
->valid
)
1130 writel_relaxed(reg
, ARM_SMMU_GR0(smmu
) + ARM_SMMU_GR0_SMR(idx
));
/*
 * Write the software S2CR entry @idx to the hardware S2CR register.
 *
 * The register value packs the entry's type, context-bank index and privcfg,
 * each masked and shifted into its field. When the SMMU has the EXIDS
 * feature, SMRs are in use and the matching SMR entry is valid, the
 * S2CR_EXIDVALID bit is set as well.
 */
1133 static void arm_smmu_write_s2cr(struct arm_smmu_device
*smmu
, int idx
)
1135 struct arm_smmu_s2cr
*s2cr
= smmu
->s2crs
+ idx
;
1136 u32 reg
= (s2cr
->type
& S2CR_TYPE_MASK
) << S2CR_TYPE_SHIFT
|
1137 (s2cr
->cbndx
& S2CR_CBNDX_MASK
) << S2CR_CBNDX_SHIFT
|
1138 (s2cr
->privcfg
& S2CR_PRIVCFG_MASK
) << S2CR_PRIVCFG_SHIFT
;
1140 if (smmu
->features
& ARM_SMMU_FEAT_EXIDS
&& smmu
->smrs
&&
1141 smmu
->smrs
[idx
].valid
)
1142 reg
|= S2CR_EXIDVALID
;
1143 writel_relaxed(reg
, ARM_SMMU_GR0(smmu
) + ARM_SMMU_GR0_S2CR(idx
));
/*
 * Write stream map entry @idx to hardware: the S2CR first, then the SMR.
 * NOTE(review): a line between the two calls is absent from this extract, so
 * the SMR write may be conditional; confirm against the full source.
 */
1146 static void arm_smmu_write_sme(struct arm_smmu_device
*smmu
, int idx
)
1148 arm_smmu_write_s2cr(smmu
, idx
);
1150 arm_smmu_write_smr(smmu
, idx
);
1154 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1155 * should be called after sCR0 is written.
/*
 * Probe which SMR ID and MASK bits the hardware implements, using SMR 0 as
 * scratch. The current streamid_mask is written to the ID field and read
 * back into smmu->streamid_mask; that value is then written to the MASK
 * field and read back into smmu->smr_mask_mask.
 * NOTE(review): any early-exit check at the top of the function is not
 * visible in this extract.
 */
1157 static void arm_smmu_test_smr_masks(struct arm_smmu_device
*smmu
)
1159 void __iomem
*gr0_base
= ARM_SMMU_GR0(smmu
);
1166 * SMR.ID bits may not be preserved if the corresponding MASK
1167 * bits are set, so check each one separately. We can reject
1168 * masters later if they try to claim IDs outside these masks.
1170 smr
= smmu
->streamid_mask
<< SMR_ID_SHIFT
;
1171 writel_relaxed(smr
, gr0_base
+ ARM_SMMU_GR0_SMR(0));
1172 smr
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_SMR(0));
1173 smmu
->streamid_mask
= smr
>> SMR_ID_SHIFT
;
1175 smr
= smmu
->streamid_mask
<< SMR_MASK_SHIFT
;
1176 writel_relaxed(smr
, gr0_base
+ ARM_SMMU_GR0_SMR(0));
1177 smr
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_SMR(0));
1178 smmu
->smr_mask_mask
= smr
>> SMR_MASK_SHIFT
;
1181 static int arm_smmu_find_sme(struct arm_smmu_device
*smmu
, u16 id
, u16 mask
)
1183 struct arm_smmu_smr
*smrs
= smmu
->smrs
;
1184 int i
, free_idx
= -ENOSPC
;
1186 /* Stream indexing is blissfully easy */
1190 /* Validating SMRs is... less so */
1191 for (i
= 0; i
< smmu
->num_mapping_groups
; ++i
) {
1192 if (!smrs
[i
].valid
) {
1194 * Note the first free entry we come across, which
1195 * we'll claim in the end if nothing else matches.
1202 * If the new entry is _entirely_ matched by an existing entry,
1203 * then reuse that, with the guarantee that there also cannot
1204 * be any subsequent conflicting entries. In normal use we'd
1205 * expect simply identical entries for this case, but there's
1206 * no harm in accommodating the generalisation.
1208 if ((mask
& smrs
[i
].mask
) == mask
&&
1209 !((id
^ smrs
[i
].id
) & ~smrs
[i
].mask
))
1212 * If the new entry has any other overlap with an existing one,
1213 * though, then there always exists at least one stream ID
1214 * which would cause a conflict, and we can't allow that risk.
1216 if (!((id
^ smrs
[i
].id
) & ~(smrs
[i
].mask
| mask
)))
1223 static bool arm_smmu_free_sme(struct arm_smmu_device
*smmu
, int idx
)
1225 if (--smmu
->s2crs
[idx
].count
)
1228 smmu
->s2crs
[idx
] = s2cr_init_val
;
1230 smmu
->smrs
[idx
].valid
= false;
1235 static int arm_smmu_master_alloc_smes(struct device
*dev
)
1237 struct iommu_fwspec
*fwspec
= dev
->iommu_fwspec
;
1238 struct arm_smmu_master_cfg
*cfg
= fwspec
->iommu_priv
;
1239 struct arm_smmu_device
*smmu
= cfg
->smmu
;
1240 struct arm_smmu_smr
*smrs
= smmu
->smrs
;
1241 struct iommu_group
*group
;
1244 mutex_lock(&smmu
->stream_map_mutex
);
1245 /* Figure out a viable stream map entry allocation */
1246 for_each_cfg_sme(fwspec
, i
, idx
) {
1247 u16 sid
= fwspec
->ids
[i
];
1248 u16 mask
= fwspec
->ids
[i
] >> SMR_MASK_SHIFT
;
1250 if (idx
!= INVALID_SMENDX
) {
1255 ret
= arm_smmu_find_sme(smmu
, sid
, mask
);
1260 if (smrs
&& smmu
->s2crs
[idx
].count
== 0) {
1262 smrs
[idx
].mask
= mask
;
1263 smrs
[idx
].valid
= true;
1265 smmu
->s2crs
[idx
].count
++;
1266 cfg
->smendx
[i
] = (s16
)idx
;
1269 group
= iommu_group_get_for_dev(dev
);
1271 group
= ERR_PTR(-ENOMEM
);
1272 if (IS_ERR(group
)) {
1273 ret
= PTR_ERR(group
);
1276 iommu_group_put(group
);
1278 /* It worked! Now, poke the actual hardware */
1279 for_each_cfg_sme(fwspec
, i
, idx
) {
1280 arm_smmu_write_sme(smmu
, idx
);
1281 smmu
->s2crs
[idx
].group
= group
;
1284 mutex_unlock(&smmu
->stream_map_mutex
);
1289 arm_smmu_free_sme(smmu
, cfg
->smendx
[i
]);
1290 cfg
->smendx
[i
] = INVALID_SMENDX
;
1292 mutex_unlock(&smmu
->stream_map_mutex
);
1296 static void arm_smmu_master_free_smes(struct iommu_fwspec
*fwspec
)
1298 struct arm_smmu_device
*smmu
= fwspec_smmu(fwspec
);
1299 struct arm_smmu_master_cfg
*cfg
= fwspec
->iommu_priv
;
1302 mutex_lock(&smmu
->stream_map_mutex
);
1303 for_each_cfg_sme(fwspec
, i
, idx
) {
1304 if (arm_smmu_free_sme(smmu
, idx
))
1305 arm_smmu_write_sme(smmu
, idx
);
1306 cfg
->smendx
[i
] = INVALID_SMENDX
;
1308 mutex_unlock(&smmu
->stream_map_mutex
);
1311 static int arm_smmu_domain_add_master(struct arm_smmu_domain
*smmu_domain
,
1312 struct iommu_fwspec
*fwspec
)
1314 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
1315 struct arm_smmu_s2cr
*s2cr
= smmu
->s2crs
;
1316 u8 cbndx
= smmu_domain
->cfg
.cbndx
;
1317 enum arm_smmu_s2cr_type type
;
1320 if (smmu_domain
->stage
== ARM_SMMU_DOMAIN_BYPASS
)
1321 type
= S2CR_TYPE_BYPASS
;
1323 type
= S2CR_TYPE_TRANS
;
1325 for_each_cfg_sme(fwspec
, i
, idx
) {
1326 if (type
== s2cr
[idx
].type
&& cbndx
== s2cr
[idx
].cbndx
)
1329 s2cr
[idx
].type
= type
;
1330 s2cr
[idx
].privcfg
= S2CR_PRIVCFG_DEFAULT
;
1331 s2cr
[idx
].cbndx
= cbndx
;
1332 arm_smmu_write_s2cr(smmu
, idx
);
1337 static int arm_smmu_attach_dev(struct iommu_domain
*domain
, struct device
*dev
)
1340 struct iommu_fwspec
*fwspec
= dev
->iommu_fwspec
;
1341 struct arm_smmu_device
*smmu
;
1342 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1344 if (!fwspec
|| fwspec
->ops
!= &arm_smmu_ops
) {
1345 dev_err(dev
, "cannot attach to SMMU, is it on the same bus?\n");
1350 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1351 * domains between of_xlate() and add_device() - we have no way to cope
1352 * with that, so until ARM gets converted to rely on groups and default
1353 * domains, just say no (but more politely than by dereferencing NULL).
1354 * This should be at least a WARN_ON once that's sorted.
1356 if (!fwspec
->iommu_priv
)
1359 smmu
= fwspec_smmu(fwspec
);
1360 /* Ensure that the domain is finalised */
1361 ret
= arm_smmu_init_domain_context(domain
, smmu
);
1366 * Sanity check the domain. We don't support domains across
1369 if (smmu_domain
->smmu
!= smmu
) {
1371 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1372 dev_name(smmu_domain
->smmu
->dev
), dev_name(smmu
->dev
));
1376 /* Looks ok, so add the device to the domain */
1377 return arm_smmu_domain_add_master(smmu_domain
, fwspec
);
1380 static int arm_smmu_map(struct iommu_domain
*domain
, unsigned long iova
,
1381 phys_addr_t paddr
, size_t size
, int prot
)
1384 unsigned long flags
;
1385 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1386 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
1391 spin_lock_irqsave(&smmu_domain
->pgtbl_lock
, flags
);
1392 ret
= ops
->map(ops
, iova
, paddr
, size
, prot
);
1393 spin_unlock_irqrestore(&smmu_domain
->pgtbl_lock
, flags
);
1397 static size_t arm_smmu_unmap(struct iommu_domain
*domain
, unsigned long iova
,
1401 unsigned long flags
;
1402 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1403 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
1408 spin_lock_irqsave(&smmu_domain
->pgtbl_lock
, flags
);
1409 ret
= ops
->unmap(ops
, iova
, size
);
1410 spin_unlock_irqrestore(&smmu_domain
->pgtbl_lock
, flags
);
1414 static phys_addr_t
arm_smmu_iova_to_phys_hard(struct iommu_domain
*domain
,
1417 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1418 struct arm_smmu_device
*smmu
= smmu_domain
->smmu
;
1419 struct arm_smmu_cfg
*cfg
= &smmu_domain
->cfg
;
1420 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
1421 struct device
*dev
= smmu
->dev
;
1422 void __iomem
*cb_base
;
1427 cb_base
= ARM_SMMU_CB(smmu
, cfg
->cbndx
);
1429 /* ATS1 registers can only be written atomically */
1430 va
= iova
& ~0xfffUL
;
1431 if (smmu
->version
== ARM_SMMU_V2
)
1432 smmu_write_atomic_lq(va
, cb_base
+ ARM_SMMU_CB_ATS1PR
);
1433 else /* Register is only 32-bit in v1 */
1434 writel_relaxed(va
, cb_base
+ ARM_SMMU_CB_ATS1PR
);
1436 if (readl_poll_timeout_atomic(cb_base
+ ARM_SMMU_CB_ATSR
, tmp
,
1437 !(tmp
& ATSR_ACTIVE
), 5, 50)) {
1439 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1441 return ops
->iova_to_phys(ops
, iova
);
1444 phys
= readq_relaxed(cb_base
+ ARM_SMMU_CB_PAR
);
1445 if (phys
& CB_PAR_F
) {
1446 dev_err(dev
, "translation fault!\n");
1447 dev_err(dev
, "PAR = 0x%llx\n", phys
);
1451 return (phys
& GENMASK_ULL(39, 12)) | (iova
& 0xfff);
1454 static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain
*domain
,
1458 unsigned long flags
;
1459 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1460 struct io_pgtable_ops
*ops
= smmu_domain
->pgtbl_ops
;
1462 if (domain
->type
== IOMMU_DOMAIN_IDENTITY
)
1468 spin_lock_irqsave(&smmu_domain
->pgtbl_lock
, flags
);
1469 if (smmu_domain
->smmu
->features
& ARM_SMMU_FEAT_TRANS_OPS
&&
1470 smmu_domain
->stage
== ARM_SMMU_DOMAIN_S1
) {
1471 ret
= arm_smmu_iova_to_phys_hard(domain
, iova
);
1473 ret
= ops
->iova_to_phys(ops
, iova
);
1476 spin_unlock_irqrestore(&smmu_domain
->pgtbl_lock
, flags
);
1481 static bool arm_smmu_capable(enum iommu_cap cap
)
1484 case IOMMU_CAP_CACHE_COHERENCY
:
1486 * Return true here as the SMMU can always send out coherent
1490 case IOMMU_CAP_NOEXEC
:
1497 static int arm_smmu_match_node(struct device
*dev
, void *data
)
1499 return dev
->fwnode
== data
;
1503 struct arm_smmu_device
*arm_smmu_get_by_fwnode(struct fwnode_handle
*fwnode
)
1505 struct device
*dev
= driver_find_device(&arm_smmu_driver
.driver
, NULL
,
1506 fwnode
, arm_smmu_match_node
);
1508 return dev
? dev_get_drvdata(dev
) : NULL
;
1511 static int arm_smmu_add_device(struct device
*dev
)
1513 struct arm_smmu_device
*smmu
;
1514 struct arm_smmu_master_cfg
*cfg
;
1515 struct iommu_fwspec
*fwspec
= dev
->iommu_fwspec
;
1518 if (using_legacy_binding
) {
1519 ret
= arm_smmu_register_legacy_master(dev
, &smmu
);
1520 fwspec
= dev
->iommu_fwspec
;
1523 } else if (fwspec
&& fwspec
->ops
== &arm_smmu_ops
) {
1524 smmu
= arm_smmu_get_by_fwnode(fwspec
->iommu_fwnode
);
1530 for (i
= 0; i
< fwspec
->num_ids
; i
++) {
1531 u16 sid
= fwspec
->ids
[i
];
1532 u16 mask
= fwspec
->ids
[i
] >> SMR_MASK_SHIFT
;
1534 if (sid
& ~smmu
->streamid_mask
) {
1535 dev_err(dev
, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1536 sid
, smmu
->streamid_mask
);
1539 if (mask
& ~smmu
->smr_mask_mask
) {
1540 dev_err(dev
, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1541 mask
, smmu
->smr_mask_mask
);
1547 cfg
= kzalloc(offsetof(struct arm_smmu_master_cfg
, smendx
[i
]),
1553 fwspec
->iommu_priv
= cfg
;
1555 cfg
->smendx
[i
] = INVALID_SMENDX
;
1557 ret
= arm_smmu_master_alloc_smes(dev
);
1561 iommu_device_link(&smmu
->iommu
, dev
);
1567 kfree(fwspec
->iommu_priv
);
1568 iommu_fwspec_free(dev
);
1572 static void arm_smmu_remove_device(struct device
*dev
)
1574 struct iommu_fwspec
*fwspec
= dev
->iommu_fwspec
;
1575 struct arm_smmu_master_cfg
*cfg
;
1576 struct arm_smmu_device
*smmu
;
1579 if (!fwspec
|| fwspec
->ops
!= &arm_smmu_ops
)
1582 cfg
= fwspec
->iommu_priv
;
1585 iommu_device_unlink(&smmu
->iommu
, dev
);
1586 arm_smmu_master_free_smes(fwspec
);
1587 iommu_group_remove_device(dev
);
1588 kfree(fwspec
->iommu_priv
);
1589 iommu_fwspec_free(dev
);
1592 static struct iommu_group
*arm_smmu_device_group(struct device
*dev
)
1594 struct iommu_fwspec
*fwspec
= dev
->iommu_fwspec
;
1595 struct arm_smmu_device
*smmu
= fwspec_smmu(fwspec
);
1596 struct iommu_group
*group
= NULL
;
1599 for_each_cfg_sme(fwspec
, i
, idx
) {
1600 if (group
&& smmu
->s2crs
[idx
].group
&&
1601 group
!= smmu
->s2crs
[idx
].group
)
1602 return ERR_PTR(-EINVAL
);
1604 group
= smmu
->s2crs
[idx
].group
;
1608 return iommu_group_ref_get(group
);
1610 if (dev_is_pci(dev
))
1611 group
= pci_device_group(dev
);
1613 group
= generic_device_group(dev
);
1618 static int arm_smmu_domain_get_attr(struct iommu_domain
*domain
,
1619 enum iommu_attr attr
, void *data
)
1621 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1623 if (domain
->type
!= IOMMU_DOMAIN_UNMANAGED
)
1627 case DOMAIN_ATTR_NESTING
:
1628 *(int *)data
= (smmu_domain
->stage
== ARM_SMMU_DOMAIN_NESTED
);
1635 static int arm_smmu_domain_set_attr(struct iommu_domain
*domain
,
1636 enum iommu_attr attr
, void *data
)
1639 struct arm_smmu_domain
*smmu_domain
= to_smmu_domain(domain
);
1641 if (domain
->type
!= IOMMU_DOMAIN_UNMANAGED
)
1644 mutex_lock(&smmu_domain
->init_mutex
);
1647 case DOMAIN_ATTR_NESTING
:
1648 if (smmu_domain
->smmu
) {
1654 smmu_domain
->stage
= ARM_SMMU_DOMAIN_NESTED
;
1656 smmu_domain
->stage
= ARM_SMMU_DOMAIN_S1
;
1664 mutex_unlock(&smmu_domain
->init_mutex
);
1668 static int arm_smmu_of_xlate(struct device
*dev
, struct of_phandle_args
*args
)
1672 if (args
->args_count
> 0)
1673 fwid
|= (u16
)args
->args
[0];
1675 if (args
->args_count
> 1)
1676 fwid
|= (u16
)args
->args
[1] << SMR_MASK_SHIFT
;
1677 else if (!of_property_read_u32(args
->np
, "stream-match-mask", &mask
))
1678 fwid
|= (u16
)mask
<< SMR_MASK_SHIFT
;
1680 return iommu_fwspec_add_ids(dev
, &fwid
, 1);
1683 static void arm_smmu_get_resv_regions(struct device
*dev
,
1684 struct list_head
*head
)
1686 struct iommu_resv_region
*region
;
1687 int prot
= IOMMU_WRITE
| IOMMU_NOEXEC
| IOMMU_MMIO
;
1689 region
= iommu_alloc_resv_region(MSI_IOVA_BASE
, MSI_IOVA_LENGTH
,
1690 prot
, IOMMU_RESV_SW_MSI
);
1694 list_add_tail(®ion
->list
, head
);
1696 iommu_dma_get_resv_regions(dev
, head
);
1699 static void arm_smmu_put_resv_regions(struct device
*dev
,
1700 struct list_head
*head
)
1702 struct iommu_resv_region
*entry
, *next
;
1704 list_for_each_entry_safe(entry
, next
, head
, list
)
1708 static struct iommu_ops arm_smmu_ops
= {
1709 .capable
= arm_smmu_capable
,
1710 .domain_alloc
= arm_smmu_domain_alloc
,
1711 .domain_free
= arm_smmu_domain_free
,
1712 .attach_dev
= arm_smmu_attach_dev
,
1713 .map
= arm_smmu_map
,
1714 .unmap
= arm_smmu_unmap
,
1715 .map_sg
= default_iommu_map_sg
,
1716 .iova_to_phys
= arm_smmu_iova_to_phys
,
1717 .add_device
= arm_smmu_add_device
,
1718 .remove_device
= arm_smmu_remove_device
,
1719 .device_group
= arm_smmu_device_group
,
1720 .domain_get_attr
= arm_smmu_domain_get_attr
,
1721 .domain_set_attr
= arm_smmu_domain_set_attr
,
1722 .of_xlate
= arm_smmu_of_xlate
,
1723 .get_resv_regions
= arm_smmu_get_resv_regions
,
1724 .put_resv_regions
= arm_smmu_put_resv_regions
,
1725 .pgsize_bitmap
= -1UL, /* Restricted during device attach */
1728 static void arm_smmu_device_reset(struct arm_smmu_device
*smmu
)
1730 void __iomem
*gr0_base
= ARM_SMMU_GR0(smmu
);
1731 void __iomem
*cb_base
;
1735 /* clear global FSR */
1736 reg
= readl_relaxed(ARM_SMMU_GR0_NS(smmu
) + ARM_SMMU_GR0_sGFSR
);
1737 writel(reg
, ARM_SMMU_GR0_NS(smmu
) + ARM_SMMU_GR0_sGFSR
);
1740 * Reset stream mapping groups: Initial values mark all SMRn as
1741 * invalid and all S2CRn as bypass unless overridden.
1743 for (i
= 0; i
< smmu
->num_mapping_groups
; ++i
)
1744 arm_smmu_write_sme(smmu
, i
);
1746 if (smmu
->model
== ARM_MMU500
) {
1748 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1749 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1750 * bit is only present in MMU-500r2 onwards.
1752 reg
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_ID7
);
1753 major
= (reg
>> ID7_MAJOR_SHIFT
) & ID7_MAJOR_MASK
;
1754 reg
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_sACR
);
1756 reg
&= ~ARM_MMU500_ACR_CACHE_LOCK
;
1758 * Allow unmatched Stream IDs to allocate bypass
1759 * TLB entries for reduced latency.
1761 reg
|= ARM_MMU500_ACR_SMTNMB_TLBEN
;
1762 writel_relaxed(reg
, gr0_base
+ ARM_SMMU_GR0_sACR
);
1765 /* Make sure all context banks are disabled and clear CB_FSR */
1766 for (i
= 0; i
< smmu
->num_context_banks
; ++i
) {
1767 cb_base
= ARM_SMMU_CB(smmu
, i
);
1768 writel_relaxed(0, cb_base
+ ARM_SMMU_CB_SCTLR
);
1769 writel_relaxed(FSR_FAULT
, cb_base
+ ARM_SMMU_CB_FSR
);
1771 * Disable MMU-500's not-particularly-beneficial next-page
1772 * prefetcher for the sake of errata #841119 and #826419.
1774 if (smmu
->model
== ARM_MMU500
) {
1775 reg
= readl_relaxed(cb_base
+ ARM_SMMU_CB_ACTLR
);
1776 reg
&= ~ARM_MMU500_ACTLR_CPRE
;
1777 writel_relaxed(reg
, cb_base
+ ARM_SMMU_CB_ACTLR
);
1781 /* Invalidate the TLB, just in case */
1782 writel_relaxed(0, gr0_base
+ ARM_SMMU_GR0_TLBIALLH
);
1783 writel_relaxed(0, gr0_base
+ ARM_SMMU_GR0_TLBIALLNSNH
);
1785 reg
= readl_relaxed(ARM_SMMU_GR0_NS(smmu
) + ARM_SMMU_GR0_sCR0
);
1787 /* Enable fault reporting */
1788 reg
|= (sCR0_GFRE
| sCR0_GFIE
| sCR0_GCFGFRE
| sCR0_GCFGFIE
);
1790 /* Disable TLB broadcasting. */
1791 reg
|= (sCR0_VMIDPNE
| sCR0_PTM
);
1793 /* Enable client access, handling unmatched streams as appropriate */
1794 reg
&= ~sCR0_CLIENTPD
;
1798 reg
&= ~sCR0_USFCFG
;
1800 /* Disable forced broadcasting */
1803 /* Don't upgrade barriers */
1804 reg
&= ~(sCR0_BSU_MASK
<< sCR0_BSU_SHIFT
);
1806 if (smmu
->features
& ARM_SMMU_FEAT_VMID16
)
1807 reg
|= sCR0_VMID16EN
;
1809 if (smmu
->features
& ARM_SMMU_FEAT_EXIDS
)
1810 reg
|= sCR0_EXIDENABLE
;
1812 /* Push the button */
1813 arm_smmu_tlb_sync_global(smmu
);
1814 writel(reg
, ARM_SMMU_GR0_NS(smmu
) + ARM_SMMU_GR0_sCR0
);
/*
 * Convert an ID2 address-size field encoding into a width in bits.
 * NOTE(review): the body was missing from this copy of the file and has
 * been restored from the SMMU ID2 size encoding (0..5 -> 32, 36, 40, 42,
 * 44, 48 bits, with unknown encodings treated as 48) - confirm against the
 * architecture specification.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
1836 static int arm_smmu_device_cfg_probe(struct arm_smmu_device
*smmu
)
1839 void __iomem
*gr0_base
= ARM_SMMU_GR0(smmu
);
1841 bool cttw_reg
, cttw_fw
= smmu
->features
& ARM_SMMU_FEAT_COHERENT_WALK
;
1844 dev_notice(smmu
->dev
, "probing hardware configuration...\n");
1845 dev_notice(smmu
->dev
, "SMMUv%d with:\n",
1846 smmu
->version
== ARM_SMMU_V2
? 2 : 1);
1849 id
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_ID0
);
1851 /* Restrict available stages based on module parameter */
1852 if (force_stage
== 1)
1853 id
&= ~(ID0_S2TS
| ID0_NTS
);
1854 else if (force_stage
== 2)
1855 id
&= ~(ID0_S1TS
| ID0_NTS
);
1857 if (id
& ID0_S1TS
) {
1858 smmu
->features
|= ARM_SMMU_FEAT_TRANS_S1
;
1859 dev_notice(smmu
->dev
, "\tstage 1 translation\n");
1862 if (id
& ID0_S2TS
) {
1863 smmu
->features
|= ARM_SMMU_FEAT_TRANS_S2
;
1864 dev_notice(smmu
->dev
, "\tstage 2 translation\n");
1868 smmu
->features
|= ARM_SMMU_FEAT_TRANS_NESTED
;
1869 dev_notice(smmu
->dev
, "\tnested translation\n");
1872 if (!(smmu
->features
&
1873 (ARM_SMMU_FEAT_TRANS_S1
| ARM_SMMU_FEAT_TRANS_S2
))) {
1874 dev_err(smmu
->dev
, "\tno translation support!\n");
1878 if ((id
& ID0_S1TS
) &&
1879 ((smmu
->version
< ARM_SMMU_V2
) || !(id
& ID0_ATOSNS
))) {
1880 smmu
->features
|= ARM_SMMU_FEAT_TRANS_OPS
;
1881 dev_notice(smmu
->dev
, "\taddress translation ops\n");
1885 * In order for DMA API calls to work properly, we must defer to what
1886 * the FW says about coherency, regardless of what the hardware claims.
1887 * Fortunately, this also opens up a workaround for systems where the
1888 * ID register value has ended up configured incorrectly.
1890 cttw_reg
= !!(id
& ID0_CTTW
);
1891 if (cttw_fw
|| cttw_reg
)
1892 dev_notice(smmu
->dev
, "\t%scoherent table walk\n",
1893 cttw_fw
? "" : "non-");
1894 if (cttw_fw
!= cttw_reg
)
1895 dev_notice(smmu
->dev
,
1896 "\t(IDR0.CTTW overridden by FW configuration)\n");
1898 /* Max. number of entries we have for stream matching/indexing */
1899 if (smmu
->version
== ARM_SMMU_V2
&& id
& ID0_EXIDS
) {
1900 smmu
->features
|= ARM_SMMU_FEAT_EXIDS
;
1903 size
= 1 << ((id
>> ID0_NUMSIDB_SHIFT
) & ID0_NUMSIDB_MASK
);
1905 smmu
->streamid_mask
= size
- 1;
1907 smmu
->features
|= ARM_SMMU_FEAT_STREAM_MATCH
;
1908 size
= (id
>> ID0_NUMSMRG_SHIFT
) & ID0_NUMSMRG_MASK
;
1911 "stream-matching supported, but no SMRs present!\n");
1915 /* Zero-initialised to mark as invalid */
1916 smmu
->smrs
= devm_kcalloc(smmu
->dev
, size
, sizeof(*smmu
->smrs
),
1921 dev_notice(smmu
->dev
,
1922 "\tstream matching with %lu register groups", size
);
1924 /* s2cr->type == 0 means translation, so initialise explicitly */
1925 smmu
->s2crs
= devm_kmalloc_array(smmu
->dev
, size
, sizeof(*smmu
->s2crs
),
1929 for (i
= 0; i
< size
; i
++)
1930 smmu
->s2crs
[i
] = s2cr_init_val
;
1932 smmu
->num_mapping_groups
= size
;
1933 mutex_init(&smmu
->stream_map_mutex
);
1935 if (smmu
->version
< ARM_SMMU_V2
|| !(id
& ID0_PTFS_NO_AARCH32
)) {
1936 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH32_L
;
1937 if (!(id
& ID0_PTFS_NO_AARCH32S
))
1938 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH32_S
;
1942 id
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_ID1
);
1943 smmu
->pgshift
= (id
& ID1_PAGESIZE
) ? 16 : 12;
1945 /* Check for size mismatch of SMMU address space from mapped region */
1946 size
= 1 << (((id
>> ID1_NUMPAGENDXB_SHIFT
) & ID1_NUMPAGENDXB_MASK
) + 1);
1947 size
<<= smmu
->pgshift
;
1948 if (smmu
->cb_base
!= gr0_base
+ size
)
1950 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1951 size
* 2, (smmu
->cb_base
- gr0_base
) * 2);
1953 smmu
->num_s2_context_banks
= (id
>> ID1_NUMS2CB_SHIFT
) & ID1_NUMS2CB_MASK
;
1954 smmu
->num_context_banks
= (id
>> ID1_NUMCB_SHIFT
) & ID1_NUMCB_MASK
;
1955 if (smmu
->num_s2_context_banks
> smmu
->num_context_banks
) {
1956 dev_err(smmu
->dev
, "impossible number of S2 context banks!\n");
1959 dev_notice(smmu
->dev
, "\t%u context banks (%u stage-2 only)\n",
1960 smmu
->num_context_banks
, smmu
->num_s2_context_banks
);
1962 * Cavium CN88xx erratum #27704.
1963 * Ensure ASID and VMID allocation is unique across all SMMUs in
1966 if (smmu
->model
== CAVIUM_SMMUV2
) {
1967 smmu
->cavium_id_base
=
1968 atomic_add_return(smmu
->num_context_banks
,
1969 &cavium_smmu_context_count
);
1970 smmu
->cavium_id_base
-= smmu
->num_context_banks
;
1971 dev_notice(smmu
->dev
, "\tenabling workaround for Cavium erratum 27704\n");
1975 id
= readl_relaxed(gr0_base
+ ARM_SMMU_GR0_ID2
);
1976 size
= arm_smmu_id_size_to_bits((id
>> ID2_IAS_SHIFT
) & ID2_IAS_MASK
);
1977 smmu
->ipa_size
= size
;
1979 /* The output mask is also applied for bypass */
1980 size
= arm_smmu_id_size_to_bits((id
>> ID2_OAS_SHIFT
) & ID2_OAS_MASK
);
1981 smmu
->pa_size
= size
;
1983 if (id
& ID2_VMID16
)
1984 smmu
->features
|= ARM_SMMU_FEAT_VMID16
;
1987 * What the page table walker can address actually depends on which
1988 * descriptor format is in use, but since a) we don't know that yet,
1989 * and b) it can vary per context bank, this will have to do...
1991 if (dma_set_mask_and_coherent(smmu
->dev
, DMA_BIT_MASK(size
)))
1993 "failed to set DMA mask for table walker\n");
1995 if (smmu
->version
< ARM_SMMU_V2
) {
1996 smmu
->va_size
= smmu
->ipa_size
;
1997 if (smmu
->version
== ARM_SMMU_V1_64K
)
1998 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH64_64K
;
2000 size
= (id
>> ID2_UBS_SHIFT
) & ID2_UBS_MASK
;
2001 smmu
->va_size
= arm_smmu_id_size_to_bits(size
);
2002 if (id
& ID2_PTFS_4K
)
2003 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH64_4K
;
2004 if (id
& ID2_PTFS_16K
)
2005 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH64_16K
;
2006 if (id
& ID2_PTFS_64K
)
2007 smmu
->features
|= ARM_SMMU_FEAT_FMT_AARCH64_64K
;
2010 /* Now we've corralled the various formats, what'll it do? */
2011 if (smmu
->features
& ARM_SMMU_FEAT_FMT_AARCH32_S
)
2012 smmu
->pgsize_bitmap
|= SZ_4K
| SZ_64K
| SZ_1M
| SZ_16M
;
2013 if (smmu
->features
&
2014 (ARM_SMMU_FEAT_FMT_AARCH32_L
| ARM_SMMU_FEAT_FMT_AARCH64_4K
))
2015 smmu
->pgsize_bitmap
|= SZ_4K
| SZ_2M
| SZ_1G
;
2016 if (smmu
->features
& ARM_SMMU_FEAT_FMT_AARCH64_16K
)
2017 smmu
->pgsize_bitmap
|= SZ_16K
| SZ_32M
;
2018 if (smmu
->features
& ARM_SMMU_FEAT_FMT_AARCH64_64K
)
2019 smmu
->pgsize_bitmap
|= SZ_64K
| SZ_512M
;
2021 if (arm_smmu_ops
.pgsize_bitmap
== -1UL)
2022 arm_smmu_ops
.pgsize_bitmap
= smmu
->pgsize_bitmap
;
2024 arm_smmu_ops
.pgsize_bitmap
|= smmu
->pgsize_bitmap
;
2025 dev_notice(smmu
->dev
, "\tSupported page sizes: 0x%08lx\n",
2026 smmu
->pgsize_bitmap
);
2029 if (smmu
->features
& ARM_SMMU_FEAT_TRANS_S1
)
2030 dev_notice(smmu
->dev
, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
2031 smmu
->va_size
, smmu
->ipa_size
);
2033 if (smmu
->features
& ARM_SMMU_FEAT_TRANS_S2
)
2034 dev_notice(smmu
->dev
, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
2035 smmu
->ipa_size
, smmu
->pa_size
);
2040 struct arm_smmu_match_data
{
2041 enum arm_smmu_arch_version version
;
2042 enum arm_smmu_implementation model
;
2045 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
2046 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
2048 ARM_SMMU_MATCH_DATA(smmu_generic_v1
, ARM_SMMU_V1
, GENERIC_SMMU
);
2049 ARM_SMMU_MATCH_DATA(smmu_generic_v2
, ARM_SMMU_V2
, GENERIC_SMMU
);
2050 ARM_SMMU_MATCH_DATA(arm_mmu401
, ARM_SMMU_V1_64K
, GENERIC_SMMU
);
2051 ARM_SMMU_MATCH_DATA(arm_mmu500
, ARM_SMMU_V2
, ARM_MMU500
);
2052 ARM_SMMU_MATCH_DATA(cavium_smmuv2
, ARM_SMMU_V2
, CAVIUM_SMMUV2
);
2054 static const struct of_device_id arm_smmu_of_match
[] = {
2055 { .compatible
= "arm,smmu-v1", .data
= &smmu_generic_v1
},
2056 { .compatible
= "arm,smmu-v2", .data
= &smmu_generic_v2
},
2057 { .compatible
= "arm,mmu-400", .data
= &smmu_generic_v1
},
2058 { .compatible
= "arm,mmu-401", .data
= &arm_mmu401
},
2059 { .compatible
= "arm,mmu-500", .data
= &arm_mmu500
},
2060 { .compatible
= "cavium,smmu-v2", .data
= &cavium_smmuv2
},
2063 MODULE_DEVICE_TABLE(of
, arm_smmu_of_match
);
2066 static int acpi_smmu_get_data(u32 model
, struct arm_smmu_device
*smmu
)
2071 case ACPI_IORT_SMMU_V1
:
2072 case ACPI_IORT_SMMU_CORELINK_MMU400
:
2073 smmu
->version
= ARM_SMMU_V1
;
2074 smmu
->model
= GENERIC_SMMU
;
2076 case ACPI_IORT_SMMU_V2
:
2077 smmu
->version
= ARM_SMMU_V2
;
2078 smmu
->model
= GENERIC_SMMU
;
2080 case ACPI_IORT_SMMU_CORELINK_MMU500
:
2081 smmu
->version
= ARM_SMMU_V2
;
2082 smmu
->model
= ARM_MMU500
;
2091 static int arm_smmu_device_acpi_probe(struct platform_device
*pdev
,
2092 struct arm_smmu_device
*smmu
)
2094 struct device
*dev
= smmu
->dev
;
2095 struct acpi_iort_node
*node
=
2096 *(struct acpi_iort_node
**)dev_get_platdata(dev
);
2097 struct acpi_iort_smmu
*iort_smmu
;
2100 /* Retrieve SMMU1/2 specific data */
2101 iort_smmu
= (struct acpi_iort_smmu
*)node
->node_data
;
2103 ret
= acpi_smmu_get_data(iort_smmu
->model
, smmu
);
2107 /* Ignore the configuration access interrupt */
2108 smmu
->num_global_irqs
= 1;
2110 if (iort_smmu
->flags
& ACPI_IORT_SMMU_COHERENT_WALK
)
2111 smmu
->features
|= ARM_SMMU_FEAT_COHERENT_WALK
;
2116 static inline int arm_smmu_device_acpi_probe(struct platform_device
*pdev
,
2117 struct arm_smmu_device
*smmu
)
2123 static int arm_smmu_device_dt_probe(struct platform_device
*pdev
,
2124 struct arm_smmu_device
*smmu
)
2126 const struct arm_smmu_match_data
*data
;
2127 struct device
*dev
= &pdev
->dev
;
2128 bool legacy_binding
;
2130 if (of_property_read_u32(dev
->of_node
, "#global-interrupts",
2131 &smmu
->num_global_irqs
)) {
2132 dev_err(dev
, "missing #global-interrupts property\n");
2136 data
= of_device_get_match_data(dev
);
2137 smmu
->version
= data
->version
;
2138 smmu
->model
= data
->model
;
2140 parse_driver_options(smmu
);
2142 legacy_binding
= of_find_property(dev
->of_node
, "mmu-masters", NULL
);
2143 if (legacy_binding
&& !using_generic_binding
) {
2144 if (!using_legacy_binding
)
2145 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2146 using_legacy_binding
= true;
2147 } else if (!legacy_binding
&& !using_legacy_binding
) {
2148 using_generic_binding
= true;
2150 dev_err(dev
, "not probing due to mismatched DT properties\n");
2154 if (of_dma_is_coherent(dev
->of_node
))
2155 smmu
->features
|= ARM_SMMU_FEAT_COHERENT_WALK
;
2160 static void arm_smmu_bus_init(void)
2162 /* Oh, for a proper bus abstraction */
2163 if (!iommu_present(&platform_bus_type
))
2164 bus_set_iommu(&platform_bus_type
, &arm_smmu_ops
);
2165 #ifdef CONFIG_ARM_AMBA
2166 if (!iommu_present(&amba_bustype
))
2167 bus_set_iommu(&amba_bustype
, &arm_smmu_ops
);
2170 if (!iommu_present(&pci_bus_type
)) {
2172 bus_set_iommu(&pci_bus_type
, &arm_smmu_ops
);
2177 static int arm_smmu_device_probe(struct platform_device
*pdev
)
2179 struct resource
*res
;
2180 resource_size_t ioaddr
;
2181 struct arm_smmu_device
*smmu
;
2182 struct device
*dev
= &pdev
->dev
;
2183 int num_irqs
, i
, err
;
2185 smmu
= devm_kzalloc(dev
, sizeof(*smmu
), GFP_KERNEL
);
2187 dev_err(dev
, "failed to allocate arm_smmu_device\n");
2193 err
= arm_smmu_device_dt_probe(pdev
, smmu
);
2195 err
= arm_smmu_device_acpi_probe(pdev
, smmu
);
2200 res
= platform_get_resource(pdev
, IORESOURCE_MEM
, 0);
2201 ioaddr
= res
->start
;
2202 smmu
->base
= devm_ioremap_resource(dev
, res
);
2203 if (IS_ERR(smmu
->base
))
2204 return PTR_ERR(smmu
->base
);
2205 smmu
->cb_base
= smmu
->base
+ resource_size(res
) / 2;
2208 while ((res
= platform_get_resource(pdev
, IORESOURCE_IRQ
, num_irqs
))) {
2210 if (num_irqs
> smmu
->num_global_irqs
)
2211 smmu
->num_context_irqs
++;
2214 if (!smmu
->num_context_irqs
) {
2215 dev_err(dev
, "found %d interrupts but expected at least %d\n",
2216 num_irqs
, smmu
->num_global_irqs
+ 1);
2220 smmu
->irqs
= devm_kzalloc(dev
, sizeof(*smmu
->irqs
) * num_irqs
,
2223 dev_err(dev
, "failed to allocate %d irqs\n", num_irqs
);
2227 for (i
= 0; i
< num_irqs
; ++i
) {
2228 int irq
= platform_get_irq(pdev
, i
);
2231 dev_err(dev
, "failed to get irq index %d\n", i
);
2234 smmu
->irqs
[i
] = irq
;
2237 err
= arm_smmu_device_cfg_probe(smmu
);
2241 if (smmu
->version
== ARM_SMMU_V2
&&
2242 smmu
->num_context_banks
!= smmu
->num_context_irqs
) {
2244 "found only %d context interrupt(s) but %d required\n",
2245 smmu
->num_context_irqs
, smmu
->num_context_banks
);
2249 for (i
= 0; i
< smmu
->num_global_irqs
; ++i
) {
2250 err
= devm_request_irq(smmu
->dev
, smmu
->irqs
[i
],
2251 arm_smmu_global_fault
,
2253 "arm-smmu global fault",
2256 dev_err(dev
, "failed to request global IRQ %d (%u)\n",
2262 err
= iommu_device_sysfs_add(&smmu
->iommu
, smmu
->dev
, NULL
,
2263 "smmu.%pa", &ioaddr
);
2265 dev_err(dev
, "Failed to register iommu in sysfs\n");
2269 iommu_device_set_ops(&smmu
->iommu
, &arm_smmu_ops
);
2270 iommu_device_set_fwnode(&smmu
->iommu
, dev
->fwnode
);
2272 err
= iommu_device_register(&smmu
->iommu
);
2274 dev_err(dev
, "Failed to register iommu\n");
2278 platform_set_drvdata(pdev
, smmu
);
2279 arm_smmu_device_reset(smmu
);
2280 arm_smmu_test_smr_masks(smmu
);
2283 * For ACPI and generic DT bindings, an SMMU will be probed before
2284 * any device which might need it, so we want the bus ops in place
2285 * ready to handle default domain setup as soon as any SMMU exists.
2287 if (!using_legacy_binding
)
2288 arm_smmu_bus_init();
2294 * With the legacy DT binding in play, though, we have no guarantees about
2295 * probe order, but then we're also not doing default domains, so we can
2296 * delay setting bus ops until we're sure every possible SMMU is ready,
2297 * and that way ensure that no add_device() calls get missed.
2299 static int arm_smmu_legacy_bus_init(void)
2301 if (using_legacy_binding
)
2302 arm_smmu_bus_init();
2305 device_initcall_sync(arm_smmu_legacy_bus_init
);
2307 static int arm_smmu_device_remove(struct platform_device
*pdev
)
2309 struct arm_smmu_device
*smmu
= platform_get_drvdata(pdev
);
2314 if (!bitmap_empty(smmu
->context_map
, ARM_SMMU_MAX_CBS
))
2315 dev_err(&pdev
->dev
, "removing device with active domains!\n");
2317 /* Turn the thing off */
2318 writel(sCR0_CLIENTPD
, ARM_SMMU_GR0_NS(smmu
) + ARM_SMMU_GR0_sCR0
);
2322 static struct platform_driver arm_smmu_driver
= {
2325 .of_match_table
= of_match_ptr(arm_smmu_of_match
),
2327 .probe
= arm_smmu_device_probe
,
2328 .remove
= arm_smmu_device_remove
,
2330 module_platform_driver(arm_smmu_driver
);
2332 IOMMU_OF_DECLARE(arm_smmuv1
, "arm,smmu-v1", NULL
);
2333 IOMMU_OF_DECLARE(arm_smmuv2
, "arm,smmu-v2", NULL
);
2334 IOMMU_OF_DECLARE(arm_mmu400
, "arm,mmu-400", NULL
);
2335 IOMMU_OF_DECLARE(arm_mmu401
, "arm,mmu-401", NULL
);
2336 IOMMU_OF_DECLARE(arm_mmu500
, "arm,mmu-500", NULL
);
2337 IOMMU_OF_DECLARE(cavium_smmuv2
, "cavium,smmu-v2", NULL
);
2339 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2340 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2341 MODULE_LICENSE("GPL v2");