drivers/iommu/arm-smmu.c
1 /*
2 * IOMMU API for ARM architected SMMU implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * Copyright (C) 2013 ARM Limited
19 * Author: Will Deacon <will.deacon@arm.com>
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
30 #define pr_fmt(fmt) "arm-smmu: " fmt
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
54 #include <linux/amba/bus.h>
56 #include "io-pgtable.h"
58 /* Maximum number of context banks per SMMU */
59 #define ARM_SMMU_MAX_CBS 128
61 /* SMMU global address space */
62 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
63 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
66 * SMMU global address space with conditional offset to access secure
67 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
68 * nsGFSYNR0: 0x450)
70 #define ARM_SMMU_GR0_NS(smmu) \
71 ((smmu)->base + \
72 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
73 ? 0x400 : 0))
76 * Some 64-bit registers only make sense to write atomically, but in such
77 * cases all the data relevant to AArch32 formats lies within the lower word,
78 * therefore this actually makes more sense than it might first appear.
80 #ifdef CONFIG_64BIT
81 #define smmu_write_atomic_lq writeq_relaxed
82 #else
83 #define smmu_write_atomic_lq writel_relaxed
84 #endif
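/*
 * Editorial sketch of the macro above (illustrative only, not part of the
 * driver): a 64-bit kernel issues one atomic 64-bit store, while a 32-bit
 * kernel writes only the low word, which is all the AArch32 layouts need:
 *
 *	smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
 *	// CONFIG_64BIT:  writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 *	// otherwise:     writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 */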
86 /* Configuration registers */
87 #define ARM_SMMU_GR0_sCR0 0x0
88 #define sCR0_CLIENTPD (1 << 0)
89 #define sCR0_GFRE (1 << 1)
90 #define sCR0_GFIE (1 << 2)
91 #define sCR0_EXIDENABLE (1 << 3)
92 #define sCR0_GCFGFRE (1 << 4)
93 #define sCR0_GCFGFIE (1 << 5)
94 #define sCR0_USFCFG (1 << 10)
95 #define sCR0_VMIDPNE (1 << 11)
96 #define sCR0_PTM (1 << 12)
97 #define sCR0_FB (1 << 13)
98 #define sCR0_VMID16EN (1 << 31)
99 #define sCR0_BSU_SHIFT 14
100 #define sCR0_BSU_MASK 0x3
102 /* Auxiliary Configuration register */
103 #define ARM_SMMU_GR0_sACR 0x10
105 /* Identification registers */
106 #define ARM_SMMU_GR0_ID0 0x20
107 #define ARM_SMMU_GR0_ID1 0x24
108 #define ARM_SMMU_GR0_ID2 0x28
109 #define ARM_SMMU_GR0_ID3 0x2c
110 #define ARM_SMMU_GR0_ID4 0x30
111 #define ARM_SMMU_GR0_ID5 0x34
112 #define ARM_SMMU_GR0_ID6 0x38
113 #define ARM_SMMU_GR0_ID7 0x3c
114 #define ARM_SMMU_GR0_sGFSR 0x48
115 #define ARM_SMMU_GR0_sGFSYNR0 0x50
116 #define ARM_SMMU_GR0_sGFSYNR1 0x54
117 #define ARM_SMMU_GR0_sGFSYNR2 0x58
119 #define ID0_S1TS (1 << 30)
120 #define ID0_S2TS (1 << 29)
121 #define ID0_NTS (1 << 28)
122 #define ID0_SMS (1 << 27)
123 #define ID0_ATOSNS (1 << 26)
124 #define ID0_PTFS_NO_AARCH32 (1 << 25)
125 #define ID0_PTFS_NO_AARCH32S (1 << 24)
126 #define ID0_CTTW (1 << 14)
127 #define ID0_NUMIRPT_SHIFT 16
128 #define ID0_NUMIRPT_MASK 0xff
129 #define ID0_NUMSIDB_SHIFT 9
130 #define ID0_NUMSIDB_MASK 0xf
131 #define ID0_EXIDS (1 << 8)
132 #define ID0_NUMSMRG_SHIFT 0
133 #define ID0_NUMSMRG_MASK 0xff
135 #define ID1_PAGESIZE (1 << 31)
136 #define ID1_NUMPAGENDXB_SHIFT 28
137 #define ID1_NUMPAGENDXB_MASK 7
138 #define ID1_NUMS2CB_SHIFT 16
139 #define ID1_NUMS2CB_MASK 0xff
140 #define ID1_NUMCB_SHIFT 0
141 #define ID1_NUMCB_MASK 0xff
143 #define ID2_OAS_SHIFT 4
144 #define ID2_OAS_MASK 0xf
145 #define ID2_IAS_SHIFT 0
146 #define ID2_IAS_MASK 0xf
147 #define ID2_UBS_SHIFT 8
148 #define ID2_UBS_MASK 0xf
149 #define ID2_PTFS_4K (1 << 12)
150 #define ID2_PTFS_16K (1 << 13)
151 #define ID2_PTFS_64K (1 << 14)
152 #define ID2_VMID16 (1 << 15)
154 #define ID7_MAJOR_SHIFT 4
155 #define ID7_MAJOR_MASK 0xf
157 /* Global TLB invalidation */
158 #define ARM_SMMU_GR0_TLBIVMID 0x64
159 #define ARM_SMMU_GR0_TLBIALLNSNH 0x68
160 #define ARM_SMMU_GR0_TLBIALLH 0x6c
161 #define ARM_SMMU_GR0_sTLBGSYNC 0x70
162 #define ARM_SMMU_GR0_sTLBGSTATUS 0x74
163 #define sTLBGSTATUS_GSACTIVE (1 << 0)
164 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
165 #define TLB_SPIN_COUNT 10
167 /* Stream mapping registers */
168 #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
169 #define SMR_VALID (1 << 31)
170 #define SMR_MASK_SHIFT 16
171 #define SMR_ID_SHIFT 0
173 #define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
174 #define S2CR_CBNDX_SHIFT 0
175 #define S2CR_CBNDX_MASK 0xff
176 #define S2CR_EXIDVALID (1 << 10)
177 #define S2CR_TYPE_SHIFT 16
178 #define S2CR_TYPE_MASK 0x3
179 enum arm_smmu_s2cr_type {
180 S2CR_TYPE_TRANS,
181 S2CR_TYPE_BYPASS,
182 S2CR_TYPE_FAULT,
185 #define S2CR_PRIVCFG_SHIFT 24
186 #define S2CR_PRIVCFG_MASK 0x3
187 enum arm_smmu_s2cr_privcfg {
188 S2CR_PRIVCFG_DEFAULT,
189 S2CR_PRIVCFG_DIPAN,
190 S2CR_PRIVCFG_UNPRIV,
191 S2CR_PRIVCFG_PRIV,
194 /* Context bank attribute registers */
195 #define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
196 #define CBAR_VMID_SHIFT 0
197 #define CBAR_VMID_MASK 0xff
198 #define CBAR_S1_BPSHCFG_SHIFT 8
199 #define CBAR_S1_BPSHCFG_MASK 3
200 #define CBAR_S1_BPSHCFG_NSH 3
201 #define CBAR_S1_MEMATTR_SHIFT 12
202 #define CBAR_S1_MEMATTR_MASK 0xf
203 #define CBAR_S1_MEMATTR_WB 0xf
204 #define CBAR_TYPE_SHIFT 16
205 #define CBAR_TYPE_MASK 0x3
206 #define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT)
207 #define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT)
208 #define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT)
209 #define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT)
210 #define CBAR_IRPTNDX_SHIFT 24
211 #define CBAR_IRPTNDX_MASK 0xff
213 #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
214 #define CBA2R_RW64_32BIT (0 << 0)
215 #define CBA2R_RW64_64BIT (1 << 0)
216 #define CBA2R_VMID_SHIFT 16
217 #define CBA2R_VMID_MASK 0xffff
219 /* Translation context bank */
220 #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
222 #define ARM_SMMU_CB_SCTLR 0x0
223 #define ARM_SMMU_CB_ACTLR 0x4
224 #define ARM_SMMU_CB_RESUME 0x8
225 #define ARM_SMMU_CB_TTBCR2 0x10
226 #define ARM_SMMU_CB_TTBR0 0x20
227 #define ARM_SMMU_CB_TTBR1 0x28
228 #define ARM_SMMU_CB_TTBCR 0x30
229 #define ARM_SMMU_CB_CONTEXTIDR 0x34
230 #define ARM_SMMU_CB_S1_MAIR0 0x38
231 #define ARM_SMMU_CB_S1_MAIR1 0x3c
232 #define ARM_SMMU_CB_PAR 0x50
233 #define ARM_SMMU_CB_FSR 0x58
234 #define ARM_SMMU_CB_FAR 0x60
235 #define ARM_SMMU_CB_FSYNR0 0x68
236 #define ARM_SMMU_CB_S1_TLBIVA 0x600
237 #define ARM_SMMU_CB_S1_TLBIASID 0x610
238 #define ARM_SMMU_CB_S1_TLBIVAL 0x620
239 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
240 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
241 #define ARM_SMMU_CB_TLBSYNC 0x7f0
242 #define ARM_SMMU_CB_TLBSTATUS 0x7f4
243 #define ARM_SMMU_CB_ATS1PR 0x800
244 #define ARM_SMMU_CB_ATSR 0x8f0
246 #define SCTLR_S1_ASIDPNE (1 << 12)
247 #define SCTLR_CFCFG (1 << 7)
248 #define SCTLR_CFIE (1 << 6)
249 #define SCTLR_CFRE (1 << 5)
250 #define SCTLR_E (1 << 4)
251 #define SCTLR_AFE (1 << 2)
252 #define SCTLR_TRE (1 << 1)
253 #define SCTLR_M (1 << 0)
255 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
257 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
258 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
260 #define CB_PAR_F (1 << 0)
262 #define ATSR_ACTIVE (1 << 0)
264 #define RESUME_RETRY (0 << 0)
265 #define RESUME_TERMINATE (1 << 0)
267 #define TTBCR2_SEP_SHIFT 15
268 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
269 #define TTBCR2_AS (1 << 4)
271 #define TTBRn_ASID_SHIFT 48
273 #define FSR_MULTI (1 << 31)
274 #define FSR_SS (1 << 30)
275 #define FSR_UUT (1 << 8)
276 #define FSR_ASF (1 << 7)
277 #define FSR_TLBLKF (1 << 6)
278 #define FSR_TLBMCF (1 << 5)
279 #define FSR_EF (1 << 4)
280 #define FSR_PF (1 << 3)
281 #define FSR_AFF (1 << 2)
282 #define FSR_TF (1 << 1)
284 #define FSR_IGN (FSR_AFF | FSR_ASF | \
285 FSR_TLBMCF | FSR_TLBLKF)
286 #define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
287 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
289 #define FSYNR0_WNR (1 << 4)
291 #define MSI_IOVA_BASE 0x8000000
292 #define MSI_IOVA_LENGTH 0x100000
294 static int force_stage;
295 module_param(force_stage, int, S_IRUGO);
296 MODULE_PARM_DESC(force_stage,
297 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
298 static bool disable_bypass;
299 module_param(disable_bypass, bool, S_IRUGO);
300 MODULE_PARM_DESC(disable_bypass,
301 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
303 enum arm_smmu_arch_version {
304 ARM_SMMU_V1,
305 ARM_SMMU_V1_64K,
306 ARM_SMMU_V2,
309 enum arm_smmu_implementation {
310 GENERIC_SMMU,
311 ARM_MMU500,
312 CAVIUM_SMMUV2,
315 struct arm_smmu_s2cr {
316 struct iommu_group *group;
317 int count;
318 enum arm_smmu_s2cr_type type;
319 enum arm_smmu_s2cr_privcfg privcfg;
320 u8 cbndx;
323 #define s2cr_init_val (struct arm_smmu_s2cr){ \
324 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
327 struct arm_smmu_smr {
328 u16 mask;
329 u16 id;
330 bool valid;
333 struct arm_smmu_master_cfg {
334 struct arm_smmu_device *smmu;
335 s16 smendx[];
337 #define INVALID_SMENDX -1
338 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
339 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
340 #define fwspec_smendx(fw, i) \
341 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
342 #define for_each_cfg_sme(fw, i, idx) \
343 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
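/*
 * Illustrative use of the helpers above (an editorial sketch, loosely
 * modelled on the real callers such as arm_smmu_master_free_smes() further
 * down; "dev" is assumed to be a master device with a populated fwspec):
 *
 *	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 *	int i, idx;
 *
 *	for_each_cfg_sme(fwspec, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;	// no SME allocated for fwspec->ids[i]
 *		arm_smmu_write_sme(fwspec_smmu(fwspec), idx);
 *	}
 */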
345 struct arm_smmu_device {
346 struct device *dev;
348 void __iomem *base;
349 void __iomem *cb_base;
350 unsigned long pgshift;
352 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
353 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
354 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
355 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
356 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
357 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
358 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
359 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
360 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
361 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
362 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
363 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
364 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
365 u32 features;
367 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
368 u32 options;
369 enum arm_smmu_arch_version version;
370 enum arm_smmu_implementation model;
372 u32 num_context_banks;
373 u32 num_s2_context_banks;
374 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
375 atomic_t irptndx;
377 u32 num_mapping_groups;
378 u16 streamid_mask;
379 u16 smr_mask_mask;
380 struct arm_smmu_smr *smrs;
381 struct arm_smmu_s2cr *s2crs;
382 struct mutex stream_map_mutex;
384 unsigned long va_size;
385 unsigned long ipa_size;
386 unsigned long pa_size;
387 unsigned long pgsize_bitmap;
389 u32 num_global_irqs;
390 u32 num_context_irqs;
391 unsigned int *irqs;
393 u32 cavium_id_base; /* Specific to Cavium */
395 /* IOMMU core code handle */
396 struct iommu_device iommu;
399 enum arm_smmu_context_fmt {
400 ARM_SMMU_CTX_FMT_NONE,
401 ARM_SMMU_CTX_FMT_AARCH64,
402 ARM_SMMU_CTX_FMT_AARCH32_L,
403 ARM_SMMU_CTX_FMT_AARCH32_S,
406 struct arm_smmu_cfg {
407 u8 cbndx;
408 u8 irptndx;
409 union {
410 u16 asid;
411 u16 vmid;
413 u32 cbar;
414 enum arm_smmu_context_fmt fmt;
416 #define INVALID_IRPTNDX 0xff
418 enum arm_smmu_domain_stage {
419 ARM_SMMU_DOMAIN_S1 = 0,
420 ARM_SMMU_DOMAIN_S2,
421 ARM_SMMU_DOMAIN_NESTED,
422 ARM_SMMU_DOMAIN_BYPASS,
425 struct arm_smmu_domain {
426 struct arm_smmu_device *smmu;
427 struct io_pgtable_ops *pgtbl_ops;
428 spinlock_t pgtbl_lock;
429 struct arm_smmu_cfg cfg;
430 enum arm_smmu_domain_stage stage;
431 struct mutex init_mutex; /* Protects smmu pointer */
432 struct iommu_domain domain;
435 struct arm_smmu_option_prop {
436 u32 opt;
437 const char *prop;
440 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
442 static bool using_legacy_binding, using_generic_binding;
444 static struct arm_smmu_option_prop arm_smmu_options[] = {
445 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
446 { 0, NULL},
449 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
451 return container_of(dom, struct arm_smmu_domain, domain);
454 static void parse_driver_options(struct arm_smmu_device *smmu)
456 int i = 0;
458 do {
459 if (of_property_read_bool(smmu->dev->of_node,
460 arm_smmu_options[i].prop)) {
461 smmu->options |= arm_smmu_options[i].opt;
462 dev_notice(smmu->dev, "option %s\n",
463 arm_smmu_options[i].prop);
465 } while (arm_smmu_options[++i].opt);
468 static struct device_node *dev_get_dev_node(struct device *dev)
470 if (dev_is_pci(dev)) {
471 struct pci_bus *bus = to_pci_dev(dev)->bus;
473 while (!pci_is_root_bus(bus))
474 bus = bus->parent;
475 return of_node_get(bus->bridge->parent->of_node);
478 return of_node_get(dev->of_node);
481 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
483 *((__be32 *)data) = cpu_to_be32(alias);
484 return 0; /* Continue walking */
487 static int __find_legacy_master_phandle(struct device *dev, void *data)
489 struct of_phandle_iterator *it = *(void **)data;
490 struct device_node *np = it->node;
491 int err;
493 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
494 "#stream-id-cells", 0)
495 if (it->node == np) {
496 *(void **)data = dev;
497 return 1;
499 it->node = np;
500 return err == -ENOENT ? 0 : err;
503 static struct platform_driver arm_smmu_driver;
504 static struct iommu_ops arm_smmu_ops;
506 static int arm_smmu_register_legacy_master(struct device *dev,
507 struct arm_smmu_device **smmu)
509 struct device *smmu_dev;
510 struct device_node *np;
511 struct of_phandle_iterator it;
512 void *data = &it;
513 u32 *sids;
514 __be32 pci_sid;
515 int err;
517 np = dev_get_dev_node(dev);
518 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
519 of_node_put(np);
520 return -ENODEV;
523 it.node = np;
524 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
525 __find_legacy_master_phandle);
526 smmu_dev = data;
527 of_node_put(np);
528 if (err == 0)
529 return -ENODEV;
530 if (err < 0)
531 return err;
533 if (dev_is_pci(dev)) {
534 /* "mmu-masters" assumes Stream ID == Requester ID */
535 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
536 &pci_sid);
537 it.cur = &pci_sid;
538 it.cur_count = 1;
541 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
542 &arm_smmu_ops);
543 if (err)
544 return err;
546 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
547 if (!sids)
548 return -ENOMEM;
550 *smmu = dev_get_drvdata(smmu_dev);
551 of_phandle_iterator_args(&it, sids, it.cur_count);
552 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
553 kfree(sids);
554 return err;
557 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
559 int idx;
561 do {
562 idx = find_next_zero_bit(map, end, start);
563 if (idx == end)
564 return -ENOSPC;
565 } while (test_and_set_bit(idx, map));
567 return idx;
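/*
 * Editorial note on the loop above: find_next_zero_bit() and
 * test_and_set_bit() are not atomic as a pair, so a concurrent caller may
 * claim the candidate index first; when test_and_set_bit() reports the bit
 * was already set, we simply search again rather than taking a lock.
 */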
570 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
572 clear_bit(idx, map);
575 /* Wait for any pending TLB invalidations to complete */
576 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
577 void __iomem *sync, void __iomem *status)
579 unsigned int spin_cnt, delay;
581 writel_relaxed(0, sync);
582 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
583 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
584 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
585 return;
586 cpu_relax();
588 udelay(delay);
590 dev_err_ratelimited(smmu->dev,
591 "TLB sync timed out -- SMMU may be deadlocked\n");
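/*
 * Editorial note on the back-off above: the udelay() argument doubles from
 * 1us while it remains below TLB_LOOP_TIMEOUT, so the worst case waits
 * roughly 1 + 2 + 4 + ... + 524288 us ~= 1.05s (hence the "1s!" remark on
 * TLB_LOOP_TIMEOUT), with TLB_SPIN_COUNT cpu_relax() polls of the status
 * register between successive delays.
 */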
594 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
596 void __iomem *base = ARM_SMMU_GR0(smmu);
598 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
599 base + ARM_SMMU_GR0_sTLBGSTATUS);
602 static void arm_smmu_tlb_sync_context(void *cookie)
604 struct arm_smmu_domain *smmu_domain = cookie;
605 struct arm_smmu_device *smmu = smmu_domain->smmu;
606 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
608 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
609 base + ARM_SMMU_CB_TLBSTATUS);
612 static void arm_smmu_tlb_sync_vmid(void *cookie)
614 struct arm_smmu_domain *smmu_domain = cookie;
616 arm_smmu_tlb_sync_global(smmu_domain->smmu);
619 static void arm_smmu_tlb_inv_context_s1(void *cookie)
621 struct arm_smmu_domain *smmu_domain = cookie;
622 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
623 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
625 writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
626 arm_smmu_tlb_sync_context(cookie);
629 static void arm_smmu_tlb_inv_context_s2(void *cookie)
631 struct arm_smmu_domain *smmu_domain = cookie;
632 struct arm_smmu_device *smmu = smmu_domain->smmu;
633 void __iomem *base = ARM_SMMU_GR0(smmu);
635 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
636 arm_smmu_tlb_sync_global(smmu);
639 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
640 size_t granule, bool leaf, void *cookie)
642 struct arm_smmu_domain *smmu_domain = cookie;
643 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
644 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
645 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
647 if (stage1) {
648 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
650 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
651 iova &= ~12UL;
652 iova |= cfg->asid;
653 do {
654 writel_relaxed(iova, reg);
655 iova += granule;
656 } while (size -= granule);
657 } else {
658 iova >>= 12;
659 iova |= (u64)cfg->asid << 48;
660 do {
661 writeq_relaxed(iova, reg);
662 iova += granule >> 12;
663 } while (size -= granule);
665 } else {
666 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
667 ARM_SMMU_CB_S2_TLBIIPAS2;
668 iova >>= 12;
669 do {
670 smmu_write_atomic_lq(iova, reg);
671 iova += granule >> 12;
672 } while (size -= granule);
677 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
678 * almost negligible, but the benefit of getting the first one in as far ahead
679 * of the sync as possible is significant, hence we don't just make this a
680 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
682 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
683 size_t granule, bool leaf, void *cookie)
685 struct arm_smmu_domain *smmu_domain = cookie;
686 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
688 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
691 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
692 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
693 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
694 .tlb_sync = arm_smmu_tlb_sync_context,
697 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
698 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
699 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
700 .tlb_sync = arm_smmu_tlb_sync_context,
703 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
704 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
705 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
706 .tlb_sync = arm_smmu_tlb_sync_vmid,
709 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
711 u32 fsr, fsynr;
712 unsigned long iova;
713 struct iommu_domain *domain = dev;
714 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
715 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
716 struct arm_smmu_device *smmu = smmu_domain->smmu;
717 void __iomem *cb_base;
719 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
720 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
722 if (!(fsr & FSR_FAULT))
723 return IRQ_NONE;
725 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
726 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
728 dev_err_ratelimited(smmu->dev,
729 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
730 fsr, iova, fsynr, cfg->cbndx);
732 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
733 return IRQ_HANDLED;
736 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
738 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
739 struct arm_smmu_device *smmu = dev;
740 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
742 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
743 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
744 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
745 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
747 if (!gfsr)
748 return IRQ_NONE;
750 dev_err_ratelimited(smmu->dev,
751 "Unexpected global fault, this could be serious\n");
752 dev_err_ratelimited(smmu->dev,
753 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
754 gfsr, gfsynr0, gfsynr1, gfsynr2);
756 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
757 return IRQ_HANDLED;
760 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
761 struct io_pgtable_cfg *pgtbl_cfg)
763 u32 reg, reg2;
764 u64 reg64;
765 bool stage1;
766 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
767 struct arm_smmu_device *smmu = smmu_domain->smmu;
768 void __iomem *cb_base, *gr1_base;
770 gr1_base = ARM_SMMU_GR1(smmu);
771 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
772 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
774 if (smmu->version > ARM_SMMU_V1) {
775 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
776 reg = CBA2R_RW64_64BIT;
777 else
778 reg = CBA2R_RW64_32BIT;
779 /* 16-bit VMIDs live in CBA2R */
780 if (smmu->features & ARM_SMMU_FEAT_VMID16)
781 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
783 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
786 /* CBAR */
787 reg = cfg->cbar;
788 if (smmu->version < ARM_SMMU_V2)
789 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
792 * Use the weakest shareability/memory types, so they are
793 * overridden by the ttbcr/pte.
795 if (stage1) {
796 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
797 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
798 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
799 /* 8-bit VMIDs live in CBAR */
800 reg |= cfg->vmid << CBAR_VMID_SHIFT;
802 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
805 * TTBCR
806 * We must write this before the TTBRs, since it determines the
807 * access behaviour of some fields (in particular, ASID[15:8]).
809 if (stage1) {
810 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
811 reg = pgtbl_cfg->arm_v7s_cfg.tcr;
812 reg2 = 0;
813 } else {
814 reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
815 reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
816 reg2 |= TTBCR2_SEP_UPSTREAM;
817 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
818 reg2 |= TTBCR2_AS;
820 if (smmu->version > ARM_SMMU_V1)
821 writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
822 } else {
823 reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
825 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
827 /* TTBRs */
828 if (stage1) {
829 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
830 reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
831 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
832 reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
833 writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
834 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
835 } else {
836 reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
837 reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
838 writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
839 reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
840 reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
841 writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
843 } else {
844 reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
845 writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
848 /* MAIRs (stage-1 only) */
849 if (stage1) {
850 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
851 reg = pgtbl_cfg->arm_v7s_cfg.prrr;
852 reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
853 } else {
854 reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
855 reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
857 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
858 writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
861 /* SCTLR */
862 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
863 if (stage1)
864 reg |= SCTLR_S1_ASIDPNE;
865 #ifdef __BIG_ENDIAN
866 reg |= SCTLR_E;
867 #endif
868 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
871 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
872 struct arm_smmu_device *smmu)
874 int irq, start, ret = 0;
875 unsigned long ias, oas;
876 struct io_pgtable_ops *pgtbl_ops;
877 struct io_pgtable_cfg pgtbl_cfg;
878 enum io_pgtable_fmt fmt;
879 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
880 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
881 const struct iommu_gather_ops *tlb_ops;
883 mutex_lock(&smmu_domain->init_mutex);
884 if (smmu_domain->smmu)
885 goto out_unlock;
887 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
888 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
889 smmu_domain->smmu = smmu;
890 goto out_unlock;
894 * Mapping the requested stage onto what we support is surprisingly
895 * complicated, mainly because the spec allows S1+S2 SMMUs without
896 * support for nested translation. That means we end up with the
897 * following table:
899 * Requested Supported Actual
900 * S1 N S1
901 * S1 S1+S2 S1
902 * S1 S2 S2
903 * S1 S1 S1
904 * N N N
905 * N S1+S2 S2
906 * N S2 S2
907 * N S1 S1
909 * Note that you can't actually request stage-2 mappings.
911 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
912 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
913 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
914 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
917 * Choosing a suitable context format is even more fiddly. Until we
918 * grow some way for the caller to express a preference, and/or move
919 * the decision into the io-pgtable code where it arguably belongs,
920 * just aim for the closest thing to the rest of the system, and hope
921 * that the hardware isn't esoteric enough that we can't assume AArch64
922 * support to be a superset of AArch32 support...
924 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
925 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
926 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
927 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
928 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
929 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
930 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
931 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
932 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
933 ARM_SMMU_FEAT_FMT_AARCH64_16K |
934 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
935 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
937 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
938 ret = -EINVAL;
939 goto out_unlock;
942 switch (smmu_domain->stage) {
943 case ARM_SMMU_DOMAIN_S1:
944 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
945 start = smmu->num_s2_context_banks;
946 ias = smmu->va_size;
947 oas = smmu->ipa_size;
948 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
949 fmt = ARM_64_LPAE_S1;
950 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
951 fmt = ARM_32_LPAE_S1;
952 ias = min(ias, 32UL);
953 oas = min(oas, 40UL);
954 } else {
955 fmt = ARM_V7S;
956 ias = min(ias, 32UL);
957 oas = min(oas, 32UL);
959 tlb_ops = &arm_smmu_s1_tlb_ops;
960 break;
961 case ARM_SMMU_DOMAIN_NESTED:
963 * We will likely want to change this if/when KVM gets
964 * involved.
966 case ARM_SMMU_DOMAIN_S2:
967 cfg->cbar = CBAR_TYPE_S2_TRANS;
968 start = 0;
969 ias = smmu->ipa_size;
970 oas = smmu->pa_size;
971 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
972 fmt = ARM_64_LPAE_S2;
973 } else {
974 fmt = ARM_32_LPAE_S2;
975 ias = min(ias, 40UL);
976 oas = min(oas, 40UL);
978 if (smmu->version == ARM_SMMU_V2)
979 tlb_ops = &arm_smmu_s2_tlb_ops_v2;
980 else
981 tlb_ops = &arm_smmu_s2_tlb_ops_v1;
982 break;
983 default:
984 ret = -EINVAL;
985 goto out_unlock;
987 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
988 smmu->num_context_banks);
989 if (ret < 0)
990 goto out_unlock;
992 cfg->cbndx = ret;
993 if (smmu->version < ARM_SMMU_V2) {
994 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
995 cfg->irptndx %= smmu->num_context_irqs;
996 } else {
997 cfg->irptndx = cfg->cbndx;
1000 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
1001 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
1002 else
1003 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
1005 pgtbl_cfg = (struct io_pgtable_cfg) {
1006 .pgsize_bitmap = smmu->pgsize_bitmap,
1007 .ias = ias,
1008 .oas = oas,
1009 .tlb = tlb_ops,
1010 .iommu_dev = smmu->dev,
1013 smmu_domain->smmu = smmu;
1014 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1015 if (!pgtbl_ops) {
1016 ret = -ENOMEM;
1017 goto out_clear_smmu;
1020 /* Update the domain's page sizes to reflect the page table format */
1021 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1022 domain->geometry.aperture_end = (1UL << ias) - 1;
1023 domain->geometry.force_aperture = true;
1025 /* Initialise the context bank with our page table cfg */
1026 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
1029 * Request context fault interrupt. Do this last to avoid the
1030 * handler seeing a half-initialised domain state.
1032 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1033 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
1034 IRQF_SHARED, "arm-smmu-context-fault", domain);
1035 if (ret < 0) {
1036 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
1037 cfg->irptndx, irq);
1038 cfg->irptndx = INVALID_IRPTNDX;
1041 mutex_unlock(&smmu_domain->init_mutex);
1043 /* Publish page table ops for map/unmap */
1044 smmu_domain->pgtbl_ops = pgtbl_ops;
1045 return 0;
1047 out_clear_smmu:
1048 smmu_domain->smmu = NULL;
1049 out_unlock:
1050 mutex_unlock(&smmu_domain->init_mutex);
1051 return ret;
1054 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
1056 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1057 struct arm_smmu_device *smmu = smmu_domain->smmu;
1058 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1059 void __iomem *cb_base;
1060 int irq;
1062 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
1063 return;
1066 * Disable the context bank and free the page tables before freeing
1067 * it.
1069 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1070 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1072 if (cfg->irptndx != INVALID_IRPTNDX) {
1073 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1074 devm_free_irq(smmu->dev, irq, domain);
1077 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1078 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
1081 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1083 struct arm_smmu_domain *smmu_domain;
1085 if (type != IOMMU_DOMAIN_UNMANAGED &&
1086 type != IOMMU_DOMAIN_DMA &&
1087 type != IOMMU_DOMAIN_IDENTITY)
1088 return NULL;
1090 * Allocate the domain and initialise some of its data structures.
1091 * We can't really do anything meaningful until we've added a
1092 * master.
1094 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1095 if (!smmu_domain)
1096 return NULL;
1098 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
1099 iommu_get_dma_cookie(&smmu_domain->domain))) {
1100 kfree(smmu_domain);
1101 return NULL;
1104 mutex_init(&smmu_domain->init_mutex);
1105 spin_lock_init(&smmu_domain->pgtbl_lock);
1107 return &smmu_domain->domain;
1110 static void arm_smmu_domain_free(struct iommu_domain *domain)
1112 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1115 * Free the domain resources. We assume that all devices have
1116 * already been detached.
1118 iommu_put_dma_cookie(domain);
1119 arm_smmu_destroy_domain_context(domain);
1120 kfree(smmu_domain);
1123 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1125 struct arm_smmu_smr *smr = smmu->smrs + idx;
1126 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1128 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1129 reg |= SMR_VALID;
1130 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1133 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1135 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1136 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1137 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1138 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1140 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1141 smmu->smrs[idx].valid)
1142 reg |= S2CR_EXIDVALID;
1143 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1146 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1148 arm_smmu_write_s2cr(smmu, idx);
1149 if (smmu->smrs)
1150 arm_smmu_write_smr(smmu, idx);
1154 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1155 * should be called after sCR0 is written.
1157 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1159 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1160 u32 smr;
1162 if (!smmu->smrs)
1163 return;
1166 * SMR.ID bits may not be preserved if the corresponding MASK
1167 * bits are set, so check each one separately. We can reject
1168 * masters later if they try to claim IDs outside these masks.
1170 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1171 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1172 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1173 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1175 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1176 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1177 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1178 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
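/*
 * Worked example for the probe above (editorial, register widths assumed):
 * if the implementation only wires up 10 ID bits and 10 mask bits, writing
 * the all-ones patterns and reading them back leaves streamid_mask == 0x3ff
 * and smr_mask_mask == 0x3ff, which is what arm_smmu_add_device() later
 * validates each fwspec ID and mask against.
 */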
1181 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1183 struct arm_smmu_smr *smrs = smmu->smrs;
1184 int i, free_idx = -ENOSPC;
1186 /* Stream indexing is blissfully easy */
1187 if (!smrs)
1188 return id;
1190 /* Validating SMRs is... less so */
1191 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1192 if (!smrs[i].valid) {
1194 * Note the first free entry we come across, which
1195 * we'll claim in the end if nothing else matches.
1197 if (free_idx < 0)
1198 free_idx = i;
1199 continue;
1202 * If the new entry is _entirely_ matched by an existing entry,
1203 * then reuse that, with the guarantee that there also cannot
1204 * be any subsequent conflicting entries. In normal use we'd
1205 * expect simply identical entries for this case, but there's
1206 * no harm in accommodating the generalisation.
1208 if ((mask & smrs[i].mask) == mask &&
1209 !((id ^ smrs[i].id) & ~smrs[i].mask))
1210 return i;
1212 * If the new entry has any other overlap with an existing one,
1213 * though, then there always exists at least one stream ID
1214 * which would cause a conflict, and we can't allow that risk.
1216 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1217 return -EINVAL;
1220 return free_idx;
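/*
 * Worked example for the matching rules above (editorial sketch, values
 * invented): given an existing valid SMR of { id = 0x400, mask = 0xff },
 * a new entry of { id = 0x410, mask = 0x0f } is entirely covered, because
 * (0x0f & 0xff) == 0x0f and ((0x410 ^ 0x400) & ~0xff) == 0, so the existing
 * index is reused. An entry of { id = 0x4f0, mask = 0x1ff } merely overlaps
 * it, since ((0x4f0 ^ 0x400) & ~(0xff | 0x1ff)) == 0 without the mask being
 * contained, and is therefore rejected with -EINVAL.
 */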
1223 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1225 if (--smmu->s2crs[idx].count)
1226 return false;
1228 smmu->s2crs[idx] = s2cr_init_val;
1229 if (smmu->smrs)
1230 smmu->smrs[idx].valid = false;
1232 return true;
1235 static int arm_smmu_master_alloc_smes(struct device *dev)
1237 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1238 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1239 struct arm_smmu_device *smmu = cfg->smmu;
1240 struct arm_smmu_smr *smrs = smmu->smrs;
1241 struct iommu_group *group;
1242 int i, idx, ret;
1244 mutex_lock(&smmu->stream_map_mutex);
1245 /* Figure out a viable stream map entry allocation */
1246 for_each_cfg_sme(fwspec, i, idx) {
1247 u16 sid = fwspec->ids[i];
1248 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1250 if (idx != INVALID_SMENDX) {
1251 ret = -EEXIST;
1252 goto out_err;
1255 ret = arm_smmu_find_sme(smmu, sid, mask);
1256 if (ret < 0)
1257 goto out_err;
1259 idx = ret;
1260 if (smrs && smmu->s2crs[idx].count == 0) {
1261 smrs[idx].id = sid;
1262 smrs[idx].mask = mask;
1263 smrs[idx].valid = true;
1265 smmu->s2crs[idx].count++;
1266 cfg->smendx[i] = (s16)idx;
1269 group = iommu_group_get_for_dev(dev);
1270 if (!group)
1271 group = ERR_PTR(-ENOMEM);
1272 if (IS_ERR(group)) {
1273 ret = PTR_ERR(group);
1274 goto out_err;
1276 iommu_group_put(group);
1278 /* It worked! Now, poke the actual hardware */
1279 for_each_cfg_sme(fwspec, i, idx) {
1280 arm_smmu_write_sme(smmu, idx);
1281 smmu->s2crs[idx].group = group;
1284 mutex_unlock(&smmu->stream_map_mutex);
1285 return 0;
1287 out_err:
1288 while (i--) {
1289 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1290 cfg->smendx[i] = INVALID_SMENDX;
1292 mutex_unlock(&smmu->stream_map_mutex);
1293 return ret;
1296 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1298 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1299 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1300 int i, idx;
1302 mutex_lock(&smmu->stream_map_mutex);
1303 for_each_cfg_sme(fwspec, i, idx) {
1304 if (arm_smmu_free_sme(smmu, idx))
1305 arm_smmu_write_sme(smmu, idx);
1306 cfg->smendx[i] = INVALID_SMENDX;
1308 mutex_unlock(&smmu->stream_map_mutex);
1311 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1312 struct iommu_fwspec *fwspec)
1314 struct arm_smmu_device *smmu = smmu_domain->smmu;
1315 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1316 u8 cbndx = smmu_domain->cfg.cbndx;
1317 enum arm_smmu_s2cr_type type;
1318 int i, idx;
1320 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1321 type = S2CR_TYPE_BYPASS;
1322 else
1323 type = S2CR_TYPE_TRANS;
1325 for_each_cfg_sme(fwspec, i, idx) {
1326 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1327 continue;
1329 s2cr[idx].type = type;
1330 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1331 s2cr[idx].cbndx = cbndx;
1332 arm_smmu_write_s2cr(smmu, idx);
1334 return 0;
1337 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1339 int ret;
1340 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1341 struct arm_smmu_device *smmu;
1342 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1344 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1345 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1346 return -ENXIO;
1350 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1351 * domains between of_xlate() and add_device() - we have no way to cope
1352 * with that, so until ARM gets converted to rely on groups and default
1353 * domains, just say no (but more politely than by dereferencing NULL).
1354 * This should be at least a WARN_ON once that's sorted.
1356 if (!fwspec->iommu_priv)
1357 return -ENODEV;
1359 smmu = fwspec_smmu(fwspec);
1360 /* Ensure that the domain is finalised */
1361 ret = arm_smmu_init_domain_context(domain, smmu);
1362 if (ret < 0)
1363 return ret;
1366 * Sanity check the domain. We don't support domains across
1367 * different SMMUs.
1369 if (smmu_domain->smmu != smmu) {
1370 dev_err(dev,
1371 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1372 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1373 return -EINVAL;
1376 /* Looks ok, so add the device to the domain */
1377 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1380 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1381 phys_addr_t paddr, size_t size, int prot)
1383 int ret;
1384 unsigned long flags;
1385 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1386 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1388 if (!ops)
1389 return -ENODEV;
1391 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1392 ret = ops->map(ops, iova, paddr, size, prot);
1393 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1394 return ret;
1397 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1398 size_t size)
1400 size_t ret;
1401 unsigned long flags;
1402 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1403 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1405 if (!ops)
1406 return 0;
1408 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1409 ret = ops->unmap(ops, iova, size);
1410 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1411 return ret;
1414 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1415 dma_addr_t iova)
1417 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1418 struct arm_smmu_device *smmu = smmu_domain->smmu;
1419 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1420 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1421 struct device *dev = smmu->dev;
1422 void __iomem *cb_base;
1423 u32 tmp;
1424 u64 phys;
1425 unsigned long va;
1427 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1429 /* ATS1 registers can only be written atomically */
1430 va = iova & ~0xfffUL;
1431 if (smmu->version == ARM_SMMU_V2)
1432 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1433 else /* Register is only 32-bit in v1 */
1434 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1436 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1437 !(tmp & ATSR_ACTIVE), 5, 50)) {
1438 dev_err(dev,
1439 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1440 &iova);
1441 return ops->iova_to_phys(ops, iova);
1444 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1445 if (phys & CB_PAR_F) {
1446 dev_err(dev, "translation fault!\n");
1447 dev_err(dev, "PAR = 0x%llx\n", phys);
1448 return 0;
1451 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
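/*
 * Editorial example of the PAR decode above (values assumed): for
 * iova = 0x12345678, a successful ATS1PR walk that leaves
 * PAR = 0x89abc5000 (fault bit clear) returns
 * (0x89abc5000 & GENMASK_ULL(39, 12)) | 0x678 == 0x89abc5678,
 * i.e. PA[39:12] taken from PAR with the page offset carried over from
 * the IOVA.
 */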
1454 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1455 dma_addr_t iova)
1457 phys_addr_t ret;
1458 unsigned long flags;
1459 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1460 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1462 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1463 return iova;
1465 if (!ops)
1466 return 0;
1468 spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1469 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1470 smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1471 ret = arm_smmu_iova_to_phys_hard(domain, iova);
1472 } else {
1473 ret = ops->iova_to_phys(ops, iova);
1476 spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1478 return ret;
1481 static bool arm_smmu_capable(enum iommu_cap cap)
1483 switch (cap) {
1484 case IOMMU_CAP_CACHE_COHERENCY:
1486 * Return true here as the SMMU can always send out coherent
1487 * requests.
1489 return true;
1490 case IOMMU_CAP_NOEXEC:
1491 return true;
1492 default:
1493 return false;
1497 static int arm_smmu_match_node(struct device *dev, void *data)
1499 return dev->fwnode == data;
1502 static
1503 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1505 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1506 fwnode, arm_smmu_match_node);
1507 put_device(dev);
1508 return dev ? dev_get_drvdata(dev) : NULL;
1511 static int arm_smmu_add_device(struct device *dev)
1513 struct arm_smmu_device *smmu;
1514 struct arm_smmu_master_cfg *cfg;
1515 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1516 int i, ret;
1518 if (using_legacy_binding) {
1519 ret = arm_smmu_register_legacy_master(dev, &smmu);
1520 fwspec = dev->iommu_fwspec;
1521 if (ret)
1522 goto out_free;
1523 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1524 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1525 } else {
1526 return -ENODEV;
1529 ret = -EINVAL;
1530 for (i = 0; i < fwspec->num_ids; i++) {
1531 u16 sid = fwspec->ids[i];
1532 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1534 if (sid & ~smmu->streamid_mask) {
1535 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1536 sid, smmu->streamid_mask);
1537 goto out_free;
1539 if (mask & ~smmu->smr_mask_mask) {
1540 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1541 mask, smmu->smr_mask_mask);
1542 goto out_free;
1546 ret = -ENOMEM;
1547 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1548 GFP_KERNEL);
1549 if (!cfg)
1550 goto out_free;
1552 cfg->smmu = smmu;
1553 fwspec->iommu_priv = cfg;
1554 while (i--)
1555 cfg->smendx[i] = INVALID_SMENDX;
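/*
 * Editorial note: i equals fwspec->num_ids once the validation loop above
 * completes, so the offsetof(..., smendx[i]) allocation sizes the trailing
 * smendx[] array at exactly one s16 slot per stream ID, and the while (i--)
 * loop marks every slot INVALID_SMENDX before any SMEs are actually claimed.
 */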
1557 ret = arm_smmu_master_alloc_smes(dev);
1558 if (ret)
1559 goto out_free;
1561 iommu_device_link(&smmu->iommu, dev);
1563 return 0;
1565 out_free:
1566 if (fwspec)
1567 kfree(fwspec->iommu_priv);
1568 iommu_fwspec_free(dev);
1569 return ret;
1572 static void arm_smmu_remove_device(struct device *dev)
1574 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1575 struct arm_smmu_master_cfg *cfg;
1576 struct arm_smmu_device *smmu;
1579 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1580 return;
1582 cfg = fwspec->iommu_priv;
1583 smmu = cfg->smmu;
1585 iommu_device_unlink(&smmu->iommu, dev);
1586 arm_smmu_master_free_smes(fwspec);
1587 iommu_group_remove_device(dev);
1588 kfree(fwspec->iommu_priv);
1589 iommu_fwspec_free(dev);
1592 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1594 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1595 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1596 struct iommu_group *group = NULL;
1597 int i, idx;
1599 for_each_cfg_sme(fwspec, i, idx) {
1600 if (group && smmu->s2crs[idx].group &&
1601 group != smmu->s2crs[idx].group)
1602 return ERR_PTR(-EINVAL);
1604 group = smmu->s2crs[idx].group;
1607 if (group)
1608 return iommu_group_ref_get(group);
1610 if (dev_is_pci(dev))
1611 group = pci_device_group(dev);
1612 else
1613 group = generic_device_group(dev);
1615 return group;
1618 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1619 enum iommu_attr attr, void *data)
1621 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1623 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1624 return -EINVAL;
1626 switch (attr) {
1627 case DOMAIN_ATTR_NESTING:
1628 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1629 return 0;
1630 default:
1631 return -ENODEV;
1635 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1636 enum iommu_attr attr, void *data)
1638 int ret = 0;
1639 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1641 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1642 return -EINVAL;
1644 mutex_lock(&smmu_domain->init_mutex);
1646 switch (attr) {
1647 case DOMAIN_ATTR_NESTING:
1648 if (smmu_domain->smmu) {
1649 ret = -EPERM;
1650 goto out_unlock;
1653 if (*(int *)data)
1654 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1655 else
1656 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1658 break;
1659 default:
1660 ret = -ENODEV;
1663 out_unlock:
1664 mutex_unlock(&smmu_domain->init_mutex);
1665 return ret;
1668 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1670 u32 mask, fwid = 0;
1672 if (args->args_count > 0)
1673 fwid |= (u16)args->args[0];
1675 if (args->args_count > 1)
1676 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1677 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1678 fwid |= (u16)mask << SMR_MASK_SHIFT;
1680 return iommu_fwspec_add_ids(dev, &fwid, 1);
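/*
 * Worked example of the fwid packing above (editorial, DT values assumed):
 * an "iommus" specifier with args[0] = 0x210 on an SMMU node that carries
 * stream-match-mask = <0x7c00> produces
 * fwid = 0x210 | (0x7c00 << SMR_MASK_SHIFT) == 0x7c000210, i.e. the SMR ID
 * in the low half-word and the SMR mask in the high half-word, matching how
 * arm_smmu_add_device() later splits each fwspec ID back apart.
 */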
1683 static void arm_smmu_get_resv_regions(struct device *dev,
1684 struct list_head *head)
1686 struct iommu_resv_region *region;
1687 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1689 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1690 prot, IOMMU_RESV_SW_MSI);
1691 if (!region)
1692 return;
1694 list_add_tail(&region->list, head);
1696 iommu_dma_get_resv_regions(dev, head);
1699 static void arm_smmu_put_resv_regions(struct device *dev,
1700 struct list_head *head)
1702 struct iommu_resv_region *entry, *next;
1704 list_for_each_entry_safe(entry, next, head, list)
1705 kfree(entry);
1708 static struct iommu_ops arm_smmu_ops = {
1709 .capable = arm_smmu_capable,
1710 .domain_alloc = arm_smmu_domain_alloc,
1711 .domain_free = arm_smmu_domain_free,
1712 .attach_dev = arm_smmu_attach_dev,
1713 .map = arm_smmu_map,
1714 .unmap = arm_smmu_unmap,
1715 .map_sg = default_iommu_map_sg,
1716 .iova_to_phys = arm_smmu_iova_to_phys,
1717 .add_device = arm_smmu_add_device,
1718 .remove_device = arm_smmu_remove_device,
1719 .device_group = arm_smmu_device_group,
1720 .domain_get_attr = arm_smmu_domain_get_attr,
1721 .domain_set_attr = arm_smmu_domain_set_attr,
1722 .of_xlate = arm_smmu_of_xlate,
1723 .get_resv_regions = arm_smmu_get_resv_regions,
1724 .put_resv_regions = arm_smmu_put_resv_regions,
1725 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1728 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1730 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1731 void __iomem *cb_base;
1732 int i;
1733 u32 reg, major;
1735 /* clear global FSR */
1736 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1737 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1740 * Reset stream mapping groups: Initial values mark all SMRn as
1741 * invalid and all S2CRn as bypass unless overridden.
1743 for (i = 0; i < smmu->num_mapping_groups; ++i)
1744 arm_smmu_write_sme(smmu, i);
1746 if (smmu->model == ARM_MMU500) {
1748 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1749 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1750 * bit is only present in MMU-500r2 onwards.
1752 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1753 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1754 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1755 if (major >= 2)
1756 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1758 * Allow unmatched Stream IDs to allocate bypass
1759 * TLB entries for reduced latency.
1761 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
1762 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1765 /* Make sure all context banks are disabled and clear CB_FSR */
1766 for (i = 0; i < smmu->num_context_banks; ++i) {
1767 cb_base = ARM_SMMU_CB(smmu, i);
1768 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1769 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1771 * Disable MMU-500's not-particularly-beneficial next-page
1772 * prefetcher for the sake of errata #841119 and #826419.
1774 if (smmu->model == ARM_MMU500) {
1775 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1776 reg &= ~ARM_MMU500_ACTLR_CPRE;
1777 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1781 /* Invalidate the TLB, just in case */
1782 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1783 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1785 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1787 /* Enable fault reporting */
1788 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1790 /* Disable TLB broadcasting. */
1791 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1793 /* Enable client access, handling unmatched streams as appropriate */
1794 reg &= ~sCR0_CLIENTPD;
1795 if (disable_bypass)
1796 reg |= sCR0_USFCFG;
1797 else
1798 reg &= ~sCR0_USFCFG;
1800 /* Disable forced broadcasting */
1801 reg &= ~sCR0_FB;
1803 /* Don't upgrade barriers */
1804 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1806 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1807 reg |= sCR0_VMID16EN;
1809 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1810 reg |= sCR0_EXIDENABLE;
1812 /* Push the button */
1813 arm_smmu_tlb_sync_global(smmu);
1814 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1817 static int arm_smmu_id_size_to_bits(int size)
1819 switch (size) {
1820 case 0:
1821 return 32;
1822 case 1:
1823 return 36;
1824 case 2:
1825 return 40;
1826 case 3:
1827 return 42;
1828 case 4:
1829 return 44;
1830 case 5:
1831 default:
1832 return 48;
1836 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1838 unsigned long size;
1839 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1840 u32 id;
1841 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1842 int i;
1844 dev_notice(smmu->dev, "probing hardware configuration...\n");
1845 dev_notice(smmu->dev, "SMMUv%d with:\n",
1846 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1848 /* ID0 */
1849 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1851 /* Restrict available stages based on module parameter */
1852 if (force_stage == 1)
1853 id &= ~(ID0_S2TS | ID0_NTS);
1854 else if (force_stage == 2)
1855 id &= ~(ID0_S1TS | ID0_NTS);
1857 if (id & ID0_S1TS) {
1858 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1859 dev_notice(smmu->dev, "\tstage 1 translation\n");
1862 if (id & ID0_S2TS) {
1863 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1864 dev_notice(smmu->dev, "\tstage 2 translation\n");
1867 if (id & ID0_NTS) {
1868 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1869 dev_notice(smmu->dev, "\tnested translation\n");
1872 if (!(smmu->features &
1873 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1874 dev_err(smmu->dev, "\tno translation support!\n");
1875 return -ENODEV;
1878 if ((id & ID0_S1TS) &&
1879 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1880 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1881 dev_notice(smmu->dev, "\taddress translation ops\n");
1885 * In order for DMA API calls to work properly, we must defer to what
1886 * the FW says about coherency, regardless of what the hardware claims.
1887 * Fortunately, this also opens up a workaround for systems where the
1888 * ID register value has ended up configured incorrectly.
1890 cttw_reg = !!(id & ID0_CTTW);
1891 if (cttw_fw || cttw_reg)
1892 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1893 cttw_fw ? "" : "non-");
1894 if (cttw_fw != cttw_reg)
1895 dev_notice(smmu->dev,
1896 "\t(IDR0.CTTW overridden by FW configuration)\n");
1898 /* Max. number of entries we have for stream matching/indexing */
1899 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1900 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1901 size = 1 << 16;
1902 } else {
1903 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1905 smmu->streamid_mask = size - 1;
1906 if (id & ID0_SMS) {
1907 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1908 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1909 if (size == 0) {
1910 dev_err(smmu->dev,
1911 "stream-matching supported, but no SMRs present!\n");
1912 return -ENODEV;
1915 /* Zero-initialised to mark as invalid */
1916 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1917 GFP_KERNEL);
1918 if (!smmu->smrs)
1919 return -ENOMEM;
1921 dev_notice(smmu->dev,
1922 "\tstream matching with %lu register groups", size);
1924 /* s2cr->type == 0 means translation, so initialise explicitly */
1925 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1926 GFP_KERNEL);
1927 if (!smmu->s2crs)
1928 return -ENOMEM;
1929 for (i = 0; i < size; i++)
1930 smmu->s2crs[i] = s2cr_init_val;
1932 smmu->num_mapping_groups = size;
1933 mutex_init(&smmu->stream_map_mutex);
1935 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1936 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1937 if (!(id & ID0_PTFS_NO_AARCH32S))
1938 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1941 /* ID1 */
1942 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1943 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1945 /* Check for size mismatch of SMMU address space from mapped region */
1946 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1947 size <<= smmu->pgshift;
1948 if (smmu->cb_base != gr0_base + size)
1949 dev_warn(smmu->dev,
1950 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1951 size * 2, (smmu->cb_base - gr0_base) * 2);
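/*
 * Editorial example of the check above (register values assumed): with
 * ID1.NUMPAGENDXB = 3 and a 4K page granule (pgshift = 12), the global
 * address space is (1 << 4) * 4K = 64K, the whole SMMU region is twice
 * that (128K), and cb_base is expected to sit exactly 64K above gr0_base;
 * any other mapping size trips the warning.
 */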
1953 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1954 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1955 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1956 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1957 return -ENODEV;
1959 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1960 smmu->num_context_banks, smmu->num_s2_context_banks);
1961 /*
1962 * Cavium CN88xx erratum #27704.
1963 * Ensure ASID and VMID allocation is unique across all SMMUs in
1964 * the system.
1965 */
1966 if (smmu->model == CAVIUM_SMMUV2) {
1967 smmu->cavium_id_base =
1968 atomic_add_return(smmu->num_context_banks,
1969 &cavium_smmu_context_count);
1970 smmu->cavium_id_base -= smmu->num_context_banks;
1971 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1972 }
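/*
 * Note: cavium_id_base is later added to the ASID/VMID chosen for each
 * context bank, so no two SMMUs in the system ever share a value.
 */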
1974 /* ID2 */
1975 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1976 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1977 smmu->ipa_size = size;
1979 /* The output mask is also applied for bypass */
1980 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1981 smmu->pa_size = size;
1983 if (id & ID2_VMID16)
1984 smmu->features |= ARM_SMMU_FEAT_VMID16;
1986 /*
1987 * What the page table walker can address actually depends on which
1988 * descriptor format is in use, but since a) we don't know that yet,
1989 * and b) it can vary per context bank, this will have to do...
1990 */
1991 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1992 dev_warn(smmu->dev,
1993 "failed to set DMA mask for table walker\n");
1995 if (smmu->version < ARM_SMMU_V2) {
1996 smmu->va_size = smmu->ipa_size;
1997 if (smmu->version == ARM_SMMU_V1_64K)
1998 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1999 } else {
2000 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
2001 smmu->va_size = arm_smmu_id_size_to_bits(size);
2002 if (id & ID2_PTFS_4K)
2003 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
2004 if (id & ID2_PTFS_16K)
2005 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
2006 if (id & ID2_PTFS_64K)
2007 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
2008 }
2010 /* Now we've corralled the various formats, what'll it do? */
2011 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
2012 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
2013 if (smmu->features &
2014 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
2015 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2016 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
2017 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2018 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
2019 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
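/*
 * Note: arm_smmu_ops is shared by all SMMU instances, so the first probe
 * seeds its pgsize_bitmap and later probes OR in their own sizes.
 */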
2021 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2022 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2023 else
2024 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2025 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
2026 smmu->pgsize_bitmap);
2029 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
2030 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
2031 smmu->va_size, smmu->ipa_size);
2033 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
2034 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
2035 smmu->ipa_size, smmu->pa_size);
2037 return 0;
2038 }
2040 struct arm_smmu_match_data {
2041 enum arm_smmu_arch_version version;
2042 enum arm_smmu_implementation model;
2043 };
2045 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
2046 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
2048 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
2049 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
2050 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
2051 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
2052 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
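/*
 * Note: MMU-401 is matched as ARM_SMMU_V1_64K, which lets cfg_probe
 * advertise the AArch64 64K granule on an otherwise v1 programming model.
 */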
2054 static const struct of_device_id arm_smmu_of_match[] = {
2055 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
2056 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
2057 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
2058 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
2059 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
2060 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
2061 { },
2062 };
2063 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2065 #ifdef CONFIG_ACPI
2066 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
2067 {
2068 int ret = 0;
2070 switch (model) {
2071 case ACPI_IORT_SMMU_V1:
2072 case ACPI_IORT_SMMU_CORELINK_MMU400:
2073 smmu->version = ARM_SMMU_V1;
2074 smmu->model = GENERIC_SMMU;
2075 break;
2076 case ACPI_IORT_SMMU_V2:
2077 smmu->version = ARM_SMMU_V2;
2078 smmu->model = GENERIC_SMMU;
2079 break;
2080 case ACPI_IORT_SMMU_CORELINK_MMU500:
2081 smmu->version = ARM_SMMU_V2;
2082 smmu->model = ARM_MMU500;
2083 break;
2084 default:
2085 ret = -ENODEV;
2086 }
2088 return ret;
2089 }
2091 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2092 struct arm_smmu_device *smmu)
2093 {
2094 struct device *dev = smmu->dev;
2095 struct acpi_iort_node *node =
2096 *(struct acpi_iort_node **)dev_get_platdata(dev);
2097 struct acpi_iort_smmu *iort_smmu;
2098 int ret;
2100 /* Retrieve SMMU1/2 specific data */
2101 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2103 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2104 if (ret < 0)
2105 return ret;
2107 /* Ignore the configuration access interrupt */
2108 smmu->num_global_irqs = 1;
2110 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2111 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2113 return 0;
2114 }
2115 #else
2116 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2117 struct arm_smmu_device *smmu)
2118 {
2119 return -ENODEV;
2120 }
2121 #endif
2123 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2124 struct arm_smmu_device *smmu)
2125 {
2126 const struct arm_smmu_match_data *data;
2127 struct device *dev = &pdev->dev;
2128 bool legacy_binding;
2130 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2131 &smmu->num_global_irqs)) {
2132 dev_err(dev, "missing #global-interrupts property\n");
2133 return -ENODEV;
2134 }
2136 data = of_device_get_match_data(dev);
2137 smmu->version = data->version;
2138 smmu->model = data->model;
2140 parse_driver_options(smmu);
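/*
 * Note: the two binding styles are mutually exclusive system-wide: once an
 * SMMU probes with the legacy "mmu-masters" binding, generic-binding SMMUs
 * are refused, and vice versa.
 */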
2142 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2143 if (legacy_binding && !using_generic_binding) {
2144 if (!using_legacy_binding)
2145 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2146 using_legacy_binding = true;
2147 } else if (!legacy_binding && !using_legacy_binding) {
2148 using_generic_binding = true;
2149 } else {
2150 dev_err(dev, "not probing due to mismatched DT properties\n");
2151 return -ENODEV;
2152 }
2154 if (of_dma_is_coherent(dev->of_node))
2155 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2157 return 0;
2158 }
2160 static void arm_smmu_bus_init(void)
2161 {
2162 /* Oh, for a proper bus abstraction */
2163 if (!iommu_present(&platform_bus_type))
2164 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2165 #ifdef CONFIG_ARM_AMBA
2166 if (!iommu_present(&amba_bustype))
2167 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2168 #endif
2169 #ifdef CONFIG_PCI
2170 if (!iommu_present(&pci_bus_type)) {
2171 pci_request_acs();
2172 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2173 }
2174 #endif
2175 }
2177 static int arm_smmu_device_probe(struct platform_device *pdev)
2178 {
2179 struct resource *res;
2180 resource_size_t ioaddr;
2181 struct arm_smmu_device *smmu;
2182 struct device *dev = &pdev->dev;
2183 int num_irqs, i, err;
2185 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2186 if (!smmu) {
2187 dev_err(dev, "failed to allocate arm_smmu_device\n");
2188 return -ENOMEM;
2189 }
2190 smmu->dev = dev;
2192 if (dev->of_node)
2193 err = arm_smmu_device_dt_probe(pdev, smmu);
2194 else
2195 err = arm_smmu_device_acpi_probe(pdev, smmu);
2197 if (err)
2198 return err;
2200 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2201 ioaddr = res->start;
2202 smmu->base = devm_ioremap_resource(dev, res);
2203 if (IS_ERR(smmu->base))
2204 return PTR_ERR(smmu->base);
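/* Note: the context banks occupy the upper half of the mapped register space */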
2205 smmu->cb_base = smmu->base + resource_size(res) / 2;
2207 num_irqs = 0;
2208 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2209 num_irqs++;
2210 if (num_irqs > smmu->num_global_irqs)
2211 smmu->num_context_irqs++;
2212 }
2214 if (!smmu->num_context_irqs) {
2215 dev_err(dev, "found %d interrupts but expected at least %d\n",
2216 num_irqs, smmu->num_global_irqs + 1);
2217 return -ENODEV;
2218 }
2220 smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
2221 GFP_KERNEL);
2222 if (!smmu->irqs) {
2223 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2224 return -ENOMEM;
2225 }
2227 for (i = 0; i < num_irqs; ++i) {
2228 int irq = platform_get_irq(pdev, i);
2230 if (irq < 0) {
2231 dev_err(dev, "failed to get irq index %d\n", i);
2232 return -ENODEV;
2233 }
2234 smmu->irqs[i] = irq;
2235 }
2237 err = arm_smmu_device_cfg_probe(smmu);
2238 if (err)
2239 return err;
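/*
 * Note: on SMMUv2 the driver requires a dedicated interrupt per context
 * bank so that each context can report its own faults.
 */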
2241 if (smmu->version == ARM_SMMU_V2 &&
2242 smmu->num_context_banks != smmu->num_context_irqs) {
2243 dev_err(dev,
2244 "found only %d context interrupt(s) but %d required\n",
2245 smmu->num_context_irqs, smmu->num_context_banks);
2246 return -ENODEV;
2247 }
2249 for (i = 0; i < smmu->num_global_irqs; ++i) {
2250 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2251 arm_smmu_global_fault,
2252 IRQF_SHARED,
2253 "arm-smmu global fault",
2254 smmu);
2255 if (err) {
2256 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2257 i, smmu->irqs[i]);
2258 return err;
2259 }
2260 }
2262 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2263 "smmu.%pa", &ioaddr);
2264 if (err) {
2265 dev_err(dev, "Failed to register iommu in sysfs\n");
2266 return err;
2267 }
2269 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2270 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2272 err = iommu_device_register(&smmu->iommu);
2273 if (err) {
2274 dev_err(dev, "Failed to register iommu\n");
2275 return err;
2276 }
2278 platform_set_drvdata(pdev, smmu);
2279 arm_smmu_device_reset(smmu);
2280 arm_smmu_test_smr_masks(smmu);
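/*
 * Note: arm_smmu_device_reset() programs sane defaults into the global and
 * context bank registers, and arm_smmu_test_smr_masks() probes which SMR
 * ID/mask bits are actually implemented before any streams are mapped.
 */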
2282 /*
2283 * For ACPI and generic DT bindings, an SMMU will be probed before
2284 * any device which might need it, so we want the bus ops in place
2285 * ready to handle default domain setup as soon as any SMMU exists.
2286 */
2287 if (!using_legacy_binding)
2288 arm_smmu_bus_init();
2290 return 0;
2291 }
2293 /*
2294 * With the legacy DT binding in play, though, we have no guarantees about
2295 * probe order, but then we're also not doing default domains, so we can
2296 * delay setting bus ops until we're sure every possible SMMU is ready,
2297 * and that way ensure that no add_device() calls get missed.
2298 */
2299 static int arm_smmu_legacy_bus_init(void)
2300 {
2301 if (using_legacy_binding)
2302 arm_smmu_bus_init();
2303 return 0;
2304 }
2305 device_initcall_sync(arm_smmu_legacy_bus_init);
2307 static int arm_smmu_device_remove(struct platform_device *pdev)
2308 {
2309 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2311 if (!smmu)
2312 return -ENODEV;
2314 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2315 dev_err(&pdev->dev, "removing device with active domains!\n");
2317 /* Turn the thing off */
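/* Note: setting sCR0.CLIENTPD stops the SMMU translating client transactions */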
2318 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2319 return 0;
2320 }
2322 static struct platform_driver arm_smmu_driver = {
2323 .driver = {
2324 .name = "arm-smmu",
2325 .of_match_table = of_match_ptr(arm_smmu_of_match),
2326 },
2327 .probe = arm_smmu_device_probe,
2328 .remove = arm_smmu_device_remove,
2329 };
2330 module_platform_driver(arm_smmu_driver);
2332 IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
2333 IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
2334 IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
2335 IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
2336 IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
2337 IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
2339 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2340 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2341 MODULE_LICENSE("GPL v2");