linux-stable.git: drivers/iommu/arm-smmu.c
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "io-pgtable.h"
#include "arm-smmu-regs.h"
#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif

/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
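/*
 * MSI_IOVA_BASE and MSI_IOVA_LENGTH describe the fixed IOVA window that
 * arm_smmu_get_resv_regions() reports as an IOMMU_RESV_SW_MSI region, i.e.
 * the area the IOMMU DMA layer may use for mapping MSI doorbells.
 */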
static int force_stage;
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
	QCOM_SMMUV2,
};
struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
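/*
 * Each master hangs an arm_smmu_master_cfg off its iommu fwspec; smendx[i]
 * records which stream map entry (SMR/S2CR index) was allocated for the i-th
 * stream ID in the fwspec, or INVALID_SMENDX if none. The helpers above are
 * how the rest of the driver walks those allocations.
 */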
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;
	struct clk_bulk_data		*clks;
	int				num_clks;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
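/*
 * __find_legacy_master_phandle() is run against each registered SMMU device:
 * it walks that SMMU's legacy "mmu-masters" phandle list and, if the master
 * node we are looking for (it->node) is listed, stores the matching SMMU
 * device back through @data and returns 1 to stop the iteration, leaving the
 * iterator positioned on the matching entry.
 */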
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;

static int arm_smmu_register_legacy_master(struct device *dev,
					    struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}
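/*
 * Simple lockless index allocator: find_next_zero_bit() picks a candidate and
 * test_and_set_bit() claims it atomically, retrying if another caller raced
 * us to the same bit. Returns the claimed index or -ENOSPC.
 */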
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(0, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
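/*
 * For stage 1 the invalidation address is packed together with the ASID (ORed
 * into the low bits for the 32-bit encodings, shifted into bits [63:48] for
 * AArch64) and the loop steps through the range one granule at a time; for
 * stage 2 the same per-granule loop issues TLBIIPAS2(L) writes on the IPA.
 */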
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}

static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
		fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
621 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
622 struct io_pgtable_cfg *pgtbl_cfg)
624 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
625 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
626 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
628 cb->cfg = cfg;
630 /* TTBCR */
631 if (stage1) {
632 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
633 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
634 } else {
635 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
636 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
637 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
638 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
639 cb->tcr[1] |= TTBCR2_AS;
641 } else {
642 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
645 /* TTBRs */
646 if (stage1) {
647 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
648 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
649 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
650 } else {
651 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
652 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
653 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
654 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
656 } else {
657 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
660 /* MAIRs (stage-1 only) */
661 if (stage1) {
662 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
663 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
664 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
665 } else {
666 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
667 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
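/*
 * arm_smmu_init_context_bank() above only stages the TTBR/TCR/MAIR values in
 * the arm_smmu_cb shadow; nothing touches the hardware until
 * arm_smmu_write_context_bank() replays the shadow into the registers, which
 * is also how arm_smmu_device_reset() restores every bank from the same state.
 */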
672 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
674 u32 reg;
675 bool stage1;
676 struct arm_smmu_cb *cb = &smmu->cbs[idx];
677 struct arm_smmu_cfg *cfg = cb->cfg;
678 void __iomem *cb_base, *gr1_base;
680 cb_base = ARM_SMMU_CB(smmu, idx);
682 /* Unassigned context banks only need disabling */
683 if (!cfg) {
684 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
685 return;
688 gr1_base = ARM_SMMU_GR1(smmu);
689 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
691 /* CBA2R */
692 if (smmu->version > ARM_SMMU_V1) {
693 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
694 reg = CBA2R_RW64_64BIT;
695 else
696 reg = CBA2R_RW64_32BIT;
697 /* 16-bit VMIDs live in CBA2R */
698 if (smmu->features & ARM_SMMU_FEAT_VMID16)
699 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
701 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
704 /* CBAR */
705 reg = cfg->cbar;
706 if (smmu->version < ARM_SMMU_V2)
707 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
710 * Use the weakest shareability/memory types, so they are
711 * overridden by the ttbcr/pte.
713 if (stage1) {
714 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
715 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
716 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
717 /* 8-bit VMIDs live in CBAR */
718 reg |= cfg->vmid << CBAR_VMID_SHIFT;
720 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
723 * TTBCR
724 * We must write this before the TTBRs, since it determines the
725 * access behaviour of some fields (in particular, ASID[15:8]).
727 if (stage1 && smmu->version > ARM_SMMU_V1)
728 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
729 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
731 /* TTBRs */
732 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
733 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
734 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
735 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
736 } else {
737 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
738 if (stage1)
739 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
742 /* MAIRs (stage-1 only) */
743 if (stage1) {
744 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
745 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
748 /* SCTLR */
749 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
750 if (stage1)
751 reg |= SCTLR_S1_ASIDPNE;
752 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
753 reg |= SCTLR_E;
755 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
758 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
759 struct arm_smmu_device *smmu)
761 int irq, start, ret = 0;
762 unsigned long ias, oas;
763 struct io_pgtable_ops *pgtbl_ops;
764 struct io_pgtable_cfg pgtbl_cfg;
765 enum io_pgtable_fmt fmt;
766 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
767 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
769 mutex_lock(&smmu_domain->init_mutex);
770 if (smmu_domain->smmu)
771 goto out_unlock;
773 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
774 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
775 smmu_domain->smmu = smmu;
776 goto out_unlock;
780 * Mapping the requested stage onto what we support is surprisingly
781 * complicated, mainly because the spec allows S1+S2 SMMUs without
782 * support for nested translation. That means we end up with the
783 * following table:
785 * Requested Supported Actual
786 * S1 N S1
787 * S1 S1+S2 S1
788 * S1 S2 S2
789 * S1 S1 S1
790 * N N N
791 * N S1+S2 S2
792 * N S2 S2
793 * N S1 S1
795 * Note that you can't actually request stage-2 mappings.
797 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
798 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
799 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
800 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
803 * Choosing a suitable context format is even more fiddly. Until we
804 * grow some way for the caller to express a preference, and/or move
805 * the decision into the io-pgtable code where it arguably belongs,
806 * just aim for the closest thing to the rest of the system, and hope
807 * that the hardware isn't esoteric enough that we can't assume AArch64
808 * support to be a superset of AArch32 support...
810 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
811 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
812 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
813 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
814 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
815 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
816 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
817 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
818 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
819 ARM_SMMU_FEAT_FMT_AARCH64_16K |
820 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
821 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
823 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
824 ret = -EINVAL;
825 goto out_unlock;
828 switch (smmu_domain->stage) {
829 case ARM_SMMU_DOMAIN_S1:
830 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
831 start = smmu->num_s2_context_banks;
832 ias = smmu->va_size;
833 oas = smmu->ipa_size;
834 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
835 fmt = ARM_64_LPAE_S1;
836 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
837 fmt = ARM_32_LPAE_S1;
838 ias = min(ias, 32UL);
839 oas = min(oas, 40UL);
840 } else {
841 fmt = ARM_V7S;
842 ias = min(ias, 32UL);
843 oas = min(oas, 32UL);
845 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
846 break;
847 case ARM_SMMU_DOMAIN_NESTED:
849 * We will likely want to change this if/when KVM gets
850 * involved.
852 case ARM_SMMU_DOMAIN_S2:
853 cfg->cbar = CBAR_TYPE_S2_TRANS;
854 start = 0;
855 ias = smmu->ipa_size;
856 oas = smmu->pa_size;
857 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
858 fmt = ARM_64_LPAE_S2;
859 } else {
860 fmt = ARM_32_LPAE_S2;
861 ias = min(ias, 40UL);
862 oas = min(oas, 40UL);
864 if (smmu->version == ARM_SMMU_V2)
865 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
866 else
867 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
868 break;
869 default:
870 ret = -EINVAL;
871 goto out_unlock;
873 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
874 smmu->num_context_banks);
875 if (ret < 0)
876 goto out_unlock;
878 cfg->cbndx = ret;
879 if (smmu->version < ARM_SMMU_V2) {
880 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
881 cfg->irptndx %= smmu->num_context_irqs;
882 } else {
883 cfg->irptndx = cfg->cbndx;
886 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
887 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
888 else
889 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
891 pgtbl_cfg = (struct io_pgtable_cfg) {
892 .pgsize_bitmap = smmu->pgsize_bitmap,
893 .ias = ias,
894 .oas = oas,
895 .tlb = smmu_domain->tlb_ops,
896 .iommu_dev = smmu->dev,
899 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
900 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
902 if (smmu_domain->non_strict)
903 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
905 smmu_domain->smmu = smmu;
906 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
907 if (!pgtbl_ops) {
908 ret = -ENOMEM;
909 goto out_clear_smmu;
912 /* Update the domain's page sizes to reflect the page table format */
913 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
914 domain->geometry.aperture_end = (1UL << ias) - 1;
915 domain->geometry.force_aperture = true;
917 /* Initialise the context bank with our page table cfg */
918 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
919 arm_smmu_write_context_bank(smmu, cfg->cbndx);
922 * Request context fault interrupt. Do this last to avoid the
923 * handler seeing a half-initialised domain state.
925 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
926 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
927 IRQF_SHARED, "arm-smmu-context-fault", domain);
928 if (ret < 0) {
929 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
930 cfg->irptndx, irq);
931 cfg->irptndx = INVALID_IRPTNDX;
934 mutex_unlock(&smmu_domain->init_mutex);
936 /* Publish page table ops for map/unmap */
937 smmu_domain->pgtbl_ops = pgtbl_ops;
938 return 0;
940 out_clear_smmu:
941 smmu_domain->smmu = NULL;
942 out_unlock:
943 mutex_unlock(&smmu_domain->init_mutex);
944 return ret;
947 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
949 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
950 struct arm_smmu_device *smmu = smmu_domain->smmu;
951 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
952 int ret, irq;
954 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
955 return;
957 ret = arm_smmu_rpm_get(smmu);
958 if (ret < 0)
959 return;
962 * Disable the context bank and free the page tables before freeing
963 * it.
965 smmu->cbs[cfg->cbndx].cfg = NULL;
966 arm_smmu_write_context_bank(smmu, cfg->cbndx);
968 if (cfg->irptndx != INVALID_IRPTNDX) {
969 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
970 devm_free_irq(smmu->dev, irq, domain);
973 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
974 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
976 arm_smmu_rpm_put(smmu);
979 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
981 struct arm_smmu_domain *smmu_domain;
983 if (type != IOMMU_DOMAIN_UNMANAGED &&
984 type != IOMMU_DOMAIN_DMA &&
985 type != IOMMU_DOMAIN_IDENTITY)
986 return NULL;
988 * Allocate the domain and initialise some of its data structures.
989 * We can't really do anything meaningful until we've added a
990 * master.
992 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
993 if (!smmu_domain)
994 return NULL;
996 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
997 iommu_get_dma_cookie(&smmu_domain->domain))) {
998 kfree(smmu_domain);
999 return NULL;
1002 mutex_init(&smmu_domain->init_mutex);
1003 spin_lock_init(&smmu_domain->cb_lock);
1005 return &smmu_domain->domain;
1008 static void arm_smmu_domain_free(struct iommu_domain *domain)
1010 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1013 * Free the domain resources. We assume that all devices have
1014 * already been detached.
1016 iommu_put_dma_cookie(domain);
1017 arm_smmu_destroy_domain_context(domain);
1018 kfree(smmu_domain);
1021 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1023 struct arm_smmu_smr *smr = smmu->smrs + idx;
1024 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1026 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1027 reg |= SMR_VALID;
1028 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1031 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1033 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1034 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1035 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1036 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1038 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1039 smmu->smrs[idx].valid)
1040 reg |= S2CR_EXIDVALID;
1041 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1044 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1046 arm_smmu_write_s2cr(smmu, idx);
1047 if (smmu->smrs)
1048 arm_smmu_write_smr(smmu, idx);
1052 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1053 * should be called after sCR0 is written.
1055 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1057 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1058 u32 smr;
1060 if (!smmu->smrs)
1061 return;
1064 * SMR.ID bits may not be preserved if the corresponding MASK
1065 * bits are set, so check each one separately. We can reject
1066 * masters later if they try to claim IDs outside these masks.
1068 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1069 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1070 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1071 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1073 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1074 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1075 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1076 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
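/*
 * Rather than trusting the ID registers, the probing above writes maximal ID
 * and mask patterns to SMR0 and reads them back, so streamid_mask and
 * smr_mask_mask end up describing only the bits this implementation actually
 * stores.
 */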
1079 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1081 struct arm_smmu_smr *smrs = smmu->smrs;
1082 int i, free_idx = -ENOSPC;
1084 /* Stream indexing is blissfully easy */
1085 if (!smrs)
1086 return id;
1088 /* Validating SMRs is... less so */
1089 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1090 if (!smrs[i].valid) {
1092 * Note the first free entry we come across, which
1093 * we'll claim in the end if nothing else matches.
1095 if (free_idx < 0)
1096 free_idx = i;
1097 continue;
1100 * If the new entry is _entirely_ matched by an existing entry,
1101 * then reuse that, with the guarantee that there also cannot
1102 * be any subsequent conflicting entries. In normal use we'd
1103 * expect simply identical entries for this case, but there's
1104 * no harm in accommodating the generalisation.
1106 if ((mask & smrs[i].mask) == mask &&
1107 !((id ^ smrs[i].id) & ~smrs[i].mask))
1108 return i;
1110 * If the new entry has any other overlap with an existing one,
1111 * though, then there always exists at least one stream ID
1112 * which would cause a conflict, and we can't allow that risk.
1114 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1115 return -EINVAL;
1118 return free_idx;
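/*
 * In other words: an existing SME can be reused only if its SMR already
 * matches every stream ID the new (id, mask) pair could produce, and any
 * partial overlap is rejected outright because at least one stream ID would
 * then hit two entries at once.
 */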
1121 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1123 if (--smmu->s2crs[idx].count)
1124 return false;
1126 smmu->s2crs[idx] = s2cr_init_val;
1127 if (smmu->smrs)
1128 smmu->smrs[idx].valid = false;
1130 return true;
1133 static int arm_smmu_master_alloc_smes(struct device *dev)
1135 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1136 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1137 struct arm_smmu_device *smmu = cfg->smmu;
1138 struct arm_smmu_smr *smrs = smmu->smrs;
1139 struct iommu_group *group;
1140 int i, idx, ret;
1142 mutex_lock(&smmu->stream_map_mutex);
1143 /* Figure out a viable stream map entry allocation */
1144 for_each_cfg_sme(fwspec, i, idx) {
1145 u16 sid = fwspec->ids[i];
1146 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1148 if (idx != INVALID_SMENDX) {
1149 ret = -EEXIST;
1150 goto out_err;
1153 ret = arm_smmu_find_sme(smmu, sid, mask);
1154 if (ret < 0)
1155 goto out_err;
1157 idx = ret;
1158 if (smrs && smmu->s2crs[idx].count == 0) {
1159 smrs[idx].id = sid;
1160 smrs[idx].mask = mask;
1161 smrs[idx].valid = true;
1163 smmu->s2crs[idx].count++;
1164 cfg->smendx[i] = (s16)idx;
1167 group = iommu_group_get_for_dev(dev);
1168 if (!group)
1169 group = ERR_PTR(-ENOMEM);
1170 if (IS_ERR(group)) {
1171 ret = PTR_ERR(group);
1172 goto out_err;
1174 iommu_group_put(group);
1176 /* It worked! Now, poke the actual hardware */
1177 for_each_cfg_sme(fwspec, i, idx) {
1178 arm_smmu_write_sme(smmu, idx);
1179 smmu->s2crs[idx].group = group;
1182 mutex_unlock(&smmu->stream_map_mutex);
1183 return 0;
1185 out_err:
1186 while (i--) {
1187 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1188 cfg->smendx[i] = INVALID_SMENDX;
1190 mutex_unlock(&smmu->stream_map_mutex);
1191 return ret;
1194 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1196 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1197 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1198 int i, idx;
1200 mutex_lock(&smmu->stream_map_mutex);
1201 for_each_cfg_sme(fwspec, i, idx) {
1202 if (arm_smmu_free_sme(smmu, idx))
1203 arm_smmu_write_sme(smmu, idx);
1204 cfg->smendx[i] = INVALID_SMENDX;
1206 mutex_unlock(&smmu->stream_map_mutex);
1209 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1210 struct iommu_fwspec *fwspec)
1212 struct arm_smmu_device *smmu = smmu_domain->smmu;
1213 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1214 u8 cbndx = smmu_domain->cfg.cbndx;
1215 enum arm_smmu_s2cr_type type;
1216 int i, idx;
1218 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1219 type = S2CR_TYPE_BYPASS;
1220 else
1221 type = S2CR_TYPE_TRANS;
1223 for_each_cfg_sme(fwspec, i, idx) {
1224 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1225 continue;
1227 s2cr[idx].type = type;
1228 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1229 s2cr[idx].cbndx = cbndx;
1230 arm_smmu_write_s2cr(smmu, idx);
1232 return 0;
1235 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1237 int ret;
1238 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1239 struct arm_smmu_device *smmu;
1240 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1242 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1243 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1244 return -ENXIO;
1248 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1249 * domains between of_xlate() and add_device() - we have no way to cope
1250 * with that, so until ARM gets converted to rely on groups and default
1251 * domains, just say no (but more politely than by dereferencing NULL).
1252 * This should be at least a WARN_ON once that's sorted.
1254 if (!fwspec->iommu_priv)
1255 return -ENODEV;
1257 smmu = fwspec_smmu(fwspec);
1259 ret = arm_smmu_rpm_get(smmu);
1260 if (ret < 0)
1261 return ret;
1263 /* Ensure that the domain is finalised */
1264 ret = arm_smmu_init_domain_context(domain, smmu);
1265 if (ret < 0)
1266 goto rpm_put;
1269 * Sanity check the domain. We don't support domains across
1270 * different SMMUs.
1272 if (smmu_domain->smmu != smmu) {
1273 dev_err(dev,
1274 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1275 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1276 ret = -EINVAL;
1277 goto rpm_put;
1280 /* Looks ok, so add the device to the domain */
1281 ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
1283 rpm_put:
1284 arm_smmu_rpm_put(smmu);
1285 return ret;
1288 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1289 phys_addr_t paddr, size_t size, int prot)
1291 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1292 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1293 int ret;
1295 if (!ops)
1296 return -ENODEV;
1298 arm_smmu_rpm_get(smmu);
1299 ret = ops->map(ops, iova, paddr, size, prot);
1300 arm_smmu_rpm_put(smmu);
1302 return ret;
1305 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1306 size_t size)
1308 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1309 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1310 size_t ret;
1312 if (!ops)
1313 return 0;
1315 arm_smmu_rpm_get(smmu);
1316 ret = ops->unmap(ops, iova, size);
1317 arm_smmu_rpm_put(smmu);
1319 return ret;
1322 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1324 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1325 struct arm_smmu_device *smmu = smmu_domain->smmu;
1327 if (smmu_domain->tlb_ops) {
1328 arm_smmu_rpm_get(smmu);
1329 smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
1330 arm_smmu_rpm_put(smmu);
1334 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1336 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1337 struct arm_smmu_device *smmu = smmu_domain->smmu;
1339 if (smmu_domain->tlb_ops) {
1340 arm_smmu_rpm_get(smmu);
1341 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1342 arm_smmu_rpm_put(smmu);
1346 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1347 dma_addr_t iova)
1349 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1350 struct arm_smmu_device *smmu = smmu_domain->smmu;
1351 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1352 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1353 struct device *dev = smmu->dev;
1354 void __iomem *cb_base;
1355 u32 tmp;
1356 u64 phys;
1357 unsigned long va, flags;
1358 int ret;
1360 ret = arm_smmu_rpm_get(smmu);
1361 if (ret < 0)
1362 return 0;
1364 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1366 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1367 /* ATS1 registers can only be written atomically */
1368 va = iova & ~0xfffUL;
1369 if (smmu->version == ARM_SMMU_V2)
1370 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1371 else /* Register is only 32-bit in v1 */
1372 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1374 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1375 !(tmp & ATSR_ACTIVE), 5, 50)) {
1376 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1377 dev_err(dev,
1378 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1379 &iova);
1380 return ops->iova_to_phys(ops, iova);
1383 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1384 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1385 if (phys & CB_PAR_F) {
1386 dev_err(dev, "translation fault!\n");
1387 dev_err(dev, "PAR = 0x%llx\n", phys);
1388 return 0;
1391 arm_smmu_rpm_put(smmu);
1393 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1396 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1397 dma_addr_t iova)
1399 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1400 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1402 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1403 return iova;
1405 if (!ops)
1406 return 0;
1408 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1409 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1410 return arm_smmu_iova_to_phys_hard(domain, iova);
1412 return ops->iova_to_phys(ops, iova);
1415 static bool arm_smmu_capable(enum iommu_cap cap)
1417 switch (cap) {
1418 case IOMMU_CAP_CACHE_COHERENCY:
1420 * Return true here as the SMMU can always send out coherent
1421 * requests.
1423 return true;
1424 case IOMMU_CAP_NOEXEC:
1425 return true;
1426 default:
1427 return false;
1431 static int arm_smmu_match_node(struct device *dev, void *data)
1433 return dev->fwnode == data;
1436 static
1437 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1439 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1440 fwnode, arm_smmu_match_node);
1441 put_device(dev);
1442 return dev ? dev_get_drvdata(dev) : NULL;
1445 static int arm_smmu_add_device(struct device *dev)
1447 struct arm_smmu_device *smmu;
1448 struct arm_smmu_master_cfg *cfg;
1449 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1450 int i, ret;
1452 if (using_legacy_binding) {
1453 ret = arm_smmu_register_legacy_master(dev, &smmu);
1456 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1457 * will allocate/initialise a new one. Thus we need to update fwspec for
1458 * later use.
1460 fwspec = dev_iommu_fwspec_get(dev);
1461 if (ret)
1462 goto out_free;
1463 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1464 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1465 } else {
1466 return -ENODEV;
1469 ret = -EINVAL;
1470 for (i = 0; i < fwspec->num_ids; i++) {
1471 u16 sid = fwspec->ids[i];
1472 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1474 if (sid & ~smmu->streamid_mask) {
1475 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1476 sid, smmu->streamid_mask);
1477 goto out_free;
1479 if (mask & ~smmu->smr_mask_mask) {
1480 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1481 mask, smmu->smr_mask_mask);
1482 goto out_free;
1486 ret = -ENOMEM;
1487 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1488 GFP_KERNEL);
1489 if (!cfg)
1490 goto out_free;
1492 cfg->smmu = smmu;
1493 fwspec->iommu_priv = cfg;
1494 while (i--)
1495 cfg->smendx[i] = INVALID_SMENDX;
1497 ret = arm_smmu_rpm_get(smmu);
1498 if (ret < 0)
1499 goto out_cfg_free;
1501 ret = arm_smmu_master_alloc_smes(dev);
1502 arm_smmu_rpm_put(smmu);
1504 if (ret)
1505 goto out_cfg_free;
1507 iommu_device_link(&smmu->iommu, dev);
1509 device_link_add(dev, smmu->dev,
1510 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1512 return 0;
1514 out_cfg_free:
1515 kfree(cfg);
1516 out_free:
1517 iommu_fwspec_free(dev);
1518 return ret;
1521 static void arm_smmu_remove_device(struct device *dev)
1523 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1524 struct arm_smmu_master_cfg *cfg;
1525 struct arm_smmu_device *smmu;
1526 int ret;
1528 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1529 return;
1531 cfg = fwspec->iommu_priv;
1532 smmu = cfg->smmu;
1534 ret = arm_smmu_rpm_get(smmu);
1535 if (ret < 0)
1536 return;
1538 iommu_device_unlink(&smmu->iommu, dev);
1539 arm_smmu_master_free_smes(fwspec);
1541 arm_smmu_rpm_put(smmu);
1543 iommu_group_remove_device(dev);
1544 kfree(fwspec->iommu_priv);
1545 iommu_fwspec_free(dev);
1548 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1550 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1551 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1552 struct iommu_group *group = NULL;
1553 int i, idx;
1555 for_each_cfg_sme(fwspec, i, idx) {
1556 if (group && smmu->s2crs[idx].group &&
1557 group != smmu->s2crs[idx].group)
1558 return ERR_PTR(-EINVAL);
1560 group = smmu->s2crs[idx].group;
1563 if (group)
1564 return iommu_group_ref_get(group);
1566 if (dev_is_pci(dev))
1567 group = pci_device_group(dev);
1568 else if (dev_is_fsl_mc(dev))
1569 group = fsl_mc_device_group(dev);
1570 else
1571 group = generic_device_group(dev);
1573 return group;
1576 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1577 enum iommu_attr attr, void *data)
1579 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1581 switch(domain->type) {
1582 case IOMMU_DOMAIN_UNMANAGED:
1583 switch (attr) {
1584 case DOMAIN_ATTR_NESTING:
1585 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1586 return 0;
1587 default:
1588 return -ENODEV;
1590 break;
1591 case IOMMU_DOMAIN_DMA:
1592 switch (attr) {
1593 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1594 *(int *)data = smmu_domain->non_strict;
1595 return 0;
1596 default:
1597 return -ENODEV;
1599 break;
1600 default:
1601 return -EINVAL;
1605 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1606 enum iommu_attr attr, void *data)
1608 int ret = 0;
1609 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1611 mutex_lock(&smmu_domain->init_mutex);
1613 switch(domain->type) {
1614 case IOMMU_DOMAIN_UNMANAGED:
1615 switch (attr) {
1616 case DOMAIN_ATTR_NESTING:
1617 if (smmu_domain->smmu) {
1618 ret = -EPERM;
1619 goto out_unlock;
1622 if (*(int *)data)
1623 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1624 else
1625 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1626 break;
1627 default:
1628 ret = -ENODEV;
1630 break;
1631 case IOMMU_DOMAIN_DMA:
1632 switch (attr) {
1633 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1634 smmu_domain->non_strict = *(int *)data;
1635 break;
1636 default:
1637 ret = -ENODEV;
1639 break;
1640 default:
1641 ret = -EINVAL;
1643 out_unlock:
1644 mutex_unlock(&smmu_domain->init_mutex);
1645 return ret;
1648 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1650 u32 mask, fwid = 0;
1652 if (args->args_count > 0)
1653 fwid |= (u16)args->args[0];
1655 if (args->args_count > 1)
1656 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1657 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1658 fwid |= (u16)mask << SMR_MASK_SHIFT;
1660 return iommu_fwspec_add_ids(dev, &fwid, 1);
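/*
 * The 32-bit firmware ID handed to the core above is laid out as: SMR mask in
 * bits [31:16] (taken from either the second "iommus" cell or the
 * "stream-match-mask" property) and stream ID in bits [15:0].
 */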
1663 static void arm_smmu_get_resv_regions(struct device *dev,
1664 struct list_head *head)
1666 struct iommu_resv_region *region;
1667 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1669 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1670 prot, IOMMU_RESV_SW_MSI);
1671 if (!region)
1672 return;
1674 list_add_tail(&region->list, head);
1676 iommu_dma_get_resv_regions(dev, head);
1679 static void arm_smmu_put_resv_regions(struct device *dev,
1680 struct list_head *head)
1682 struct iommu_resv_region *entry, *next;
1684 list_for_each_entry_safe(entry, next, head, list)
1685 kfree(entry);
1688 static struct iommu_ops arm_smmu_ops = {
1689 .capable = arm_smmu_capable,
1690 .domain_alloc = arm_smmu_domain_alloc,
1691 .domain_free = arm_smmu_domain_free,
1692 .attach_dev = arm_smmu_attach_dev,
1693 .map = arm_smmu_map,
1694 .unmap = arm_smmu_unmap,
1695 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1696 .iotlb_sync = arm_smmu_iotlb_sync,
1697 .iova_to_phys = arm_smmu_iova_to_phys,
1698 .add_device = arm_smmu_add_device,
1699 .remove_device = arm_smmu_remove_device,
1700 .device_group = arm_smmu_device_group,
1701 .domain_get_attr = arm_smmu_domain_get_attr,
1702 .domain_set_attr = arm_smmu_domain_set_attr,
1703 .of_xlate = arm_smmu_of_xlate,
1704 .get_resv_regions = arm_smmu_get_resv_regions,
1705 .put_resv_regions = arm_smmu_put_resv_regions,
1706 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1709 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1711 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1712 int i;
1713 u32 reg, major;
1715 /* clear global FSR */
1716 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1717 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1720 * Reset stream mapping groups: Initial values mark all SMRn as
1721 * invalid and all S2CRn as bypass unless overridden.
1723 for (i = 0; i < smmu->num_mapping_groups; ++i)
1724 arm_smmu_write_sme(smmu, i);
1726 if (smmu->model == ARM_MMU500) {
1728 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
1729 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
1730 * bit is only present in MMU-500r2 onwards.
1732 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1733 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1734 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1735 if (major >= 2)
1736 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1738 * Allow unmatched Stream IDs to allocate bypass
1739 * TLB entries for reduced latency.
1741 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1742 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1745 /* Make sure all context banks are disabled and clear CB_FSR */
1746 for (i = 0; i < smmu->num_context_banks; ++i) {
1747 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1749 arm_smmu_write_context_bank(smmu, i);
1750 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1752 * Disable MMU-500's not-particularly-beneficial next-page
1753 * prefetcher for the sake of errata #841119 and #826419.
1755 if (smmu->model == ARM_MMU500) {
1756 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1757 reg &= ~ARM_MMU500_ACTLR_CPRE;
1758 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1762 /* Invalidate the TLB, just in case */
1763 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1764 writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1766 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1768 /* Enable fault reporting */
1769 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1771 /* Disable TLB broadcasting. */
1772 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1774 /* Enable client access, handling unmatched streams as appropriate */
1775 reg &= ~sCR0_CLIENTPD;
1776 if (disable_bypass)
1777 reg |= sCR0_USFCFG;
1778 else
1779 reg &= ~sCR0_USFCFG;
1781 /* Disable forced broadcasting */
1782 reg &= ~sCR0_FB;
1784 /* Don't upgrade barriers */
1785 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1787 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1788 reg |= sCR0_VMID16EN;
1790 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1791 reg |= sCR0_EXIDENABLE;
1793 /* Push the button */
1794 arm_smmu_tlb_sync_global(smmu);
1795 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1798 static int arm_smmu_id_size_to_bits(int size)
1800 switch (size) {
1801 case 0:
1802 return 32;
1803 case 1:
1804 return 36;
1805 case 2:
1806 return 40;
1807 case 3:
1808 return 42;
1809 case 4:
1810 return 44;
1811 case 5:
1812 default:
1813 return 48;
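/*
 * arm_smmu_id_size_to_bits() decodes the 3-bit address-size fields found in
 * ID2 (IAS/OAS/UBS): encodings 0..5 map to 32/36/40/42/44/48 bits, and
 * anything larger is clamped to 48 bits.
 */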
1817 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1819 unsigned long size;
1820 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1821 u32 id;
1822 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1823 int i;
1825 dev_notice(smmu->dev, "probing hardware configuration...\n");
1826 dev_notice(smmu->dev, "SMMUv%d with:\n",
1827 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1829 /* ID0 */
1830 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1832 /* Restrict available stages based on module parameter */
1833 if (force_stage == 1)
1834 id &= ~(ID0_S2TS | ID0_NTS);
1835 else if (force_stage == 2)
1836 id &= ~(ID0_S1TS | ID0_NTS);
1838 if (id & ID0_S1TS) {
1839 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1840 dev_notice(smmu->dev, "\tstage 1 translation\n");
1843 if (id & ID0_S2TS) {
1844 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1845 dev_notice(smmu->dev, "\tstage 2 translation\n");
1848 if (id & ID0_NTS) {
1849 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1850 dev_notice(smmu->dev, "\tnested translation\n");
1853 if (!(smmu->features &
1854 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1855 dev_err(smmu->dev, "\tno translation support!\n");
1856 return -ENODEV;
1859 if ((id & ID0_S1TS) &&
1860 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1861 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1862 dev_notice(smmu->dev, "\taddress translation ops\n");
1866 * In order for DMA API calls to work properly, we must defer to what
1867 * the FW says about coherency, regardless of what the hardware claims.
1868 * Fortunately, this also opens up a workaround for systems where the
1869 * ID register value has ended up configured incorrectly.
1871 cttw_reg = !!(id & ID0_CTTW);
1872 if (cttw_fw || cttw_reg)
1873 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1874 cttw_fw ? "" : "non-");
1875 if (cttw_fw != cttw_reg)
1876 dev_notice(smmu->dev,
1877 "\t(IDR0.CTTW overridden by FW configuration)\n");
1879 /* Max. number of entries we have for stream matching/indexing */
1880 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1881 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1882 size = 1 << 16;
1883 } else {
1884 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1886 smmu->streamid_mask = size - 1;
1887 if (id & ID0_SMS) {
1888 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1889 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1890 if (size == 0) {
1891 dev_err(smmu->dev,
1892 "stream-matching supported, but no SMRs present!\n");
1893 return -ENODEV;
1896 /* Zero-initialised to mark as invalid */
1897 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1898 GFP_KERNEL);
1899 if (!smmu->smrs)
1900 return -ENOMEM;
1902 dev_notice(smmu->dev,
1903 "\tstream matching with %lu register groups", size);
1905 /* s2cr->type == 0 means translation, so initialise explicitly */
1906 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1907 GFP_KERNEL);
1908 if (!smmu->s2crs)
1909 return -ENOMEM;
1910 for (i = 0; i < size; i++)
1911 smmu->s2crs[i] = s2cr_init_val;
1913 smmu->num_mapping_groups = size;
1914 mutex_init(&smmu->stream_map_mutex);
1915 spin_lock_init(&smmu->global_sync_lock);
1917 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1918 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1919 if (!(id & ID0_PTFS_NO_AARCH32S))
1920 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1923 /* ID1 */
1924 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1925 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1927 /* Check for size mismatch of SMMU address space from mapped region */
1928 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1929 size <<= smmu->pgshift;
1930 if (smmu->cb_base != gr0_base + size)
1931 dev_warn(smmu->dev,
1932 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1933 size * 2, (smmu->cb_base - gr0_base) * 2);
1935 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1936 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1937 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1938 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1939 return -ENODEV;
1941 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1942 smmu->num_context_banks, smmu->num_s2_context_banks);
1944 * Cavium CN88xx erratum #27704.
1945 * Ensure ASID and VMID allocation is unique across all SMMUs in
1946 * the system.
1948 if (smmu->model == CAVIUM_SMMUV2) {
1949 smmu->cavium_id_base =
1950 atomic_add_return(smmu->num_context_banks,
1951 &cavium_smmu_context_count);
1952 smmu->cavium_id_base -= smmu->num_context_banks;
1953 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1955 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1956 sizeof(*smmu->cbs), GFP_KERNEL);
1957 if (!smmu->cbs)
1958 return -ENOMEM;
	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
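/*
 * Static match data: the architecture version and implementation (used to
 * select erratum workarounds) associated with each supported SMMU variant,
 * referenced from the device-tree match table below.
 */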
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
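/*
 * On ACPI systems the equivalent information comes from the IORT table:
 * map the IORT SMMU model field onto the driver's version/implementation.
 */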
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
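/*
 * Device-tree probe: read #global-interrupts, pick up the per-compatible
 * match data, and work out whether the deprecated "mmu-masters" binding or
 * the generic binding is in use (mixing the two is refused).
 */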
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
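/*
 * Hook arm_smmu_ops into every bus type the SMMU may master, so that device
 * additions on those buses go through the IOMMU core.
 */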
static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
#endif
}
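/*
 * Main probe path: gather the firmware description (DT or ACPI), map the
 * registers, collect IRQs and clocks, probe the hardware configuration and
 * register the instance with the IOMMU core.
 */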
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->cb_base = smmu->base + resource_size(res) / 2;
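	/*
	 * Count the interrupt resources: the first num_global_irqs are global
	 * fault interrupts, anything beyond that is treated as a context
	 * interrupt.
	 */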
	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}
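	/*
	 * Optional clocks: devm_clk_bulk_get_all() returns how many clocks
	 * the firmware describes (possibly zero), and the bulk enable below
	 * is a no-op in the zero-clock case.
	 */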
	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
				"found only %d context irq(s) but %d required\n",
				smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}
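	/*
	 * Request only the global fault interrupts here; context fault
	 * interrupts are requested later, when context banks are allocated
	 * to domains.
	 */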
	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}
/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
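/*
 * Shutdown (reboot/kexec) path: warn about still-active domains, disable
 * the SMMU by setting sCR0.CLIENTPD, then quiesce and release the clocks.
 */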
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
}
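/*
 * Runtime PM simply gates the SMMU clocks while idle; resume re-enables
 * them and replays the register state via arm_smmu_device_reset().
 */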
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}
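/*
 * System sleep reuses the runtime PM callbacks, skipping them when the
 * device is already runtime-suspended.
 */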
static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.shutdown = arm_smmu_device_shutdown,
};
builtin_platform_driver(arm_smmu_driver);