1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <linux/pm_runtime.h>
37 #include <linux/ratelimit.h>
38 #include <linux/slab.h>
39 
40 #include <linux/amba/bus.h>
41 #include <linux/fsl/mc.h>
42 
43 #include "arm-smmu.h"
44 
45 /*
46  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
47  * global register space are still, in fact, using a hypervisor to mediate it
48  * by trapping and emulating register accesses. Sadly, some deployed versions
49  * of said trapping code have bugs wherein they go horribly wrong for stores
50  * using r31 (i.e. XZR/WZR) as the source register.
51  */
#define QCOM_DUMMY_VAL -1

/*
 * Base address and length of an IOVA window.
 * NOTE(review): presumably the software MSI region -- the users of these
 * macros are not visible in this part of the file; confirm.
 */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

/* Read-only module parameter; see MODULE_PARM_DESC below for its meaning. */
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/* Read-only module parameter; the default follows the Kconfig option. */
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

/*
 * Initial S2CR value: a compound literal whose type is FAULT when
 * disable_bypass is set and BYPASS otherwise. It is evaluated at each
 * point of use, so it reflects the current value of disable_bypass.
 */
#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

/* Record which firmware binding flavour is in use. */
static bool using_legacy_binding, using_generic_binding;
72 
73 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
74 {
75 	if (pm_runtime_enabled(smmu->dev))
76 		return pm_runtime_resume_and_get(smmu->dev);
77 
78 	return 0;
79 }
80 
81 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
82 {
83 	if (pm_runtime_enabled(smmu->dev))
84 		pm_runtime_put_autosuspend(smmu->dev);
85 }
86 
87 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
88 {
89 	return container_of(dom, struct arm_smmu_domain, domain);
90 }
91 
/*
 * Declared up front so that the legacy-binding helpers below can refer to
 * the driver and the IOMMU ops.
 */
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
94 
95 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
/* Prototype only; called from arm_smmu_legacy_bus_init() below. */
static int arm_smmu_bus_init(struct iommu_ops *ops);
97 
98 static struct device_node *dev_get_dev_node(struct device *dev)
99 {
100 	if (dev_is_pci(dev)) {
101 		struct pci_bus *bus = to_pci_dev(dev)->bus;
102 
103 		while (!pci_is_root_bus(bus))
104 			bus = bus->parent;
105 		return of_node_get(bus->bridge->parent->of_node);
106 	}
107 
108 	return of_node_get(dev->of_node);
109 }
110 
111 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
112 {
113 	*((__be32 *)data) = cpu_to_be32(alias);
114 	return 0; /* Continue walking */
115 }
116 
117 static int __find_legacy_master_phandle(struct device *dev, void *data)
118 {
119 	struct of_phandle_iterator *it = *(void **)data;
120 	struct device_node *np = it->node;
121 	int err;
122 
123 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
124 			    "#stream-id-cells", -1)
125 		if (it->node == np) {
126 			*(void **)data = dev;
127 			return 1;
128 		}
129 	it->node = np;
130 	return err == -ENOENT ? 0 : err;
131 }
132 
133 static int arm_smmu_register_legacy_master(struct device *dev,
134 					   struct arm_smmu_device **smmu)
135 {
136 	struct device *smmu_dev;
137 	struct device_node *np;
138 	struct of_phandle_iterator it;
139 	void *data = &it;
140 	u32 *sids;
141 	__be32 pci_sid;
142 	int err;
143 
144 	np = dev_get_dev_node(dev);
145 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
146 		of_node_put(np);
147 		return -ENODEV;
148 	}
149 
150 	it.node = np;
151 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
152 				     __find_legacy_master_phandle);
153 	smmu_dev = data;
154 	of_node_put(np);
155 	if (err == 0)
156 		return -ENODEV;
157 	if (err < 0)
158 		return err;
159 
160 	if (dev_is_pci(dev)) {
161 		/* "mmu-masters" assumes Stream ID == Requester ID */
162 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
163 				       &pci_sid);
164 		it.cur = &pci_sid;
165 		it.cur_count = 1;
166 	}
167 
168 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
169 				&arm_smmu_ops);
170 	if (err)
171 		return err;
172 
173 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
174 	if (!sids)
175 		return -ENOMEM;
176 
177 	*smmu = dev_get_drvdata(smmu_dev);
178 	of_phandle_iterator_args(&it, sids, it.cur_count);
179 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
180 	kfree(sids);
181 	return err;
182 }
183 
184 /*
185  * With the legacy DT binding in play, we have no guarantees about
186  * probe order, but then we're also not doing default domains, so we can
187  * delay setting bus ops until we're sure every possible SMMU is ready,
188  * and that way ensure that no probe_device() calls get missed.
189  */
190 static int arm_smmu_legacy_bus_init(void)
191 {
192 	if (using_legacy_binding)
193 		return arm_smmu_bus_init(&arm_smmu_ops);
194 	return 0;
195 }
196 device_initcall_sync(arm_smmu_legacy_bus_init);
197 #else
/*
 * Stub used when CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS is not set: no device
 * can be registered through the legacy binding, so always report -ENODEV.
 */
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
203 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
204 
/* Release index @idx in the allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
209 
210 /* Wait for any pending TLB invalidations to complete */
211 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
212 				int sync, int status)
213 {
214 	unsigned int spin_cnt, delay;
215 	u32 reg;
216 
217 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
218 		return smmu->impl->tlb_sync(smmu, page, sync, status);
219 
220 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
221 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
222 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
223 			reg = arm_smmu_readl(smmu, page, status);
224 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
225 				return;
226 			cpu_relax();
227 		}
228 		udelay(delay);
229 	}
230 	dev_err_ratelimited(smmu->dev,
231 			    "TLB sync timed out -- SMMU may be deadlocked\n");
232 }
233 
234 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
235 {
236 	unsigned long flags;
237 
238 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
239 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
240 			    ARM_SMMU_GR0_sTLBGSTATUS);
241 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
242 }
243 
244 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
245 {
246 	struct arm_smmu_device *smmu = smmu_domain->smmu;
247 	unsigned long flags;
248 
249 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
250 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
251 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
252 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
253 }
254 
255 static void arm_smmu_tlb_inv_context_s1(void *cookie)
256 {
257 	struct arm_smmu_domain *smmu_domain = cookie;
258 	/*
259 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
260 	 * current CPU are visible beforehand.
261 	 */
262 	wmb();
263 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
264 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
265 	arm_smmu_tlb_sync_context(smmu_domain);
266 }
267 
268 static void arm_smmu_tlb_inv_context_s2(void *cookie)
269 {
270 	struct arm_smmu_domain *smmu_domain = cookie;
271 	struct arm_smmu_device *smmu = smmu_domain->smmu;
272 
273 	/* See above */
274 	wmb();
275 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
276 	arm_smmu_tlb_sync_global(smmu);
277 }
278 
279 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
280 				      size_t granule, void *cookie, int reg)
281 {
282 	struct arm_smmu_domain *smmu_domain = cookie;
283 	struct arm_smmu_device *smmu = smmu_domain->smmu;
284 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
285 	int idx = cfg->cbndx;
286 
287 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
288 		wmb();
289 
290 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
291 		iova = (iova >> 12) << 12;
292 		iova |= cfg->asid;
293 		do {
294 			arm_smmu_cb_write(smmu, idx, reg, iova);
295 			iova += granule;
296 		} while (size -= granule);
297 	} else {
298 		iova >>= 12;
299 		iova |= (u64)cfg->asid << 48;
300 		do {
301 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
302 			iova += granule >> 12;
303 		} while (size -= granule);
304 	}
305 }
306 
307 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
308 				      size_t granule, void *cookie, int reg)
309 {
310 	struct arm_smmu_domain *smmu_domain = cookie;
311 	struct arm_smmu_device *smmu = smmu_domain->smmu;
312 	int idx = smmu_domain->cfg.cbndx;
313 
314 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
315 		wmb();
316 
317 	iova >>= 12;
318 	do {
319 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
320 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
321 		else
322 			arm_smmu_cb_write(smmu, idx, reg, iova);
323 		iova += granule >> 12;
324 	} while (size -= granule);
325 }
326 
327 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328 				     size_t granule, void *cookie)
329 {
330 	struct arm_smmu_domain *smmu_domain = cookie;
331 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
332 
333 	if (cfg->flush_walk_prefer_tlbiasid) {
334 		arm_smmu_tlb_inv_context_s1(cookie);
335 	} else {
336 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
337 					  ARM_SMMU_CB_S1_TLBIVA);
338 		arm_smmu_tlb_sync_context(cookie);
339 	}
340 }
341 
/*
 * Stage-1 .tlb_add_page callback: queue a TLBIVAL invalidation for the
 * single @granule-sized page at @iova. No sync is issued here and @gather
 * is not used.
 */
static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
}
349 
350 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
351 				     size_t granule, void *cookie)
352 {
353 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
354 				  ARM_SMMU_CB_S2_TLBIIPAS2);
355 	arm_smmu_tlb_sync_context(cookie);
356 }
357 
/*
 * Stage-2 .tlb_add_page callback (used with the v2 ops table): queue a
 * TLBIIPAS2L invalidation for the single @granule-sized page at @iova. No
 * sync is issued here and @gather is not used.
 */
static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
}
365 
/*
 * Stage-2 .tlb_flush_walk callback used with the v1 ops table: the range
 * arguments are ignored and the whole VMID is invalidated instead.
 */
static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
					size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
371 /*
372  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
373  * almost negligible, but the benefit of getting the first one in as far ahead
374  * of the sync as possible is significant, hence we don't just make this a
375  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
376  * think.
377  */
378 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
379 					unsigned long iova, size_t granule,
380 					void *cookie)
381 {
382 	struct arm_smmu_domain *smmu_domain = cookie;
383 	struct arm_smmu_device *smmu = smmu_domain->smmu;
384 
385 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
386 		wmb();
387 
388 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
389 }
390 
/* TLB maintenance callbacks installed for stage-1 domains. */
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};
396 
/*
 * TLB maintenance callbacks installed for stage-2 domains when the SMMU is
 * ARM_SMMU_V2: walks and pages are invalidated by IPA.
 */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};
402 
/*
 * TLB maintenance callbacks installed for stage-2 domains when the SMMU is
 * not ARM_SMMU_V2: every operation invalidates the whole VMID.
 */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};
408 
409 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
410 {
411 	u32 fsr, fsynr, cbfrsynra;
412 	unsigned long iova;
413 	struct iommu_domain *domain = dev;
414 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
415 	struct arm_smmu_device *smmu = smmu_domain->smmu;
416 	int idx = smmu_domain->cfg.cbndx;
417 	int ret;
418 
419 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
420 	if (!(fsr & ARM_SMMU_FSR_FAULT))
421 		return IRQ_NONE;
422 
423 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
424 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
425 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
426 
427 	ret = report_iommu_fault(domain, NULL, iova,
428 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
429 
430 	if (ret == -ENOSYS)
431 		dev_err_ratelimited(smmu->dev,
432 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
433 			    fsr, iova, fsynr, cbfrsynra, idx);
434 
435 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
436 	return IRQ_HANDLED;
437 }
438 
439 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
440 {
441 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
442 	struct arm_smmu_device *smmu = dev;
443 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
444 				      DEFAULT_RATELIMIT_BURST);
445 
446 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
447 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
448 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
449 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
450 
451 	if (!gfsr)
452 		return IRQ_NONE;
453 
454 	if (__ratelimit(&rs)) {
455 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
456 		    (gfsr & ARM_SMMU_sGFSR_USF))
457 			dev_err(smmu->dev,
458 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
459 				(u16)gfsynr1);
460 		else
461 			dev_err(smmu->dev,
462 				"Unexpected global fault, this could be serious\n");
463 		dev_err(smmu->dev,
464 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
465 			gfsr, gfsynr0, gfsynr1, gfsynr2);
466 	}
467 
468 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
469 	return IRQ_HANDLED;
470 }
471 
472 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
473 				       struct io_pgtable_cfg *pgtbl_cfg)
474 {
475 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
476 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
477 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
478 
479 	cb->cfg = cfg;
480 
481 	/* TCR */
482 	if (stage1) {
483 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
484 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
485 		} else {
486 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
487 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
488 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
489 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
490 			else
491 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
492 		}
493 	} else {
494 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
495 	}
496 
497 	/* TTBRs */
498 	if (stage1) {
499 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
500 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
501 			cb->ttbr[1] = 0;
502 		} else {
503 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
506 						 cfg->asid);
507 
508 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
509 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 			else
511 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
512 		}
513 	} else {
514 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
515 	}
516 
517 	/* MAIRs (stage-1 only) */
518 	if (stage1) {
519 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
520 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
521 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
522 		} else {
523 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
524 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
525 		}
526 	}
527 }
528 
529 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
530 {
531 	u32 reg;
532 	bool stage1;
533 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
534 	struct arm_smmu_cfg *cfg = cb->cfg;
535 
536 	/* Unassigned context banks only need disabling */
537 	if (!cfg) {
538 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
539 		return;
540 	}
541 
542 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
543 
544 	/* CBA2R */
545 	if (smmu->version > ARM_SMMU_V1) {
546 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
547 			reg = ARM_SMMU_CBA2R_VA64;
548 		else
549 			reg = 0;
550 		/* 16-bit VMIDs live in CBA2R */
551 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
552 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
553 
554 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
555 	}
556 
557 	/* CBAR */
558 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
559 	if (smmu->version < ARM_SMMU_V2)
560 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
561 
562 	/*
563 	 * Use the weakest shareability/memory types, so they are
564 	 * overridden by the ttbcr/pte.
565 	 */
566 	if (stage1) {
567 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
568 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
569 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
570 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
571 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
572 		/* 8-bit VMIDs live in CBAR */
573 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
574 	}
575 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
576 
577 	/*
578 	 * TCR
579 	 * We must write this before the TTBRs, since it determines the
580 	 * access behaviour of some fields (in particular, ASID[15:8]).
581 	 */
582 	if (stage1 && smmu->version > ARM_SMMU_V1)
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
584 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
585 
586 	/* TTBRs */
587 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
589 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
590 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
591 	} else {
592 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
593 		if (stage1)
594 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
595 					   cb->ttbr[1]);
596 	}
597 
598 	/* MAIRs (stage-1 only) */
599 	if (stage1) {
600 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
601 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
602 	}
603 
604 	/* SCTLR */
605 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
606 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
607 	if (stage1)
608 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
609 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
610 		reg |= ARM_SMMU_SCTLR_E;
611 
612 	if (smmu->impl && smmu->impl->write_sctlr)
613 		smmu->impl->write_sctlr(smmu, idx, reg);
614 	else
615 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
616 }
617 
618 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
619 				       struct arm_smmu_device *smmu,
620 				       struct device *dev, unsigned int start)
621 {
622 	if (smmu->impl && smmu->impl->alloc_context_bank)
623 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
624 
625 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
626 }
627 
628 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
629 					struct arm_smmu_device *smmu,
630 					struct device *dev)
631 {
632 	int irq, start, ret = 0;
633 	unsigned long ias, oas;
634 	struct io_pgtable_ops *pgtbl_ops;
635 	struct io_pgtable_cfg pgtbl_cfg;
636 	enum io_pgtable_fmt fmt;
637 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
638 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
639 	irqreturn_t (*context_fault)(int irq, void *dev);
640 
641 	mutex_lock(&smmu_domain->init_mutex);
642 	if (smmu_domain->smmu)
643 		goto out_unlock;
644 
645 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
646 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
647 		smmu_domain->smmu = smmu;
648 		goto out_unlock;
649 	}
650 
651 	/*
652 	 * Mapping the requested stage onto what we support is surprisingly
653 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
654 	 * support for nested translation. That means we end up with the
655 	 * following table:
656 	 *
657 	 * Requested        Supported        Actual
658 	 *     S1               N              S1
659 	 *     S1             S1+S2            S1
660 	 *     S1               S2             S2
661 	 *     S1               S1             S1
662 	 *     N                N              N
663 	 *     N              S1+S2            S2
664 	 *     N                S2             S2
665 	 *     N                S1             S1
666 	 *
667 	 * Note that you can't actually request stage-2 mappings.
668 	 */
669 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
670 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
671 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
672 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
673 
674 	/*
675 	 * Choosing a suitable context format is even more fiddly. Until we
676 	 * grow some way for the caller to express a preference, and/or move
677 	 * the decision into the io-pgtable code where it arguably belongs,
678 	 * just aim for the closest thing to the rest of the system, and hope
679 	 * that the hardware isn't esoteric enough that we can't assume AArch64
680 	 * support to be a superset of AArch32 support...
681 	 */
682 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
683 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
684 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
685 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
686 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
687 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
688 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
689 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
690 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
691 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
692 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
693 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
694 
695 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
696 		ret = -EINVAL;
697 		goto out_unlock;
698 	}
699 
700 	switch (smmu_domain->stage) {
701 	case ARM_SMMU_DOMAIN_S1:
702 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
703 		start = smmu->num_s2_context_banks;
704 		ias = smmu->va_size;
705 		oas = smmu->ipa_size;
706 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
707 			fmt = ARM_64_LPAE_S1;
708 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
709 			fmt = ARM_32_LPAE_S1;
710 			ias = min(ias, 32UL);
711 			oas = min(oas, 40UL);
712 		} else {
713 			fmt = ARM_V7S;
714 			ias = min(ias, 32UL);
715 			oas = min(oas, 32UL);
716 		}
717 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
718 		break;
719 	case ARM_SMMU_DOMAIN_NESTED:
720 		/*
721 		 * We will likely want to change this if/when KVM gets
722 		 * involved.
723 		 */
724 	case ARM_SMMU_DOMAIN_S2:
725 		cfg->cbar = CBAR_TYPE_S2_TRANS;
726 		start = 0;
727 		ias = smmu->ipa_size;
728 		oas = smmu->pa_size;
729 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
730 			fmt = ARM_64_LPAE_S2;
731 		} else {
732 			fmt = ARM_32_LPAE_S2;
733 			ias = min(ias, 40UL);
734 			oas = min(oas, 40UL);
735 		}
736 		if (smmu->version == ARM_SMMU_V2)
737 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
738 		else
739 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
740 		break;
741 	default:
742 		ret = -EINVAL;
743 		goto out_unlock;
744 	}
745 
746 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
747 	if (ret < 0) {
748 		goto out_unlock;
749 	}
750 
751 	smmu_domain->smmu = smmu;
752 
753 	cfg->cbndx = ret;
754 	if (smmu->version < ARM_SMMU_V2) {
755 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
756 		cfg->irptndx %= smmu->num_context_irqs;
757 	} else {
758 		cfg->irptndx = cfg->cbndx;
759 	}
760 
761 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
762 		cfg->vmid = cfg->cbndx + 1;
763 	else
764 		cfg->asid = cfg->cbndx;
765 
766 	pgtbl_cfg = (struct io_pgtable_cfg) {
767 		.pgsize_bitmap	= smmu->pgsize_bitmap,
768 		.ias		= ias,
769 		.oas		= oas,
770 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
771 		.tlb		= smmu_domain->flush_ops,
772 		.iommu_dev	= smmu->dev,
773 	};
774 
775 	if (smmu->impl && smmu->impl->init_context) {
776 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
777 		if (ret)
778 			goto out_clear_smmu;
779 	}
780 
781 	if (smmu_domain->pgtbl_quirks)
782 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
783 
784 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
785 	if (!pgtbl_ops) {
786 		ret = -ENOMEM;
787 		goto out_clear_smmu;
788 	}
789 
790 	/* Update the domain's page sizes to reflect the page table format */
791 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
792 
793 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
794 		domain->geometry.aperture_start = ~0UL << ias;
795 		domain->geometry.aperture_end = ~0UL;
796 	} else {
797 		domain->geometry.aperture_end = (1UL << ias) - 1;
798 	}
799 
800 	domain->geometry.force_aperture = true;
801 
802 	/* Initialise the context bank with our page table cfg */
803 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
804 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
805 
806 	/*
807 	 * Request context fault interrupt. Do this last to avoid the
808 	 * handler seeing a half-initialised domain state.
809 	 */
810 	irq = smmu->irqs[cfg->irptndx];
811 
812 	if (smmu->impl && smmu->impl->context_fault)
813 		context_fault = smmu->impl->context_fault;
814 	else
815 		context_fault = arm_smmu_context_fault;
816 
817 	ret = devm_request_irq(smmu->dev, irq, context_fault,
818 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
819 	if (ret < 0) {
820 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
821 			cfg->irptndx, irq);
822 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
823 	}
824 
825 	mutex_unlock(&smmu_domain->init_mutex);
826 
827 	/* Publish page table ops for map/unmap */
828 	smmu_domain->pgtbl_ops = pgtbl_ops;
829 	return 0;
830 
831 out_clear_smmu:
832 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
833 	smmu_domain->smmu = NULL;
834 out_unlock:
835 	mutex_unlock(&smmu_domain->init_mutex);
836 	return ret;
837 }
838 
839 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
840 {
841 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
842 	struct arm_smmu_device *smmu = smmu_domain->smmu;
843 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
844 	int ret, irq;
845 
846 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
847 		return;
848 
849 	ret = arm_smmu_rpm_get(smmu);
850 	if (ret < 0)
851 		return;
852 
853 	/*
854 	 * Disable the context bank and free the page tables before freeing
855 	 * it.
856 	 */
857 	smmu->cbs[cfg->cbndx].cfg = NULL;
858 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
859 
860 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
861 		irq = smmu->irqs[cfg->irptndx];
862 		devm_free_irq(smmu->dev, irq, domain);
863 	}
864 
865 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
866 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
867 
868 	arm_smmu_rpm_put(smmu);
869 }
870 
871 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
872 {
873 	struct arm_smmu_domain *smmu_domain;
874 
875 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
876 		if (using_legacy_binding ||
877 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
878 			return NULL;
879 	}
880 	/*
881 	 * Allocate the domain and initialise some of its data structures.
882 	 * We can't really do anything meaningful until we've added a
883 	 * master.
884 	 */
885 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
886 	if (!smmu_domain)
887 		return NULL;
888 
889 	mutex_init(&smmu_domain->init_mutex);
890 	spin_lock_init(&smmu_domain->cb_lock);
891 
892 	return &smmu_domain->domain;
893 }
894 
/*
 * Release a domain and everything attached to it. All devices are assumed
 * to have been detached already.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	arm_smmu_destroy_domain_context(domain);
	kfree(to_smmu_domain(domain));
}
906 
907 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
908 {
909 	struct arm_smmu_smr *smr = smmu->smrs + idx;
910 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
911 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
912 
913 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
914 		reg |= ARM_SMMU_SMR_VALID;
915 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
916 }
917 
918 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
919 {
920 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
921 	u32 reg;
922 
923 	if (smmu->impl && smmu->impl->write_s2cr) {
924 		smmu->impl->write_s2cr(smmu, idx);
925 		return;
926 	}
927 
928 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
929 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
930 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
931 
932 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
933 	    smmu->smrs[idx].valid)
934 		reg |= ARM_SMMU_S2CR_EXIDVALID;
935 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
936 }
937 
938 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
939 {
940 	arm_smmu_write_s2cr(smmu, idx);
941 	if (smmu->smrs)
942 		arm_smmu_write_smr(smmu, idx);
943 }
944 
945 /*
946  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
947  * should be called after sCR0 is written.
948  */
949 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
950 {
951 	u32 smr;
952 	int i;
953 
954 	if (!smmu->smrs)
955 		return;
956 	/*
957 	 * If we've had to accommodate firmware memory regions, we may
958 	 * have live SMRs by now; tread carefully...
959 	 *
960 	 * Somewhat perversely, not having a free SMR for this test implies we
961 	 * can get away without it anyway, as we'll only be able to 'allocate'
962 	 * these SMRs for the ID/mask values we're already trusting to be OK.
963 	 */
964 	for (i = 0; i < smmu->num_mapping_groups; i++)
965 		if (!smmu->smrs[i].valid)
966 			goto smr_ok;
967 	return;
968 smr_ok:
969 	/*
970 	 * SMR.ID bits may not be preserved if the corresponding MASK
971 	 * bits are set, so check each one separately. We can reject
972 	 * masters later if they try to claim IDs outside these masks.
973 	 */
974 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
978 
979 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
980 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
981 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
982 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
983 }
984 
985 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
986 {
987 	struct arm_smmu_smr *smrs = smmu->smrs;
988 	int i, free_idx = -ENOSPC;
989 
990 	/* Stream indexing is blissfully easy */
991 	if (!smrs)
992 		return id;
993 
994 	/* Validating SMRs is... less so */
995 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
996 		if (!smrs[i].valid) {
997 			/*
998 			 * Note the first free entry we come across, which
999 			 * we'll claim in the end if nothing else matches.
1000 			 */
1001 			if (free_idx < 0)
1002 				free_idx = i;
1003 			continue;
1004 		}
1005 		/*
1006 		 * If the new entry is _entirely_ matched by an existing entry,
1007 		 * then reuse that, with the guarantee that there also cannot
1008 		 * be any subsequent conflicting entries. In normal use we'd
1009 		 * expect simply identical entries for this case, but there's
1010 		 * no harm in accommodating the generalisation.
1011 		 */
1012 		if ((mask & smrs[i].mask) == mask &&
1013 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1014 			return i;
1015 		/*
1016 		 * If the new entry has any other overlap with an existing one,
1017 		 * though, then there always exists at least one stream ID
1018 		 * which would cause a conflict, and we can't allow that risk.
1019 		 */
1020 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1021 			return -EINVAL;
1022 	}
1023 
1024 	return free_idx;
1025 }
1026 
1027 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1028 {
1029 	if (--smmu->s2crs[idx].count)
1030 		return false;
1031 
1032 	smmu->s2crs[idx] = s2cr_init_val;
1033 	if (smmu->smrs)
1034 		smmu->smrs[idx].valid = false;
1035 
1036 	return true;
1037 }
1038 
1039 static int arm_smmu_master_alloc_smes(struct device *dev)
1040 {
1041 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1042 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1043 	struct arm_smmu_device *smmu = cfg->smmu;
1044 	struct arm_smmu_smr *smrs = smmu->smrs;
1045 	int i, idx, ret;
1046 
1047 	mutex_lock(&smmu->stream_map_mutex);
1048 	/* Figure out a viable stream map entry allocation */
1049 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1050 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1051 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1052 
1053 		if (idx != INVALID_SMENDX) {
1054 			ret = -EEXIST;
1055 			goto out_err;
1056 		}
1057 
1058 		ret = arm_smmu_find_sme(smmu, sid, mask);
1059 		if (ret < 0)
1060 			goto out_err;
1061 
1062 		idx = ret;
1063 		if (smrs && smmu->s2crs[idx].count == 0) {
1064 			smrs[idx].id = sid;
1065 			smrs[idx].mask = mask;
1066 			smrs[idx].valid = true;
1067 		}
1068 		smmu->s2crs[idx].count++;
1069 		cfg->smendx[i] = (s16)idx;
1070 	}
1071 
1072 	/* It worked! Now, poke the actual hardware */
1073 	for_each_cfg_sme(cfg, fwspec, i, idx)
1074 		arm_smmu_write_sme(smmu, idx);
1075 
1076 	mutex_unlock(&smmu->stream_map_mutex);
1077 	return 0;
1078 
1079 out_err:
1080 	while (i--) {
1081 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1082 		cfg->smendx[i] = INVALID_SMENDX;
1083 	}
1084 	mutex_unlock(&smmu->stream_map_mutex);
1085 	return ret;
1086 }
1087 
1088 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1089 				      struct iommu_fwspec *fwspec)
1090 {
1091 	struct arm_smmu_device *smmu = cfg->smmu;
1092 	int i, idx;
1093 
1094 	mutex_lock(&smmu->stream_map_mutex);
1095 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1096 		if (arm_smmu_free_sme(smmu, idx))
1097 			arm_smmu_write_sme(smmu, idx);
1098 		cfg->smendx[i] = INVALID_SMENDX;
1099 	}
1100 	mutex_unlock(&smmu->stream_map_mutex);
1101 }
1102 
1103 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1104 				      struct arm_smmu_master_cfg *cfg,
1105 				      struct iommu_fwspec *fwspec)
1106 {
1107 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1108 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1109 	u8 cbndx = smmu_domain->cfg.cbndx;
1110 	enum arm_smmu_s2cr_type type;
1111 	int i, idx;
1112 
1113 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1114 		type = S2CR_TYPE_BYPASS;
1115 	else
1116 		type = S2CR_TYPE_TRANS;
1117 
1118 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1119 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1120 			continue;
1121 
1122 		s2cr[idx].type = type;
1123 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1124 		s2cr[idx].cbndx = cbndx;
1125 		arm_smmu_write_s2cr(smmu, idx);
1126 	}
1127 	return 0;
1128 }
1129 
1130 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1131 {
1132 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1133 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1134 	struct arm_smmu_master_cfg *cfg;
1135 	struct arm_smmu_device *smmu;
1136 	int ret;
1137 
1138 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1139 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1140 		return -ENXIO;
1141 	}
1142 
1143 	/*
1144 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1145 	 * domains between of_xlate() and probe_device() - we have no way to cope
1146 	 * with that, so until ARM gets converted to rely on groups and default
1147 	 * domains, just say no (but more politely than by dereferencing NULL).
1148 	 * This should be at least a WARN_ON once that's sorted.
1149 	 */
1150 	cfg = dev_iommu_priv_get(dev);
1151 	if (!cfg)
1152 		return -ENODEV;
1153 
1154 	smmu = cfg->smmu;
1155 
1156 	ret = arm_smmu_rpm_get(smmu);
1157 	if (ret < 0)
1158 		return ret;
1159 
1160 	/* Ensure that the domain is finalised */
1161 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1162 	if (ret < 0)
1163 		goto rpm_put;
1164 
1165 	/*
1166 	 * Sanity check the domain. We don't support domains across
1167 	 * different SMMUs.
1168 	 */
1169 	if (smmu_domain->smmu != smmu) {
1170 		dev_err(dev,
1171 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1172 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1173 		ret = -EINVAL;
1174 		goto rpm_put;
1175 	}
1176 
1177 	/* Looks ok, so add the device to the domain */
1178 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1179 
1180 	/*
1181 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1182 	 * Otherwise, if a driver for a suspended consumer device
1183 	 * unmaps buffers, it will runpm resume/suspend for each one.
1184 	 *
1185 	 * For example, when used by a GPU device, when an application
1186 	 * or game exits, it can trigger unmapping 100s or 1000s of
1187 	 * buffers.  With a runpm cycle for each buffer, that adds up
1188 	 * to 5-10sec worth of reprogramming the context bank, while
1189 	 * the system appears to be locked up to the user.
1190 	 */
1191 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1192 	pm_runtime_use_autosuspend(smmu->dev);
1193 
1194 rpm_put:
1195 	arm_smmu_rpm_put(smmu);
1196 	return ret;
1197 }
1198 
1199 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1200 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1201 			      int prot, gfp_t gfp, size_t *mapped)
1202 {
1203 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1204 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1205 	int ret;
1206 
1207 	if (!ops)
1208 		return -ENODEV;
1209 
1210 	arm_smmu_rpm_get(smmu);
1211 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1212 	arm_smmu_rpm_put(smmu);
1213 
1214 	return ret;
1215 }
1216 
1217 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1218 				   size_t pgsize, size_t pgcount,
1219 				   struct iommu_iotlb_gather *iotlb_gather)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	size_t ret;
1224 
1225 	if (!ops)
1226 		return 0;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1236 {
1237 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1238 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1239 
1240 	if (smmu_domain->flush_ops) {
1241 		arm_smmu_rpm_get(smmu);
1242 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1243 		arm_smmu_rpm_put(smmu);
1244 	}
1245 }
1246 
1247 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1248 				struct iommu_iotlb_gather *gather)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (!smmu)
1254 		return;
1255 
1256 	arm_smmu_rpm_get(smmu);
1257 	if (smmu->version == ARM_SMMU_V2 ||
1258 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1259 		arm_smmu_tlb_sync_context(smmu_domain);
1260 	else
1261 		arm_smmu_tlb_sync_global(smmu);
1262 	arm_smmu_rpm_put(smmu);
1263 }
1264 
1265 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1266 					      dma_addr_t iova)
1267 {
1268 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1269 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1270 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1271 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1272 	struct device *dev = smmu->dev;
1273 	void __iomem *reg;
1274 	u32 tmp;
1275 	u64 phys;
1276 	unsigned long va, flags;
1277 	int ret, idx = cfg->cbndx;
1278 	phys_addr_t addr = 0;
1279 
1280 	ret = arm_smmu_rpm_get(smmu);
1281 	if (ret < 0)
1282 		return 0;
1283 
1284 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1285 	va = iova & ~0xfffUL;
1286 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1287 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1288 	else
1289 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1290 
1291 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1292 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1293 				      5, 50)) {
1294 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1295 		dev_err(dev,
1296 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1297 			&iova);
1298 		arm_smmu_rpm_put(smmu);
1299 		return ops->iova_to_phys(ops, iova);
1300 	}
1301 
1302 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1303 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1304 	if (phys & ARM_SMMU_CB_PAR_F) {
1305 		dev_err(dev, "translation fault!\n");
1306 		dev_err(dev, "PAR = 0x%llx\n", phys);
1307 		goto out;
1308 	}
1309 
1310 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1311 out:
1312 	arm_smmu_rpm_put(smmu);
1313 
1314 	return addr;
1315 }
1316 
1317 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1318 					dma_addr_t iova)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1321 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1322 
1323 	if (!ops)
1324 		return 0;
1325 
1326 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1327 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1328 		return arm_smmu_iova_to_phys_hard(domain, iova);
1329 
1330 	return ops->iova_to_phys(ops, iova);
1331 }
1332 
1333 static bool arm_smmu_capable(enum iommu_cap cap)
1334 {
1335 	switch (cap) {
1336 	case IOMMU_CAP_CACHE_COHERENCY:
1337 		/*
1338 		 * Return true here as the SMMU can always send out coherent
1339 		 * requests.
1340 		 */
1341 		return true;
1342 	case IOMMU_CAP_NOEXEC:
1343 		return true;
1344 	default:
1345 		return false;
1346 	}
1347 }
1348 
1349 static
1350 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1351 {
1352 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1353 							  fwnode);
1354 	put_device(dev);
1355 	return dev ? dev_get_drvdata(dev) : NULL;
1356 }
1357 
1358 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1359 {
1360 	struct arm_smmu_device *smmu = NULL;
1361 	struct arm_smmu_master_cfg *cfg;
1362 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1363 	int i, ret;
1364 
1365 	if (using_legacy_binding) {
1366 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1367 
1368 		/*
1369 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1370 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1371 		 * later use.
1372 		 */
1373 		fwspec = dev_iommu_fwspec_get(dev);
1374 		if (ret)
1375 			goto out_free;
1376 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1377 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1378 	} else {
1379 		return ERR_PTR(-ENODEV);
1380 	}
1381 
1382 	ret = -EINVAL;
1383 	for (i = 0; i < fwspec->num_ids; i++) {
1384 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1385 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1386 
1387 		if (sid & ~smmu->streamid_mask) {
1388 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1389 				sid, smmu->streamid_mask);
1390 			goto out_free;
1391 		}
1392 		if (mask & ~smmu->smr_mask_mask) {
1393 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1394 				mask, smmu->smr_mask_mask);
1395 			goto out_free;
1396 		}
1397 	}
1398 
1399 	ret = -ENOMEM;
1400 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1401 		      GFP_KERNEL);
1402 	if (!cfg)
1403 		goto out_free;
1404 
1405 	cfg->smmu = smmu;
1406 	dev_iommu_priv_set(dev, cfg);
1407 	while (i--)
1408 		cfg->smendx[i] = INVALID_SMENDX;
1409 
1410 	ret = arm_smmu_rpm_get(smmu);
1411 	if (ret < 0)
1412 		goto out_cfg_free;
1413 
1414 	ret = arm_smmu_master_alloc_smes(dev);
1415 	arm_smmu_rpm_put(smmu);
1416 
1417 	if (ret)
1418 		goto out_cfg_free;
1419 
1420 	device_link_add(dev, smmu->dev,
1421 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1422 
1423 	return &smmu->iommu;
1424 
1425 out_cfg_free:
1426 	kfree(cfg);
1427 out_free:
1428 	iommu_fwspec_free(dev);
1429 	return ERR_PTR(ret);
1430 }
1431 
1432 static void arm_smmu_release_device(struct device *dev)
1433 {
1434 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1435 	struct arm_smmu_master_cfg *cfg;
1436 	struct arm_smmu_device *smmu;
1437 	int ret;
1438 
1439 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1440 		return;
1441 
1442 	cfg  = dev_iommu_priv_get(dev);
1443 	smmu = cfg->smmu;
1444 
1445 	ret = arm_smmu_rpm_get(smmu);
1446 	if (ret < 0)
1447 		return;
1448 
1449 	arm_smmu_master_free_smes(cfg, fwspec);
1450 
1451 	arm_smmu_rpm_put(smmu);
1452 
1453 	dev_iommu_priv_set(dev, NULL);
1454 	kfree(cfg);
1455 	iommu_fwspec_free(dev);
1456 }
1457 
1458 static void arm_smmu_probe_finalize(struct device *dev)
1459 {
1460 	struct arm_smmu_master_cfg *cfg;
1461 	struct arm_smmu_device *smmu;
1462 
1463 	cfg = dev_iommu_priv_get(dev);
1464 	smmu = cfg->smmu;
1465 
1466 	if (smmu->impl && smmu->impl->probe_finalize)
1467 		smmu->impl->probe_finalize(smmu, dev);
1468 }
1469 
1470 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1471 {
1472 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1473 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1474 	struct arm_smmu_device *smmu = cfg->smmu;
1475 	struct iommu_group *group = NULL;
1476 	int i, idx;
1477 
1478 	mutex_lock(&smmu->stream_map_mutex);
1479 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1480 		if (group && smmu->s2crs[idx].group &&
1481 		    group != smmu->s2crs[idx].group) {
1482 			mutex_unlock(&smmu->stream_map_mutex);
1483 			return ERR_PTR(-EINVAL);
1484 		}
1485 
1486 		group = smmu->s2crs[idx].group;
1487 	}
1488 
1489 	if (group) {
1490 		mutex_unlock(&smmu->stream_map_mutex);
1491 		return iommu_group_ref_get(group);
1492 	}
1493 
1494 	if (dev_is_pci(dev))
1495 		group = pci_device_group(dev);
1496 	else if (dev_is_fsl_mc(dev))
1497 		group = fsl_mc_device_group(dev);
1498 	else
1499 		group = generic_device_group(dev);
1500 
1501 	/* Remember group for faster lookups */
1502 	if (!IS_ERR(group))
1503 		for_each_cfg_sme(cfg, fwspec, i, idx)
1504 			smmu->s2crs[idx].group = group;
1505 
1506 	mutex_unlock(&smmu->stream_map_mutex);
1507 	return group;
1508 }
1509 
1510 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1511 {
1512 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513 	int ret = 0;
1514 
1515 	mutex_lock(&smmu_domain->init_mutex);
1516 	if (smmu_domain->smmu)
1517 		ret = -EPERM;
1518 	else
1519 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1520 	mutex_unlock(&smmu_domain->init_mutex);
1521 
1522 	return ret;
1523 }
1524 
1525 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1526 		unsigned long quirks)
1527 {
1528 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1529 	int ret = 0;
1530 
1531 	mutex_lock(&smmu_domain->init_mutex);
1532 	if (smmu_domain->smmu)
1533 		ret = -EPERM;
1534 	else
1535 		smmu_domain->pgtbl_quirks = quirks;
1536 	mutex_unlock(&smmu_domain->init_mutex);
1537 
1538 	return ret;
1539 }
1540 
1541 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1542 {
1543 	u32 mask, fwid = 0;
1544 
1545 	if (args->args_count > 0)
1546 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1547 
1548 	if (args->args_count > 1)
1549 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1550 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1551 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1552 
1553 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1554 }
1555 
1556 static void arm_smmu_get_resv_regions(struct device *dev,
1557 				      struct list_head *head)
1558 {
1559 	struct iommu_resv_region *region;
1560 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1561 
1562 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1563 					 prot, IOMMU_RESV_SW_MSI);
1564 	if (!region)
1565 		return;
1566 
1567 	list_add_tail(&region->list, head);
1568 
1569 	iommu_dma_get_resv_regions(dev, head);
1570 }
1571 
1572 static int arm_smmu_def_domain_type(struct device *dev)
1573 {
1574 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1575 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1576 
1577 	if (impl && impl->def_domain_type)
1578 		return impl->def_domain_type(dev);
1579 
1580 	return 0;
1581 }
1582 
/*
 * IOMMU core callbacks for this driver.
 *
 * Not const: arm_smmu_device_cfg_probe() updates .pgsize_bitmap at probe
 * time, replacing the initial -1UL placeholder with the page sizes the
 * probed SMMU(s) support.
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.probe_finalize		= arm_smmu_probe_finalize,
	.device_group		= arm_smmu_device_group,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
	/* Per-domain operations used for domains created by this driver */
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= arm_smmu_attach_dev,
		.map_pages		= arm_smmu_map_pages,
		.unmap_pages		= arm_smmu_unmap_pages,
		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
		.iotlb_sync		= arm_smmu_iotlb_sync,
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.enable_nesting		= arm_smmu_enable_nesting,
		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
		.free			= arm_smmu_domain_free,
	}
};
1608 
1609 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1610 {
1611 	int i;
1612 	u32 reg;
1613 
1614 	/* clear global FSR */
1615 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1616 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1617 
1618 	/*
1619 	 * Reset stream mapping groups: Initial values mark all SMRn as
1620 	 * invalid and all S2CRn as bypass unless overridden.
1621 	 */
1622 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1623 		arm_smmu_write_sme(smmu, i);
1624 
1625 	/* Make sure all context banks are disabled and clear CB_FSR  */
1626 	for (i = 0; i < smmu->num_context_banks; ++i) {
1627 		arm_smmu_write_context_bank(smmu, i);
1628 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1629 	}
1630 
1631 	/* Invalidate the TLB, just in case */
1632 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1633 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1634 
1635 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1636 
1637 	/* Enable fault reporting */
1638 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1639 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1640 
1641 	/* Disable TLB broadcasting. */
1642 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1643 
1644 	/* Enable client access, handling unmatched streams as appropriate */
1645 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1646 	if (disable_bypass)
1647 		reg |= ARM_SMMU_sCR0_USFCFG;
1648 	else
1649 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1650 
1651 	/* Disable forced broadcasting */
1652 	reg &= ~ARM_SMMU_sCR0_FB;
1653 
1654 	/* Don't upgrade barriers */
1655 	reg &= ~(ARM_SMMU_sCR0_BSU);
1656 
1657 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1658 		reg |= ARM_SMMU_sCR0_VMID16EN;
1659 
1660 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1661 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1662 
1663 	if (smmu->impl && smmu->impl->reset)
1664 		smmu->impl->reset(smmu);
1665 
1666 	/* Push the button */
1667 	arm_smmu_tlb_sync_global(smmu);
1668 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1669 }
1670 
/*
 * Convert an address-size field encoding from the ID registers into a
 * width in bits. Encodings 0..4 map to 32/36/40/42/44 bits; 5 and any
 * other value yield 48.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44 };
	const int known = (int)(sizeof(bits) / sizeof(bits[0]));

	if (size >= 0 && size < known)
		return bits[size];

	return 48;
}
1689 
1690 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1691 {
1692 	unsigned int size;
1693 	u32 id;
1694 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1695 	int i, ret;
1696 
1697 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1698 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1699 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1700 
1701 	/* ID0 */
1702 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1703 
1704 	/* Restrict available stages based on module parameter */
1705 	if (force_stage == 1)
1706 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1707 	else if (force_stage == 2)
1708 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1709 
1710 	if (id & ARM_SMMU_ID0_S1TS) {
1711 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1712 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1713 	}
1714 
1715 	if (id & ARM_SMMU_ID0_S2TS) {
1716 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1717 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1718 	}
1719 
1720 	if (id & ARM_SMMU_ID0_NTS) {
1721 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1722 		dev_notice(smmu->dev, "\tnested translation\n");
1723 	}
1724 
1725 	if (!(smmu->features &
1726 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1727 		dev_err(smmu->dev, "\tno translation support!\n");
1728 		return -ENODEV;
1729 	}
1730 
1731 	if ((id & ARM_SMMU_ID0_S1TS) &&
1732 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1733 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1734 		dev_notice(smmu->dev, "\taddress translation ops\n");
1735 	}
1736 
1737 	/*
1738 	 * In order for DMA API calls to work properly, we must defer to what
1739 	 * the FW says about coherency, regardless of what the hardware claims.
1740 	 * Fortunately, this also opens up a workaround for systems where the
1741 	 * ID register value has ended up configured incorrectly.
1742 	 */
1743 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1744 	if (cttw_fw || cttw_reg)
1745 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1746 			   cttw_fw ? "" : "non-");
1747 	if (cttw_fw != cttw_reg)
1748 		dev_notice(smmu->dev,
1749 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1750 
1751 	/* Max. number of entries we have for stream matching/indexing */
1752 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1753 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1754 		size = 1 << 16;
1755 	} else {
1756 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1757 	}
1758 	smmu->streamid_mask = size - 1;
1759 	if (id & ARM_SMMU_ID0_SMS) {
1760 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1761 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1762 		if (size == 0) {
1763 			dev_err(smmu->dev,
1764 				"stream-matching supported, but no SMRs present!\n");
1765 			return -ENODEV;
1766 		}
1767 
1768 		/* Zero-initialised to mark as invalid */
1769 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1770 					  GFP_KERNEL);
1771 		if (!smmu->smrs)
1772 			return -ENOMEM;
1773 
1774 		dev_notice(smmu->dev,
1775 			   "\tstream matching with %u register groups", size);
1776 	}
1777 	/* s2cr->type == 0 means translation, so initialise explicitly */
1778 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1779 					 GFP_KERNEL);
1780 	if (!smmu->s2crs)
1781 		return -ENOMEM;
1782 	for (i = 0; i < size; i++)
1783 		smmu->s2crs[i] = s2cr_init_val;
1784 
1785 	smmu->num_mapping_groups = size;
1786 	mutex_init(&smmu->stream_map_mutex);
1787 	spin_lock_init(&smmu->global_sync_lock);
1788 
1789 	if (smmu->version < ARM_SMMU_V2 ||
1790 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1791 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1792 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1793 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1794 	}
1795 
1796 	/* ID1 */
1797 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1798 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1799 
1800 	/* Check for size mismatch of SMMU address space from mapped region */
1801 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1802 	if (smmu->numpage != 2 * size << smmu->pgshift)
1803 		dev_warn(smmu->dev,
1804 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1805 			2 * size << smmu->pgshift, smmu->numpage);
1806 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1807 	smmu->numpage = size;
1808 
1809 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1810 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1811 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1812 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1813 		return -ENODEV;
1814 	}
1815 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1816 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1817 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1818 				 sizeof(*smmu->cbs), GFP_KERNEL);
1819 	if (!smmu->cbs)
1820 		return -ENOMEM;
1821 
1822 	/* ID2 */
1823 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1824 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1825 	smmu->ipa_size = size;
1826 
1827 	/* The output mask is also applied for bypass */
1828 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1829 	smmu->pa_size = size;
1830 
1831 	if (id & ARM_SMMU_ID2_VMID16)
1832 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1833 
1834 	/*
1835 	 * What the page table walker can address actually depends on which
1836 	 * descriptor format is in use, but since a) we don't know that yet,
1837 	 * and b) it can vary per context bank, this will have to do...
1838 	 */
1839 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1840 		dev_warn(smmu->dev,
1841 			 "failed to set DMA mask for table walker\n");
1842 
1843 	if (smmu->version < ARM_SMMU_V2) {
1844 		smmu->va_size = smmu->ipa_size;
1845 		if (smmu->version == ARM_SMMU_V1_64K)
1846 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1847 	} else {
1848 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1849 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1850 		if (id & ARM_SMMU_ID2_PTFS_4K)
1851 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1852 		if (id & ARM_SMMU_ID2_PTFS_16K)
1853 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1854 		if (id & ARM_SMMU_ID2_PTFS_64K)
1855 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1856 	}
1857 
1858 	if (smmu->impl && smmu->impl->cfg_probe) {
1859 		ret = smmu->impl->cfg_probe(smmu);
1860 		if (ret)
1861 			return ret;
1862 	}
1863 
1864 	/* Now we've corralled the various formats, what'll it do? */
1865 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1866 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1867 	if (smmu->features &
1868 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1869 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1870 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1871 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1872 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1873 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1874 
1875 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1876 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1877 	else
1878 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1879 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1880 		   smmu->pgsize_bitmap);
1881 
1882 
1883 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1884 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1885 			   smmu->va_size, smmu->ipa_size);
1886 
1887 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1888 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1889 			   smmu->ipa_size, smmu->pa_size);
1890 
1891 	return 0;
1892 }
1893 
/*
 * Per-compatible match data: the architecture version and implementation
 * model that get copied into the arm_smmu_device at DT probe time.
 */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

/* Define a static const arm_smmu_match_data instance called @name */
#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1908 
/*
 * Device tree compatibles handled by this driver, each paired with the
 * match data describing its version and model. Several compatibles share
 * one entry (e.g. "arm,mmu-400" uses the generic v1 data and
 * "nvidia,smmu-500" uses the MMU-500 data).
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },	/* sentinel */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1921 
1922 #ifdef CONFIG_ACPI
1923 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1924 {
1925 	int ret = 0;
1926 
1927 	switch (model) {
1928 	case ACPI_IORT_SMMU_V1:
1929 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1930 		smmu->version = ARM_SMMU_V1;
1931 		smmu->model = GENERIC_SMMU;
1932 		break;
1933 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1934 		smmu->version = ARM_SMMU_V1_64K;
1935 		smmu->model = GENERIC_SMMU;
1936 		break;
1937 	case ACPI_IORT_SMMU_V2:
1938 		smmu->version = ARM_SMMU_V2;
1939 		smmu->model = GENERIC_SMMU;
1940 		break;
1941 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1942 		smmu->version = ARM_SMMU_V2;
1943 		smmu->model = ARM_MMU500;
1944 		break;
1945 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1946 		smmu->version = ARM_SMMU_V2;
1947 		smmu->model = CAVIUM_SMMUV2;
1948 		break;
1949 	default:
1950 		ret = -ENODEV;
1951 	}
1952 
1953 	return ret;
1954 }
1955 
1956 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1957 				      u32 *global_irqs, u32 *pmu_irqs)
1958 {
1959 	struct device *dev = smmu->dev;
1960 	struct acpi_iort_node *node =
1961 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1962 	struct acpi_iort_smmu *iort_smmu;
1963 	int ret;
1964 
1965 	/* Retrieve SMMU1/2 specific data */
1966 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1967 
1968 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1969 	if (ret < 0)
1970 		return ret;
1971 
1972 	/* Ignore the configuration access interrupt */
1973 	*global_irqs = 1;
1974 	*pmu_irqs = 0;
1975 
1976 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1977 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1978 
1979 	return 0;
1980 }
1981 #else
/*
 * Stub used when CONFIG_ACPI is disabled: ACPI probing is unavailable, so
 * always fail with -ENODEV without touching any of the arguments.
 */
static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
					     u32 *global_irqs, u32 *pmu_irqs)
{
	return -ENODEV;
}
1987 #endif
1988 
1989 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
1990 				    u32 *global_irqs, u32 *pmu_irqs)
1991 {
1992 	const struct arm_smmu_match_data *data;
1993 	struct device *dev = smmu->dev;
1994 	bool legacy_binding;
1995 
1996 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
1997 		return dev_err_probe(dev, -ENODEV,
1998 				     "missing #global-interrupts property\n");
1999 	*pmu_irqs = 0;
2000 
2001 	data = of_device_get_match_data(dev);
2002 	smmu->version = data->version;
2003 	smmu->model = data->model;
2004 
2005 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2006 	if (legacy_binding && !using_generic_binding) {
2007 		if (!using_legacy_binding) {
2008 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2009 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2010 		}
2011 		using_legacy_binding = true;
2012 	} else if (!legacy_binding && !using_legacy_binding) {
2013 		using_generic_binding = true;
2014 	} else {
2015 		dev_err(dev, "not probing due to mismatched DT properties\n");
2016 		return -ENODEV;
2017 	}
2018 
2019 	if (of_dma_is_coherent(dev->of_node))
2020 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2021 
2022 	return 0;
2023 }
2024 
/*
 * Install @ops as the IOMMU ops of every bus type this driver serves:
 * the platform bus always, plus AMBA, PCI and fsl-mc when the matching
 * config option is enabled. A bus for which iommu_present() is already
 * true is skipped.
 *
 * Returns 0 on success. If bus_set_iommu() fails for a bus, the buses
 * earlier in the sequence are reset with bus_set_iommu(bus, NULL) - this
 * happens whether or not this call was the one that installed ops on
 * them - and the error is returned.
 */
static int arm_smmu_bus_init(struct iommu_ops *ops)
{
	int err;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type)) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			return err;
	}
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype)) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_platform_ops;
	}
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type)) {
		err = bus_set_iommu(&fsl_mc_bus_type, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	return 0;

	/*
	 * Unwind in reverse order. Each goto above sits inside an #ifdef, so
	 * any of these labels can end up unreferenced in a given config;
	 * hence the __maybe_unused annotations.
	 */
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
err_reset_amba_ops: __maybe_unused;
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_platform_ops: __maybe_unused;
	bus_set_iommu(&platform_bus_type, NULL);
	return err;
}
2070 
2071 static int arm_smmu_device_probe(struct platform_device *pdev)
2072 {
2073 	struct resource *res;
2074 	resource_size_t ioaddr;
2075 	struct arm_smmu_device *smmu;
2076 	struct device *dev = &pdev->dev;
2077 	int num_irqs, i, err;
2078 	u32 global_irqs, pmu_irqs;
2079 	irqreturn_t (*global_fault)(int irq, void *dev);
2080 
2081 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2082 	if (!smmu) {
2083 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2084 		return -ENOMEM;
2085 	}
2086 	smmu->dev = dev;
2087 
2088 	if (dev->of_node)
2089 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2090 	else
2091 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2092 	if (err)
2093 		return err;
2094 
2095 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2096 	ioaddr = res->start;
2097 	smmu->base = devm_ioremap_resource(dev, res);
2098 	if (IS_ERR(smmu->base))
2099 		return PTR_ERR(smmu->base);
2100 	/*
2101 	 * The resource size should effectively match the value of SMMU_TOP;
2102 	 * stash that temporarily until we know PAGESIZE to validate it with.
2103 	 */
2104 	smmu->numpage = resource_size(res);
2105 
2106 	smmu = arm_smmu_impl_init(smmu);
2107 	if (IS_ERR(smmu))
2108 		return PTR_ERR(smmu);
2109 
2110 	num_irqs = platform_irq_count(pdev);
2111 
2112 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2113 	if (smmu->num_context_irqs <= 0)
2114 		return dev_err_probe(dev, -ENODEV,
2115 				"found %d interrupts but expected at least %d\n",
2116 				num_irqs, global_irqs + pmu_irqs + 1);
2117 
2118 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2119 				  sizeof(*smmu->irqs), GFP_KERNEL);
2120 	if (!smmu->irqs)
2121 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2122 				     smmu->num_context_irqs);
2123 
2124 	for (i = 0; i < smmu->num_context_irqs; i++) {
2125 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2126 
2127 		if (irq < 0)
2128 			return irq;
2129 		smmu->irqs[i] = irq;
2130 	}
2131 
2132 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2133 	if (err < 0) {
2134 		dev_err(dev, "failed to get clocks %d\n", err);
2135 		return err;
2136 	}
2137 	smmu->num_clks = err;
2138 
2139 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2140 	if (err)
2141 		return err;
2142 
2143 	err = arm_smmu_device_cfg_probe(smmu);
2144 	if (err)
2145 		return err;
2146 
2147 	if (smmu->version == ARM_SMMU_V2) {
2148 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2149 			dev_err(dev,
2150 			      "found only %d context irq(s) but %d required\n",
2151 			      smmu->num_context_irqs, smmu->num_context_banks);
2152 			return -ENODEV;
2153 		}
2154 
2155 		/* Ignore superfluous interrupts */
2156 		smmu->num_context_irqs = smmu->num_context_banks;
2157 	}
2158 
2159 	if (smmu->impl && smmu->impl->global_fault)
2160 		global_fault = smmu->impl->global_fault;
2161 	else
2162 		global_fault = arm_smmu_global_fault;
2163 
2164 	for (i = 0; i < global_irqs; i++) {
2165 		int irq = platform_get_irq(pdev, i);
2166 
2167 		if (irq < 0)
2168 			return irq;
2169 
2170 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2171 				       "arm-smmu global fault", smmu);
2172 		if (err)
2173 			return dev_err_probe(dev, err,
2174 					"failed to request global IRQ %d (%u)\n",
2175 					i, irq);
2176 	}
2177 
2178 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2179 				     "smmu.%pa", &ioaddr);
2180 	if (err) {
2181 		dev_err(dev, "Failed to register iommu in sysfs\n");
2182 		return err;
2183 	}
2184 
2185 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2186 	if (err) {
2187 		dev_err(dev, "Failed to register iommu\n");
2188 		goto err_sysfs_remove;
2189 	}
2190 
2191 	platform_set_drvdata(pdev, smmu);
2192 	arm_smmu_device_reset(smmu);
2193 	arm_smmu_test_smr_masks(smmu);
2194 
2195 	/*
2196 	 * We want to avoid touching dev->power.lock in fastpaths unless
2197 	 * it's really going to do something useful - pm_runtime_enabled()
2198 	 * can serve as an ideal proxy for that decision. So, conditionally
2199 	 * enable pm_runtime.
2200 	 */
2201 	if (dev->pm_domain) {
2202 		pm_runtime_set_active(dev);
2203 		pm_runtime_enable(dev);
2204 	}
2205 
2206 	/*
2207 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2208 	 * any device which might need it, so we want the bus ops in place
2209 	 * ready to handle default domain setup as soon as any SMMU exists.
2210 	 */
2211 	if (!using_legacy_binding) {
2212 		err = arm_smmu_bus_init(&arm_smmu_ops);
2213 		if (err)
2214 			goto err_unregister_device;
2215 	}
2216 
2217 	return 0;
2218 
2219 err_unregister_device:
2220 	iommu_device_unregister(&smmu->iommu);
2221 err_sysfs_remove:
2222 	iommu_device_sysfs_remove(&smmu->iommu);
2223 	return err;
2224 }
2225 
/*
 * Platform driver remove: tear down one SMMU instance.
 *
 * Unregisters the SMMU from the IOMMU core and sysfs, writes
 * ARM_SMMU_sCR0_CLIENTPD to sCR0 to turn the SMMU off, then stops the
 * clocks.
 *
 * Returns 0 on success, or -ENODEV if no driver data was set for @pdev.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	/* Some bit is still set in context_map: say what is about to happen */
	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_notice(&pdev->dev, "disabling translation\n");

	/*
	 * NOTE(review): arm_smmu_bus_init() only calls bus_set_iommu() for
	 * buses where iommu_present() is false - confirm this call has the
	 * intended effect of clearing the bus ops.
	 */
	arm_smmu_bus_init(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	/* Hold a runtime-PM reference around the register write */
	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	/*
	 * With runtime PM enabled, force a suspend; otherwise disable the
	 * clocks directly.
	 */
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	/* Unprepare is done here in both cases */
	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}
2253 
/*
 * Platform driver shutdown: performs the same teardown as remove; the
 * return value of arm_smmu_device_remove() is ignored.
 */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
2258 
2259 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2260 {
2261 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2262 	int ret;
2263 
2264 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2265 	if (ret)
2266 		return ret;
2267 
2268 	arm_smmu_device_reset(smmu);
2269 
2270 	return 0;
2271 }
2272 
2273 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2274 {
2275 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2276 
2277 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2278 
2279 	return 0;
2280 }
2281 
2282 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2283 {
2284 	int ret;
2285 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2286 
2287 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2288 	if (ret)
2289 		return ret;
2290 
2291 	if (pm_runtime_suspended(dev))
2292 		return 0;
2293 
2294 	ret = arm_smmu_runtime_resume(dev);
2295 	if (ret)
2296 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2297 
2298 	return ret;
2299 }
2300 
2301 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2302 {
2303 	int ret = 0;
2304 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2305 
2306 	if (pm_runtime_suspended(dev))
2307 		goto clk_unprepare;
2308 
2309 	ret = arm_smmu_runtime_suspend(dev);
2310 	if (ret)
2311 		return ret;
2312 
2313 clk_unprepare:
2314 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2315 	return ret;
2316 }
2317 
/*
 * Power-management callbacks: system sleep uses the pm_suspend/pm_resume
 * pair, runtime PM uses the runtime_suspend/runtime_resume pair (no
 * runtime idle callback is provided).
 */
static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
2323 
/*
 * Platform driver definition tying together the OF match table, the PM
 * ops and the probe/remove/shutdown entry points. Registered through
 * module_platform_driver().
 */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);
2336 
2337 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2338 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2339 MODULE_ALIAS("platform:arm-smmu");
2340 MODULE_LICENSE("GPL v2");
2341