1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <linux/pm_runtime.h>
37 #include <linux/ratelimit.h>
38 #include <linux/slab.h>
39 
40 #include <linux/amba/bus.h>
41 #include <linux/fsl/mc.h>
42 
43 #include "arm-smmu.h"
44 
/*
 * Some Qualcomm arm64 platforms that appear to expose the SMMU global
 * register space actually have a hypervisor mediating it by trapping and
 * emulating register accesses. Some deployed versions of that trapping code
 * misbehave for stores that use r31 (i.e. XZR/WZR) as the source register,
 * so "value is ignored" register writes in this file use a non-zero dummy
 * instead of 0.
 *
 * The replacement list is parenthesised so the negative constant always
 * expands as a single operand, whatever expression it is pasted into.
 */
#define QCOM_DUMMY_VAL			(-1)

/* IOVA base and length used by this driver for its MSI window. */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
56 
57 static int force_stage;
58 module_param(force_stage, int, S_IRUGO);
59 MODULE_PARM_DESC(force_stage,
60 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
61 static bool disable_bypass =
62 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
63 module_param(disable_bypass, bool, S_IRUGO);
64 MODULE_PARM_DESC(disable_bypass,
65 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
66 
/*
 * Initial value for an S2CR entry: fault the stream when bypass is disabled,
 * otherwise let it bypass. All other fields of the compound literal are zero.
 */
#define s2cr_init_val							\
	(struct arm_smmu_s2cr) {					\
		.type = disable_bypass ? S2CR_TYPE_FAULT		\
				       : S2CR_TYPE_BYPASS,		\
	}
70 
71 static bool using_legacy_binding, using_generic_binding;
72 
73 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
74 {
75 	if (pm_runtime_enabled(smmu->dev))
76 		return pm_runtime_resume_and_get(smmu->dev);
77 
78 	return 0;
79 }
80 
81 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
82 {
83 	if (pm_runtime_enabled(smmu->dev))
84 		pm_runtime_put_autosuspend(smmu->dev);
85 }
86 
87 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
88 {
89 	return container_of(dom, struct arm_smmu_domain, domain);
90 }
91 
92 static struct platform_driver arm_smmu_driver;
93 static struct iommu_ops arm_smmu_ops;
94 
95 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
96 static int arm_smmu_bus_init(struct iommu_ops *ops);
97 
98 static struct device_node *dev_get_dev_node(struct device *dev)
99 {
100 	if (dev_is_pci(dev)) {
101 		struct pci_bus *bus = to_pci_dev(dev)->bus;
102 
103 		while (!pci_is_root_bus(bus))
104 			bus = bus->parent;
105 		return of_node_get(bus->bridge->parent->of_node);
106 	}
107 
108 	return of_node_get(dev->of_node);
109 }
110 
111 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
112 {
113 	*((__be32 *)data) = cpu_to_be32(alias);
114 	return 0; /* Continue walking */
115 }
116 
117 static int __find_legacy_master_phandle(struct device *dev, void *data)
118 {
119 	struct of_phandle_iterator *it = *(void **)data;
120 	struct device_node *np = it->node;
121 	int err;
122 
123 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
124 			    "#stream-id-cells", -1)
125 		if (it->node == np) {
126 			*(void **)data = dev;
127 			return 1;
128 		}
129 	it->node = np;
130 	return err == -ENOENT ? 0 : err;
131 }
132 
133 static int arm_smmu_register_legacy_master(struct device *dev,
134 					   struct arm_smmu_device **smmu)
135 {
136 	struct device *smmu_dev;
137 	struct device_node *np;
138 	struct of_phandle_iterator it;
139 	void *data = &it;
140 	u32 *sids;
141 	__be32 pci_sid;
142 	int err;
143 
144 	np = dev_get_dev_node(dev);
145 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
146 		of_node_put(np);
147 		return -ENODEV;
148 	}
149 
150 	it.node = np;
151 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
152 				     __find_legacy_master_phandle);
153 	smmu_dev = data;
154 	of_node_put(np);
155 	if (err == 0)
156 		return -ENODEV;
157 	if (err < 0)
158 		return err;
159 
160 	if (dev_is_pci(dev)) {
161 		/* "mmu-masters" assumes Stream ID == Requester ID */
162 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
163 				       &pci_sid);
164 		it.cur = &pci_sid;
165 		it.cur_count = 1;
166 	}
167 
168 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
169 				&arm_smmu_ops);
170 	if (err)
171 		return err;
172 
173 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
174 	if (!sids)
175 		return -ENOMEM;
176 
177 	*smmu = dev_get_drvdata(smmu_dev);
178 	of_phandle_iterator_args(&it, sids, it.cur_count);
179 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
180 	kfree(sids);
181 	return err;
182 }
183 
184 /*
185  * With the legacy DT binding in play, we have no guarantees about
186  * probe order, but then we're also not doing default domains, so we can
187  * delay setting bus ops until we're sure every possible SMMU is ready,
188  * and that way ensure that no probe_device() calls get missed.
189  */
190 static int arm_smmu_legacy_bus_init(void)
191 {
192 	if (using_legacy_binding)
193 		return arm_smmu_bus_init(&arm_smmu_ops);
194 	return 0;
195 }
196 device_initcall_sync(arm_smmu_legacy_bus_init);
197 #else
/*
 * Stub used when CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS is disabled: no device
 * can be a legacy master, so always report -ENODEV and leave *@smmu alone.
 */
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	const int no_legacy_support = -ENODEV;

	return no_legacy_support;
}
203 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
204 
/* Release index @idx in the allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
209 
210 /* Wait for any pending TLB invalidations to complete */
211 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
212 				int sync, int status)
213 {
214 	unsigned int spin_cnt, delay;
215 	u32 reg;
216 
217 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
218 		return smmu->impl->tlb_sync(smmu, page, sync, status);
219 
220 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
221 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
222 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
223 			reg = arm_smmu_readl(smmu, page, status);
224 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
225 				return;
226 			cpu_relax();
227 		}
228 		udelay(delay);
229 	}
230 	dev_err_ratelimited(smmu->dev,
231 			    "TLB sync timed out -- SMMU may be deadlocked\n");
232 }
233 
234 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
235 {
236 	unsigned long flags;
237 
238 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
239 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
240 			    ARM_SMMU_GR0_sTLBGSTATUS);
241 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
242 }
243 
244 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
245 {
246 	struct arm_smmu_device *smmu = smmu_domain->smmu;
247 	unsigned long flags;
248 
249 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
250 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
251 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
252 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
253 }
254 
255 static void arm_smmu_tlb_inv_context_s1(void *cookie)
256 {
257 	struct arm_smmu_domain *smmu_domain = cookie;
258 	/*
259 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
260 	 * current CPU are visible beforehand.
261 	 */
262 	wmb();
263 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
264 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
265 	arm_smmu_tlb_sync_context(smmu_domain);
266 }
267 
268 static void arm_smmu_tlb_inv_context_s2(void *cookie)
269 {
270 	struct arm_smmu_domain *smmu_domain = cookie;
271 	struct arm_smmu_device *smmu = smmu_domain->smmu;
272 
273 	/* See above */
274 	wmb();
275 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
276 	arm_smmu_tlb_sync_global(smmu);
277 }
278 
279 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
280 				      size_t granule, void *cookie, int reg)
281 {
282 	struct arm_smmu_domain *smmu_domain = cookie;
283 	struct arm_smmu_device *smmu = smmu_domain->smmu;
284 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
285 	int idx = cfg->cbndx;
286 
287 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
288 		wmb();
289 
290 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
291 		iova = (iova >> 12) << 12;
292 		iova |= cfg->asid;
293 		do {
294 			arm_smmu_cb_write(smmu, idx, reg, iova);
295 			iova += granule;
296 		} while (size -= granule);
297 	} else {
298 		iova >>= 12;
299 		iova |= (u64)cfg->asid << 48;
300 		do {
301 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
302 			iova += granule >> 12;
303 		} while (size -= granule);
304 	}
305 }
306 
307 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
308 				      size_t granule, void *cookie, int reg)
309 {
310 	struct arm_smmu_domain *smmu_domain = cookie;
311 	struct arm_smmu_device *smmu = smmu_domain->smmu;
312 	int idx = smmu_domain->cfg.cbndx;
313 
314 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
315 		wmb();
316 
317 	iova >>= 12;
318 	do {
319 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
320 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
321 		else
322 			arm_smmu_cb_write(smmu, idx, reg, iova);
323 		iova += granule >> 12;
324 	} while (size -= granule);
325 }
326 
327 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328 				     size_t granule, void *cookie)
329 {
330 	struct arm_smmu_domain *smmu_domain = cookie;
331 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
332 
333 	if (cfg->flush_walk_prefer_tlbiasid) {
334 		arm_smmu_tlb_inv_context_s1(cookie);
335 	} else {
336 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
337 					  ARM_SMMU_CB_S1_TLBIVA);
338 		arm_smmu_tlb_sync_context(cookie);
339 	}
340 }
341 
342 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
343 				     unsigned long iova, size_t granule,
344 				     void *cookie)
345 {
346 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
347 				  ARM_SMMU_CB_S1_TLBIVAL);
348 }
349 
350 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
351 				     size_t granule, void *cookie)
352 {
353 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
354 				  ARM_SMMU_CB_S2_TLBIIPAS2);
355 	arm_smmu_tlb_sync_context(cookie);
356 }
357 
358 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
359 				     unsigned long iova, size_t granule,
360 				     void *cookie)
361 {
362 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
363 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
364 }
365 
366 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
367 					size_t granule, void *cookie)
368 {
369 	arm_smmu_tlb_inv_context_s2(cookie);
370 }
371 /*
372  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
373  * almost negligible, but the benefit of getting the first one in as far ahead
374  * of the sync as possible is significant, hence we don't just make this a
375  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
376  * think.
377  */
378 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
379 					unsigned long iova, size_t granule,
380 					void *cookie)
381 {
382 	struct arm_smmu_domain *smmu_domain = cookie;
383 	struct arm_smmu_device *smmu = smmu_domain->smmu;
384 
385 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
386 		wmb();
387 
388 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
389 }
390 
391 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
392 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
393 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
394 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
395 };
396 
397 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
398 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
399 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
400 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
401 };
402 
403 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
404 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
405 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
406 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
407 };
408 
409 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
410 {
411 	u32 fsr, fsynr, cbfrsynra;
412 	unsigned long iova;
413 	struct iommu_domain *domain = dev;
414 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
415 	struct arm_smmu_device *smmu = smmu_domain->smmu;
416 	int idx = smmu_domain->cfg.cbndx;
417 	int ret;
418 
419 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
420 	if (!(fsr & ARM_SMMU_FSR_FAULT))
421 		return IRQ_NONE;
422 
423 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
424 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
425 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
426 
427 	ret = report_iommu_fault(domain, NULL, iova,
428 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
429 
430 	if (ret == -ENOSYS)
431 		dev_err_ratelimited(smmu->dev,
432 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
433 			    fsr, iova, fsynr, cbfrsynra, idx);
434 
435 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
436 	return IRQ_HANDLED;
437 }
438 
439 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
440 {
441 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
442 	struct arm_smmu_device *smmu = dev;
443 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
444 				      DEFAULT_RATELIMIT_BURST);
445 
446 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
447 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
448 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
449 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
450 
451 	if (!gfsr)
452 		return IRQ_NONE;
453 
454 	if (__ratelimit(&rs)) {
455 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
456 		    (gfsr & ARM_SMMU_sGFSR_USF))
457 			dev_err(smmu->dev,
458 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
459 				(u16)gfsynr1);
460 		else
461 			dev_err(smmu->dev,
462 				"Unexpected global fault, this could be serious\n");
463 		dev_err(smmu->dev,
464 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
465 			gfsr, gfsynr0, gfsynr1, gfsynr2);
466 	}
467 
468 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
469 	return IRQ_HANDLED;
470 }
471 
472 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
473 				       struct io_pgtable_cfg *pgtbl_cfg)
474 {
475 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
476 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
477 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
478 
479 	cb->cfg = cfg;
480 
481 	/* TCR */
482 	if (stage1) {
483 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
484 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
485 		} else {
486 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
487 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
488 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
489 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
490 			else
491 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
492 		}
493 	} else {
494 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
495 	}
496 
497 	/* TTBRs */
498 	if (stage1) {
499 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
500 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
501 			cb->ttbr[1] = 0;
502 		} else {
503 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
506 						 cfg->asid);
507 
508 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
509 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 			else
511 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
512 		}
513 	} else {
514 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
515 	}
516 
517 	/* MAIRs (stage-1 only) */
518 	if (stage1) {
519 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
520 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
521 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
522 		} else {
523 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
524 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
525 		}
526 	}
527 }
528 
529 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
530 {
531 	u32 reg;
532 	bool stage1;
533 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
534 	struct arm_smmu_cfg *cfg = cb->cfg;
535 
536 	/* Unassigned context banks only need disabling */
537 	if (!cfg) {
538 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
539 		return;
540 	}
541 
542 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
543 
544 	/* CBA2R */
545 	if (smmu->version > ARM_SMMU_V1) {
546 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
547 			reg = ARM_SMMU_CBA2R_VA64;
548 		else
549 			reg = 0;
550 		/* 16-bit VMIDs live in CBA2R */
551 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
552 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
553 
554 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
555 	}
556 
557 	/* CBAR */
558 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
559 	if (smmu->version < ARM_SMMU_V2)
560 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
561 
562 	/*
563 	 * Use the weakest shareability/memory types, so they are
564 	 * overridden by the ttbcr/pte.
565 	 */
566 	if (stage1) {
567 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
568 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
569 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
570 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
571 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
572 		/* 8-bit VMIDs live in CBAR */
573 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
574 	}
575 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
576 
577 	/*
578 	 * TCR
579 	 * We must write this before the TTBRs, since it determines the
580 	 * access behaviour of some fields (in particular, ASID[15:8]).
581 	 */
582 	if (stage1 && smmu->version > ARM_SMMU_V1)
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
584 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
585 
586 	/* TTBRs */
587 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
589 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
590 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
591 	} else {
592 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
593 		if (stage1)
594 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
595 					   cb->ttbr[1]);
596 	}
597 
598 	/* MAIRs (stage-1 only) */
599 	if (stage1) {
600 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
601 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
602 	}
603 
604 	/* SCTLR */
605 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
606 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
607 	if (stage1)
608 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
609 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
610 		reg |= ARM_SMMU_SCTLR_E;
611 
612 	if (smmu->impl && smmu->impl->write_sctlr)
613 		smmu->impl->write_sctlr(smmu, idx, reg);
614 	else
615 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
616 }
617 
618 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
619 				       struct arm_smmu_device *smmu,
620 				       struct device *dev, unsigned int start)
621 {
622 	if (smmu->impl && smmu->impl->alloc_context_bank)
623 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
624 
625 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
626 }
627 
628 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
629 					struct arm_smmu_device *smmu,
630 					struct device *dev)
631 {
632 	int irq, start, ret = 0;
633 	unsigned long ias, oas;
634 	struct io_pgtable_ops *pgtbl_ops;
635 	struct io_pgtable_cfg pgtbl_cfg;
636 	enum io_pgtable_fmt fmt;
637 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
638 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
639 	irqreturn_t (*context_fault)(int irq, void *dev);
640 
641 	mutex_lock(&smmu_domain->init_mutex);
642 	if (smmu_domain->smmu)
643 		goto out_unlock;
644 
645 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
646 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
647 		smmu_domain->smmu = smmu;
648 		goto out_unlock;
649 	}
650 
651 	/*
652 	 * Mapping the requested stage onto what we support is surprisingly
653 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
654 	 * support for nested translation. That means we end up with the
655 	 * following table:
656 	 *
657 	 * Requested        Supported        Actual
658 	 *     S1               N              S1
659 	 *     S1             S1+S2            S1
660 	 *     S1               S2             S2
661 	 *     S1               S1             S1
662 	 *     N                N              N
663 	 *     N              S1+S2            S2
664 	 *     N                S2             S2
665 	 *     N                S1             S1
666 	 *
667 	 * Note that you can't actually request stage-2 mappings.
668 	 */
669 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
670 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
671 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
672 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
673 
674 	/*
675 	 * Choosing a suitable context format is even more fiddly. Until we
676 	 * grow some way for the caller to express a preference, and/or move
677 	 * the decision into the io-pgtable code where it arguably belongs,
678 	 * just aim for the closest thing to the rest of the system, and hope
679 	 * that the hardware isn't esoteric enough that we can't assume AArch64
680 	 * support to be a superset of AArch32 support...
681 	 */
682 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
683 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
684 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
685 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
686 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
687 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
688 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
689 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
690 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
691 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
692 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
693 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
694 
695 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
696 		ret = -EINVAL;
697 		goto out_unlock;
698 	}
699 
700 	switch (smmu_domain->stage) {
701 	case ARM_SMMU_DOMAIN_S1:
702 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
703 		start = smmu->num_s2_context_banks;
704 		ias = smmu->va_size;
705 		oas = smmu->ipa_size;
706 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
707 			fmt = ARM_64_LPAE_S1;
708 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
709 			fmt = ARM_32_LPAE_S1;
710 			ias = min(ias, 32UL);
711 			oas = min(oas, 40UL);
712 		} else {
713 			fmt = ARM_V7S;
714 			ias = min(ias, 32UL);
715 			oas = min(oas, 32UL);
716 		}
717 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
718 		break;
719 	case ARM_SMMU_DOMAIN_NESTED:
720 		/*
721 		 * We will likely want to change this if/when KVM gets
722 		 * involved.
723 		 */
724 	case ARM_SMMU_DOMAIN_S2:
725 		cfg->cbar = CBAR_TYPE_S2_TRANS;
726 		start = 0;
727 		ias = smmu->ipa_size;
728 		oas = smmu->pa_size;
729 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
730 			fmt = ARM_64_LPAE_S2;
731 		} else {
732 			fmt = ARM_32_LPAE_S2;
733 			ias = min(ias, 40UL);
734 			oas = min(oas, 40UL);
735 		}
736 		if (smmu->version == ARM_SMMU_V2)
737 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
738 		else
739 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
740 		break;
741 	default:
742 		ret = -EINVAL;
743 		goto out_unlock;
744 	}
745 
746 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
747 	if (ret < 0) {
748 		goto out_unlock;
749 	}
750 
751 	smmu_domain->smmu = smmu;
752 
753 	cfg->cbndx = ret;
754 	if (smmu->version < ARM_SMMU_V2) {
755 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
756 		cfg->irptndx %= smmu->num_context_irqs;
757 	} else {
758 		cfg->irptndx = cfg->cbndx;
759 	}
760 
761 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
762 		cfg->vmid = cfg->cbndx + 1;
763 	else
764 		cfg->asid = cfg->cbndx;
765 
766 	pgtbl_cfg = (struct io_pgtable_cfg) {
767 		.pgsize_bitmap	= smmu->pgsize_bitmap,
768 		.ias		= ias,
769 		.oas		= oas,
770 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
771 		.tlb		= smmu_domain->flush_ops,
772 		.iommu_dev	= smmu->dev,
773 	};
774 
775 	if (smmu->impl && smmu->impl->init_context) {
776 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
777 		if (ret)
778 			goto out_clear_smmu;
779 	}
780 
781 	if (smmu_domain->pgtbl_quirks)
782 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
783 
784 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
785 	if (!pgtbl_ops) {
786 		ret = -ENOMEM;
787 		goto out_clear_smmu;
788 	}
789 
790 	/* Update the domain's page sizes to reflect the page table format */
791 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
792 
793 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
794 		domain->geometry.aperture_start = ~0UL << ias;
795 		domain->geometry.aperture_end = ~0UL;
796 	} else {
797 		domain->geometry.aperture_end = (1UL << ias) - 1;
798 	}
799 
800 	domain->geometry.force_aperture = true;
801 
802 	/* Initialise the context bank with our page table cfg */
803 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
804 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
805 
806 	/*
807 	 * Request context fault interrupt. Do this last to avoid the
808 	 * handler seeing a half-initialised domain state.
809 	 */
810 	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
811 
812 	if (smmu->impl && smmu->impl->context_fault)
813 		context_fault = smmu->impl->context_fault;
814 	else
815 		context_fault = arm_smmu_context_fault;
816 
817 	ret = devm_request_irq(smmu->dev, irq, context_fault,
818 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
819 	if (ret < 0) {
820 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
821 			cfg->irptndx, irq);
822 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
823 	}
824 
825 	mutex_unlock(&smmu_domain->init_mutex);
826 
827 	/* Publish page table ops for map/unmap */
828 	smmu_domain->pgtbl_ops = pgtbl_ops;
829 	return 0;
830 
831 out_clear_smmu:
832 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
833 	smmu_domain->smmu = NULL;
834 out_unlock:
835 	mutex_unlock(&smmu_domain->init_mutex);
836 	return ret;
837 }
838 
839 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
840 {
841 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
842 	struct arm_smmu_device *smmu = smmu_domain->smmu;
843 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
844 	int ret, irq;
845 
846 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
847 		return;
848 
849 	ret = arm_smmu_rpm_get(smmu);
850 	if (ret < 0)
851 		return;
852 
853 	/*
854 	 * Disable the context bank and free the page tables before freeing
855 	 * it.
856 	 */
857 	smmu->cbs[cfg->cbndx].cfg = NULL;
858 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
859 
860 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
861 		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
862 		devm_free_irq(smmu->dev, irq, domain);
863 	}
864 
865 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
866 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
867 
868 	arm_smmu_rpm_put(smmu);
869 }
870 
871 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
872 {
873 	struct arm_smmu_domain *smmu_domain;
874 
875 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
876 		if (using_legacy_binding ||
877 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
878 			return NULL;
879 	}
880 	/*
881 	 * Allocate the domain and initialise some of its data structures.
882 	 * We can't really do anything meaningful until we've added a
883 	 * master.
884 	 */
885 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
886 	if (!smmu_domain)
887 		return NULL;
888 
889 	mutex_init(&smmu_domain->init_mutex);
890 	spin_lock_init(&smmu_domain->cb_lock);
891 
892 	return &smmu_domain->domain;
893 }
894 
/*
 * Tear down the domain's context and free the containing arm_smmu_domain.
 * All devices are assumed to have been detached already.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	arm_smmu_destroy_domain_context(domain);
	kfree(to_smmu_domain(domain));
}
906 
907 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
908 {
909 	struct arm_smmu_smr *smr = smmu->smrs + idx;
910 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
911 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
912 
913 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
914 		reg |= ARM_SMMU_SMR_VALID;
915 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
916 }
917 
918 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
919 {
920 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
921 	u32 reg;
922 
923 	if (smmu->impl && smmu->impl->write_s2cr) {
924 		smmu->impl->write_s2cr(smmu, idx);
925 		return;
926 	}
927 
928 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
929 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
930 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
931 
932 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
933 	    smmu->smrs[idx].valid)
934 		reg |= ARM_SMMU_S2CR_EXIDVALID;
935 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
936 }
937 
938 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
939 {
940 	arm_smmu_write_s2cr(smmu, idx);
941 	if (smmu->smrs)
942 		arm_smmu_write_smr(smmu, idx);
943 }
944 
945 /*
946  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
947  * should be called after sCR0 is written.
948  */
949 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
950 {
951 	u32 smr;
952 	int i;
953 
954 	if (!smmu->smrs)
955 		return;
956 	/*
957 	 * If we've had to accommodate firmware memory regions, we may
958 	 * have live SMRs by now; tread carefully...
959 	 *
960 	 * Somewhat perversely, not having a free SMR for this test implies we
961 	 * can get away without it anyway, as we'll only be able to 'allocate'
962 	 * these SMRs for the ID/mask values we're already trusting to be OK.
963 	 */
964 	for (i = 0; i < smmu->num_mapping_groups; i++)
965 		if (!smmu->smrs[i].valid)
966 			goto smr_ok;
967 	return;
968 smr_ok:
969 	/*
970 	 * SMR.ID bits may not be preserved if the corresponding MASK
971 	 * bits are set, so check each one separately. We can reject
972 	 * masters later if they try to claim IDs outside these masks.
973 	 */
974 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
978 
979 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
980 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
981 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
982 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
983 }
984 
985 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
986 {
987 	struct arm_smmu_smr *smrs = smmu->smrs;
988 	int i, free_idx = -ENOSPC;
989 
990 	/* Stream indexing is blissfully easy */
991 	if (!smrs)
992 		return id;
993 
994 	/* Validating SMRs is... less so */
995 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
996 		if (!smrs[i].valid) {
997 			/*
998 			 * Note the first free entry we come across, which
999 			 * we'll claim in the end if nothing else matches.
1000 			 */
1001 			if (free_idx < 0)
1002 				free_idx = i;
1003 			continue;
1004 		}
1005 		/*
1006 		 * If the new entry is _entirely_ matched by an existing entry,
1007 		 * then reuse that, with the guarantee that there also cannot
1008 		 * be any subsequent conflicting entries. In normal use we'd
1009 		 * expect simply identical entries for this case, but there's
1010 		 * no harm in accommodating the generalisation.
1011 		 */
1012 		if ((mask & smrs[i].mask) == mask &&
1013 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1014 			return i;
1015 		/*
1016 		 * If the new entry has any other overlap with an existing one,
1017 		 * though, then there always exists at least one stream ID
1018 		 * which would cause a conflict, and we can't allow that risk.
1019 		 */
1020 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1021 			return -EINVAL;
1022 	}
1023 
1024 	return free_idx;
1025 }
1026 
1027 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1028 {
1029 	if (--smmu->s2crs[idx].count)
1030 		return false;
1031 
1032 	smmu->s2crs[idx] = s2cr_init_val;
1033 	if (smmu->smrs)
1034 		smmu->smrs[idx].valid = false;
1035 
1036 	return true;
1037 }
1038 
1039 static int arm_smmu_master_alloc_smes(struct device *dev)
1040 {
1041 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1042 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1043 	struct arm_smmu_device *smmu = cfg->smmu;
1044 	struct arm_smmu_smr *smrs = smmu->smrs;
1045 	int i, idx, ret;
1046 
1047 	mutex_lock(&smmu->stream_map_mutex);
1048 	/* Figure out a viable stream map entry allocation */
1049 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1050 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1051 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1052 
1053 		if (idx != INVALID_SMENDX) {
1054 			ret = -EEXIST;
1055 			goto out_err;
1056 		}
1057 
1058 		ret = arm_smmu_find_sme(smmu, sid, mask);
1059 		if (ret < 0)
1060 			goto out_err;
1061 
1062 		idx = ret;
1063 		if (smrs && smmu->s2crs[idx].count == 0) {
1064 			smrs[idx].id = sid;
1065 			smrs[idx].mask = mask;
1066 			smrs[idx].valid = true;
1067 		}
1068 		smmu->s2crs[idx].count++;
1069 		cfg->smendx[i] = (s16)idx;
1070 	}
1071 
1072 	/* It worked! Now, poke the actual hardware */
1073 	for_each_cfg_sme(cfg, fwspec, i, idx)
1074 		arm_smmu_write_sme(smmu, idx);
1075 
1076 	mutex_unlock(&smmu->stream_map_mutex);
1077 	return 0;
1078 
1079 out_err:
1080 	while (i--) {
1081 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1082 		cfg->smendx[i] = INVALID_SMENDX;
1083 	}
1084 	mutex_unlock(&smmu->stream_map_mutex);
1085 	return ret;
1086 }
1087 
1088 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1089 				      struct iommu_fwspec *fwspec)
1090 {
1091 	struct arm_smmu_device *smmu = cfg->smmu;
1092 	int i, idx;
1093 
1094 	mutex_lock(&smmu->stream_map_mutex);
1095 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1096 		if (arm_smmu_free_sme(smmu, idx))
1097 			arm_smmu_write_sme(smmu, idx);
1098 		cfg->smendx[i] = INVALID_SMENDX;
1099 	}
1100 	mutex_unlock(&smmu->stream_map_mutex);
1101 }
1102 
1103 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1104 				      struct arm_smmu_master_cfg *cfg,
1105 				      struct iommu_fwspec *fwspec)
1106 {
1107 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1108 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1109 	u8 cbndx = smmu_domain->cfg.cbndx;
1110 	enum arm_smmu_s2cr_type type;
1111 	int i, idx;
1112 
1113 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1114 		type = S2CR_TYPE_BYPASS;
1115 	else
1116 		type = S2CR_TYPE_TRANS;
1117 
1118 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1119 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1120 			continue;
1121 
1122 		s2cr[idx].type = type;
1123 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1124 		s2cr[idx].cbndx = cbndx;
1125 		arm_smmu_write_s2cr(smmu, idx);
1126 	}
1127 	return 0;
1128 }
1129 
1130 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1131 {
1132 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1133 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1134 	struct arm_smmu_master_cfg *cfg;
1135 	struct arm_smmu_device *smmu;
1136 	int ret;
1137 
1138 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1139 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1140 		return -ENXIO;
1141 	}
1142 
1143 	/*
1144 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1145 	 * domains between of_xlate() and probe_device() - we have no way to cope
1146 	 * with that, so until ARM gets converted to rely on groups and default
1147 	 * domains, just say no (but more politely than by dereferencing NULL).
1148 	 * This should be at least a WARN_ON once that's sorted.
1149 	 */
1150 	cfg = dev_iommu_priv_get(dev);
1151 	if (!cfg)
1152 		return -ENODEV;
1153 
1154 	smmu = cfg->smmu;
1155 
1156 	ret = arm_smmu_rpm_get(smmu);
1157 	if (ret < 0)
1158 		return ret;
1159 
1160 	/* Ensure that the domain is finalised */
1161 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1162 	if (ret < 0)
1163 		goto rpm_put;
1164 
1165 	/*
1166 	 * Sanity check the domain. We don't support domains across
1167 	 * different SMMUs.
1168 	 */
1169 	if (smmu_domain->smmu != smmu) {
1170 		dev_err(dev,
1171 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1172 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1173 		ret = -EINVAL;
1174 		goto rpm_put;
1175 	}
1176 
1177 	/* Looks ok, so add the device to the domain */
1178 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1179 
1180 	/*
1181 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1182 	 * Otherwise, if a driver for a suspended consumer device
1183 	 * unmaps buffers, it will runpm resume/suspend for each one.
1184 	 *
1185 	 * For example, when used by a GPU device, when an application
1186 	 * or game exits, it can trigger unmapping 100s or 1000s of
1187 	 * buffers.  With a runpm cycle for each buffer, that adds up
1188 	 * to 5-10sec worth of reprogramming the context bank, while
1189 	 * the system appears to be locked up to the user.
1190 	 */
1191 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1192 	pm_runtime_use_autosuspend(smmu->dev);
1193 
1194 rpm_put:
1195 	arm_smmu_rpm_put(smmu);
1196 	return ret;
1197 }
1198 
1199 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1200 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1201 			      int prot, gfp_t gfp, size_t *mapped)
1202 {
1203 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1204 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1205 	int ret;
1206 
1207 	if (!ops)
1208 		return -ENODEV;
1209 
1210 	arm_smmu_rpm_get(smmu);
1211 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1212 	arm_smmu_rpm_put(smmu);
1213 
1214 	return ret;
1215 }
1216 
1217 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1218 				   size_t pgsize, size_t pgcount,
1219 				   struct iommu_iotlb_gather *iotlb_gather)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	size_t ret;
1224 
1225 	if (!ops)
1226 		return 0;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1236 {
1237 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1238 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1239 
1240 	if (smmu_domain->flush_ops) {
1241 		arm_smmu_rpm_get(smmu);
1242 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1243 		arm_smmu_rpm_put(smmu);
1244 	}
1245 }
1246 
1247 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1248 				struct iommu_iotlb_gather *gather)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (!smmu)
1254 		return;
1255 
1256 	arm_smmu_rpm_get(smmu);
1257 	if (smmu->version == ARM_SMMU_V2 ||
1258 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1259 		arm_smmu_tlb_sync_context(smmu_domain);
1260 	else
1261 		arm_smmu_tlb_sync_global(smmu);
1262 	arm_smmu_rpm_put(smmu);
1263 }
1264 
/*
 * Resolve @iova using the context bank's hardware address translation
 * operation (ATS1PR) instead of walking the page tables in software.
 *
 * Returns the translated physical address, or 0 if the SMMU could not be
 * runtime-resumed or the hardware flagged a fault in PAR. If the translation
 * does not complete within the polling timeout, the result of the software
 * page table walk is returned instead.
 */
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;
	phys_addr_t addr = 0;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	/*
	 * cb_lock is held across the translation request, the status poll
	 * and the PAR read-back.
	 */
	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* Drop the low 12 bits; they are added back from @iova below */
	va = iova & ~0xfffUL;
	/* The width of the ATS1PR write follows the context format */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	/* Poll ATSR (5us interval, 50us timeout) until ACTIVE clears */
	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
				      5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		arm_smmu_rpm_put(smmu);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	/* A set fault bit in PAR means the translation failed: report 0 */
	if (phys & ARM_SMMU_CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		goto out;
	}

	/* Combine PAR bits [39:12] with the low 12 bits of @iova */
	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
out:
	arm_smmu_rpm_put(smmu);

	return addr;
}
1316 
1317 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1318 					dma_addr_t iova)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1321 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1322 
1323 	if (!ops)
1324 		return 0;
1325 
1326 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1327 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1328 		return arm_smmu_iova_to_phys_hard(domain, iova);
1329 
1330 	return ops->iova_to_phys(ops, iova);
1331 }
1332 
1333 static bool arm_smmu_capable(enum iommu_cap cap)
1334 {
1335 	switch (cap) {
1336 	case IOMMU_CAP_CACHE_COHERENCY:
1337 		/*
1338 		 * Return true here as the SMMU can always send out coherent
1339 		 * requests.
1340 		 */
1341 		return true;
1342 	case IOMMU_CAP_NOEXEC:
1343 		return true;
1344 	default:
1345 		return false;
1346 	}
1347 }
1348 
1349 static
1350 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1351 {
1352 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1353 							  fwnode);
1354 	put_device(dev);
1355 	return dev ? dev_get_drvdata(dev) : NULL;
1356 }
1357 
1358 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1359 {
1360 	struct arm_smmu_device *smmu = NULL;
1361 	struct arm_smmu_master_cfg *cfg;
1362 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1363 	int i, ret;
1364 
1365 	if (using_legacy_binding) {
1366 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1367 
1368 		/*
1369 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1370 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1371 		 * later use.
1372 		 */
1373 		fwspec = dev_iommu_fwspec_get(dev);
1374 		if (ret)
1375 			goto out_free;
1376 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1377 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1378 	} else {
1379 		return ERR_PTR(-ENODEV);
1380 	}
1381 
1382 	ret = -EINVAL;
1383 	for (i = 0; i < fwspec->num_ids; i++) {
1384 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1385 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1386 
1387 		if (sid & ~smmu->streamid_mask) {
1388 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1389 				sid, smmu->streamid_mask);
1390 			goto out_free;
1391 		}
1392 		if (mask & ~smmu->smr_mask_mask) {
1393 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1394 				mask, smmu->smr_mask_mask);
1395 			goto out_free;
1396 		}
1397 	}
1398 
1399 	ret = -ENOMEM;
1400 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1401 		      GFP_KERNEL);
1402 	if (!cfg)
1403 		goto out_free;
1404 
1405 	cfg->smmu = smmu;
1406 	dev_iommu_priv_set(dev, cfg);
1407 	while (i--)
1408 		cfg->smendx[i] = INVALID_SMENDX;
1409 
1410 	ret = arm_smmu_rpm_get(smmu);
1411 	if (ret < 0)
1412 		goto out_cfg_free;
1413 
1414 	ret = arm_smmu_master_alloc_smes(dev);
1415 	arm_smmu_rpm_put(smmu);
1416 
1417 	if (ret)
1418 		goto out_cfg_free;
1419 
1420 	device_link_add(dev, smmu->dev,
1421 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1422 
1423 	return &smmu->iommu;
1424 
1425 out_cfg_free:
1426 	kfree(cfg);
1427 out_free:
1428 	iommu_fwspec_free(dev);
1429 	return ERR_PTR(ret);
1430 }
1431 
1432 static void arm_smmu_release_device(struct device *dev)
1433 {
1434 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1435 	struct arm_smmu_master_cfg *cfg;
1436 	struct arm_smmu_device *smmu;
1437 	int ret;
1438 
1439 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1440 		return;
1441 
1442 	cfg  = dev_iommu_priv_get(dev);
1443 	smmu = cfg->smmu;
1444 
1445 	ret = arm_smmu_rpm_get(smmu);
1446 	if (ret < 0)
1447 		return;
1448 
1449 	arm_smmu_master_free_smes(cfg, fwspec);
1450 
1451 	arm_smmu_rpm_put(smmu);
1452 
1453 	dev_iommu_priv_set(dev, NULL);
1454 	kfree(cfg);
1455 	iommu_fwspec_free(dev);
1456 }
1457 
1458 static void arm_smmu_probe_finalize(struct device *dev)
1459 {
1460 	struct arm_smmu_master_cfg *cfg;
1461 	struct arm_smmu_device *smmu;
1462 
1463 	cfg = dev_iommu_priv_get(dev);
1464 	smmu = cfg->smmu;
1465 
1466 	if (smmu->impl && smmu->impl->probe_finalize)
1467 		smmu->impl->probe_finalize(smmu, dev);
1468 }
1469 
1470 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1471 {
1472 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1473 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1474 	struct arm_smmu_device *smmu = cfg->smmu;
1475 	struct iommu_group *group = NULL;
1476 	int i, idx;
1477 
1478 	mutex_lock(&smmu->stream_map_mutex);
1479 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1480 		if (group && smmu->s2crs[idx].group &&
1481 		    group != smmu->s2crs[idx].group) {
1482 			mutex_unlock(&smmu->stream_map_mutex);
1483 			return ERR_PTR(-EINVAL);
1484 		}
1485 
1486 		group = smmu->s2crs[idx].group;
1487 	}
1488 
1489 	if (group) {
1490 		mutex_unlock(&smmu->stream_map_mutex);
1491 		return iommu_group_ref_get(group);
1492 	}
1493 
1494 	if (dev_is_pci(dev))
1495 		group = pci_device_group(dev);
1496 	else if (dev_is_fsl_mc(dev))
1497 		group = fsl_mc_device_group(dev);
1498 	else
1499 		group = generic_device_group(dev);
1500 
1501 	/* Remember group for faster lookups */
1502 	if (!IS_ERR(group))
1503 		for_each_cfg_sme(cfg, fwspec, i, idx)
1504 			smmu->s2crs[idx].group = group;
1505 
1506 	mutex_unlock(&smmu->stream_map_mutex);
1507 	return group;
1508 }
1509 
1510 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1511 {
1512 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513 	int ret = 0;
1514 
1515 	mutex_lock(&smmu_domain->init_mutex);
1516 	if (smmu_domain->smmu)
1517 		ret = -EPERM;
1518 	else
1519 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1520 	mutex_unlock(&smmu_domain->init_mutex);
1521 
1522 	return ret;
1523 }
1524 
1525 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1526 		unsigned long quirks)
1527 {
1528 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1529 	int ret = 0;
1530 
1531 	mutex_lock(&smmu_domain->init_mutex);
1532 	if (smmu_domain->smmu)
1533 		ret = -EPERM;
1534 	else
1535 		smmu_domain->pgtbl_quirks = quirks;
1536 	mutex_unlock(&smmu_domain->init_mutex);
1537 
1538 	return ret;
1539 }
1540 
1541 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1542 {
1543 	u32 mask, fwid = 0;
1544 
1545 	if (args->args_count > 0)
1546 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1547 
1548 	if (args->args_count > 1)
1549 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1550 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1551 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1552 
1553 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1554 }
1555 
1556 static void arm_smmu_get_resv_regions(struct device *dev,
1557 				      struct list_head *head)
1558 {
1559 	struct iommu_resv_region *region;
1560 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1561 
1562 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1563 					 prot, IOMMU_RESV_SW_MSI);
1564 	if (!region)
1565 		return;
1566 
1567 	list_add_tail(&region->list, head);
1568 
1569 	iommu_dma_get_resv_regions(dev, head);
1570 }
1571 
1572 static int arm_smmu_def_domain_type(struct device *dev)
1573 {
1574 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1575 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1576 
1577 	if (impl && impl->def_domain_type)
1578 		return impl->def_domain_type(dev);
1579 
1580 	return 0;
1581 }
1582 
/*
 * IOMMU core callbacks implemented by this driver.
 *
 * Not const: pgsize_bitmap starts out as -1UL and is replaced/extended by
 * arm_smmu_device_cfg_probe() with the page sizes each probed SMMU supports.
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map_pages		= arm_smmu_map_pages,
	.unmap_pages		= arm_smmu_unmap_pages,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.probe_finalize		= arm_smmu_probe_finalize,
	.device_group		= arm_smmu_device_group,
	.enable_nesting		= arm_smmu_enable_nesting,
	.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
};
1606 
/*
 * Bring the SMMU into a known state from the driver's cached configuration:
 * clear the global fault status, rewrite every stream map entry and context
 * bank, invalidate the TLBs, and finally write sCR0 with the computed
 * settings. sCR0 is written last, after the implementation reset hook and a
 * global TLB sync.
 */
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int i;
	u32 reg;

	/* clear global FSR */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
	}

	/* Invalidate the TLB, just in case */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);

	/* Start from the current sCR0 value and adjust individual fields */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= ARM_SMMU_sCR0_USFCFG;
	else
		reg &= ~ARM_SMMU_sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~ARM_SMMU_sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(ARM_SMMU_sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= ARM_SMMU_sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= ARM_SMMU_sCR0_EXIDENABLE;

	/* Implementation-specific reset runs before sCR0 is written */
	if (smmu->impl && smmu->impl->reset)
		smmu->impl->reset(smmu);

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}
1668 
/*
 * Convert an address size encoding from the ID registers into a width in
 * bits. Encodings 0-4 map to 32, 36, 40, 42 and 44 bits; encoding 5 and any
 * other value yield 48.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44 };
	const int known = sizeof(bits) / sizeof(bits[0]);

	if (size < 0 || size >= known)
		return 48;

	return bits[size];
}
1687 
1688 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1689 {
1690 	unsigned int size;
1691 	u32 id;
1692 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1693 	int i, ret;
1694 
1695 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1696 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1697 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1698 
1699 	/* ID0 */
1700 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1701 
1702 	/* Restrict available stages based on module parameter */
1703 	if (force_stage == 1)
1704 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1705 	else if (force_stage == 2)
1706 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1707 
1708 	if (id & ARM_SMMU_ID0_S1TS) {
1709 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1710 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1711 	}
1712 
1713 	if (id & ARM_SMMU_ID0_S2TS) {
1714 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1715 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1716 	}
1717 
1718 	if (id & ARM_SMMU_ID0_NTS) {
1719 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1720 		dev_notice(smmu->dev, "\tnested translation\n");
1721 	}
1722 
1723 	if (!(smmu->features &
1724 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1725 		dev_err(smmu->dev, "\tno translation support!\n");
1726 		return -ENODEV;
1727 	}
1728 
1729 	if ((id & ARM_SMMU_ID0_S1TS) &&
1730 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1731 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1732 		dev_notice(smmu->dev, "\taddress translation ops\n");
1733 	}
1734 
1735 	/*
1736 	 * In order for DMA API calls to work properly, we must defer to what
1737 	 * the FW says about coherency, regardless of what the hardware claims.
1738 	 * Fortunately, this also opens up a workaround for systems where the
1739 	 * ID register value has ended up configured incorrectly.
1740 	 */
1741 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1742 	if (cttw_fw || cttw_reg)
1743 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1744 			   cttw_fw ? "" : "non-");
1745 	if (cttw_fw != cttw_reg)
1746 		dev_notice(smmu->dev,
1747 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1748 
1749 	/* Max. number of entries we have for stream matching/indexing */
1750 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1751 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1752 		size = 1 << 16;
1753 	} else {
1754 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1755 	}
1756 	smmu->streamid_mask = size - 1;
1757 	if (id & ARM_SMMU_ID0_SMS) {
1758 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1759 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1760 		if (size == 0) {
1761 			dev_err(smmu->dev,
1762 				"stream-matching supported, but no SMRs present!\n");
1763 			return -ENODEV;
1764 		}
1765 
1766 		/* Zero-initialised to mark as invalid */
1767 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1768 					  GFP_KERNEL);
1769 		if (!smmu->smrs)
1770 			return -ENOMEM;
1771 
1772 		dev_notice(smmu->dev,
1773 			   "\tstream matching with %u register groups", size);
1774 	}
1775 	/* s2cr->type == 0 means translation, so initialise explicitly */
1776 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1777 					 GFP_KERNEL);
1778 	if (!smmu->s2crs)
1779 		return -ENOMEM;
1780 	for (i = 0; i < size; i++)
1781 		smmu->s2crs[i] = s2cr_init_val;
1782 
1783 	smmu->num_mapping_groups = size;
1784 	mutex_init(&smmu->stream_map_mutex);
1785 	spin_lock_init(&smmu->global_sync_lock);
1786 
1787 	if (smmu->version < ARM_SMMU_V2 ||
1788 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1789 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1790 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1791 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1792 	}
1793 
1794 	/* ID1 */
1795 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1796 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1797 
1798 	/* Check for size mismatch of SMMU address space from mapped region */
1799 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1800 	if (smmu->numpage != 2 * size << smmu->pgshift)
1801 		dev_warn(smmu->dev,
1802 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1803 			2 * size << smmu->pgshift, smmu->numpage);
1804 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1805 	smmu->numpage = size;
1806 
1807 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1808 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1809 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1810 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1811 		return -ENODEV;
1812 	}
1813 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1814 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1815 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1816 				 sizeof(*smmu->cbs), GFP_KERNEL);
1817 	if (!smmu->cbs)
1818 		return -ENOMEM;
1819 
1820 	/* ID2 */
1821 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1822 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1823 	smmu->ipa_size = size;
1824 
1825 	/* The output mask is also applied for bypass */
1826 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1827 	smmu->pa_size = size;
1828 
1829 	if (id & ARM_SMMU_ID2_VMID16)
1830 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1831 
1832 	/*
1833 	 * What the page table walker can address actually depends on which
1834 	 * descriptor format is in use, but since a) we don't know that yet,
1835 	 * and b) it can vary per context bank, this will have to do...
1836 	 */
1837 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1838 		dev_warn(smmu->dev,
1839 			 "failed to set DMA mask for table walker\n");
1840 
1841 	if (smmu->version < ARM_SMMU_V2) {
1842 		smmu->va_size = smmu->ipa_size;
1843 		if (smmu->version == ARM_SMMU_V1_64K)
1844 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1845 	} else {
1846 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1847 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1848 		if (id & ARM_SMMU_ID2_PTFS_4K)
1849 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1850 		if (id & ARM_SMMU_ID2_PTFS_16K)
1851 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1852 		if (id & ARM_SMMU_ID2_PTFS_64K)
1853 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1854 	}
1855 
1856 	if (smmu->impl && smmu->impl->cfg_probe) {
1857 		ret = smmu->impl->cfg_probe(smmu);
1858 		if (ret)
1859 			return ret;
1860 	}
1861 
1862 	/* Now we've corralled the various formats, what'll it do? */
1863 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1864 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1865 	if (smmu->features &
1866 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1867 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1868 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1869 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1870 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1871 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1872 
1873 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1874 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1875 	else
1876 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1877 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1878 		   smmu->pgsize_bitmap);
1879 
1880 
1881 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1882 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1883 			   smmu->va_size, smmu->ipa_size);
1884 
1885 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1886 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1887 			   smmu->ipa_size, smmu->pa_size);
1888 
1889 	return 0;
1890 }
1891 
/*
 * Per-compatible match data: the architecture version and implementation
 * model that the devicetree probe copies into the arm_smmu_device.
 */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};
1896 
/*
 * Define a static const arm_smmu_match_data named @name with the given
 * architecture version and implementation model.
 */
#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

/* One instance per distinct version/model pair used by the match table */
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1906 
/*
 * Devicetree compatible strings handled by this driver, each paired with the
 * match data describing its version and model. Some compatibles share match
 * data (e.g. "arm,mmu-400" uses the generic v1 entry and "nvidia,smmu-500"
 * uses the MMU-500 entry).
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1919 
1920 #ifdef CONFIG_ACPI
1921 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1922 {
1923 	int ret = 0;
1924 
1925 	switch (model) {
1926 	case ACPI_IORT_SMMU_V1:
1927 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1928 		smmu->version = ARM_SMMU_V1;
1929 		smmu->model = GENERIC_SMMU;
1930 		break;
1931 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1932 		smmu->version = ARM_SMMU_V1_64K;
1933 		smmu->model = GENERIC_SMMU;
1934 		break;
1935 	case ACPI_IORT_SMMU_V2:
1936 		smmu->version = ARM_SMMU_V2;
1937 		smmu->model = GENERIC_SMMU;
1938 		break;
1939 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1940 		smmu->version = ARM_SMMU_V2;
1941 		smmu->model = ARM_MMU500;
1942 		break;
1943 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1944 		smmu->version = ARM_SMMU_V2;
1945 		smmu->model = CAVIUM_SMMUV2;
1946 		break;
1947 	default:
1948 		ret = -ENODEV;
1949 	}
1950 
1951 	return ret;
1952 }
1953 
1954 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1955 				      struct arm_smmu_device *smmu)
1956 {
1957 	struct device *dev = smmu->dev;
1958 	struct acpi_iort_node *node =
1959 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1960 	struct acpi_iort_smmu *iort_smmu;
1961 	int ret;
1962 
1963 	/* Retrieve SMMU1/2 specific data */
1964 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1965 
1966 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1967 	if (ret < 0)
1968 		return ret;
1969 
1970 	/* Ignore the configuration access interrupt */
1971 	smmu->num_global_irqs = 1;
1972 
1973 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1974 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1975 
1976 	return 0;
1977 }
1978 #else
1979 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1980 					     struct arm_smmu_device *smmu)
1981 {
1982 	return -ENODEV;
1983 }
1984 #endif
1985 
1986 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1987 				    struct arm_smmu_device *smmu)
1988 {
1989 	const struct arm_smmu_match_data *data;
1990 	struct device *dev = &pdev->dev;
1991 	bool legacy_binding;
1992 
1993 	if (of_property_read_u32(dev->of_node, "#global-interrupts",
1994 				 &smmu->num_global_irqs)) {
1995 		dev_err(dev, "missing #global-interrupts property\n");
1996 		return -ENODEV;
1997 	}
1998 
1999 	data = of_device_get_match_data(dev);
2000 	smmu->version = data->version;
2001 	smmu->model = data->model;
2002 
2003 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2004 	if (legacy_binding && !using_generic_binding) {
2005 		if (!using_legacy_binding) {
2006 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2007 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2008 		}
2009 		using_legacy_binding = true;
2010 	} else if (!legacy_binding && !using_legacy_binding) {
2011 		using_generic_binding = true;
2012 	} else {
2013 		dev_err(dev, "not probing due to mismatched DT properties\n");
2014 		return -ENODEV;
2015 	}
2016 
2017 	if (of_dma_is_coherent(dev->of_node))
2018 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2019 
2020 	return 0;
2021 }
2022 
2023 static int arm_smmu_bus_init(struct iommu_ops *ops)
2024 {
2025 	int err;
2026 
2027 	/* Oh, for a proper bus abstraction */
2028 	if (!iommu_present(&platform_bus_type)) {
2029 		err = bus_set_iommu(&platform_bus_type, ops);
2030 		if (err)
2031 			return err;
2032 	}
2033 #ifdef CONFIG_ARM_AMBA
2034 	if (!iommu_present(&amba_bustype)) {
2035 		err = bus_set_iommu(&amba_bustype, ops);
2036 		if (err)
2037 			goto err_reset_platform_ops;
2038 	}
2039 #endif
2040 #ifdef CONFIG_PCI
2041 	if (!iommu_present(&pci_bus_type)) {
2042 		err = bus_set_iommu(&pci_bus_type, ops);
2043 		if (err)
2044 			goto err_reset_amba_ops;
2045 	}
2046 #endif
2047 #ifdef CONFIG_FSL_MC_BUS
2048 	if (!iommu_present(&fsl_mc_bus_type)) {
2049 		err = bus_set_iommu(&fsl_mc_bus_type, ops);
2050 		if (err)
2051 			goto err_reset_pci_ops;
2052 	}
2053 #endif
2054 	return 0;
2055 
2056 err_reset_pci_ops: __maybe_unused;
2057 #ifdef CONFIG_PCI
2058 	bus_set_iommu(&pci_bus_type, NULL);
2059 #endif
2060 err_reset_amba_ops: __maybe_unused;
2061 #ifdef CONFIG_ARM_AMBA
2062 	bus_set_iommu(&amba_bustype, NULL);
2063 #endif
2064 err_reset_platform_ops: __maybe_unused;
2065 	bus_set_iommu(&platform_bus_type, NULL);
2066 	return err;
2067 }
2068 
2069 static int arm_smmu_device_probe(struct platform_device *pdev)
2070 {
2071 	struct resource *res;
2072 	resource_size_t ioaddr;
2073 	struct arm_smmu_device *smmu;
2074 	struct device *dev = &pdev->dev;
2075 	int num_irqs, i, err;
2076 	irqreturn_t (*global_fault)(int irq, void *dev);
2077 
2078 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2079 	if (!smmu) {
2080 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2081 		return -ENOMEM;
2082 	}
2083 	smmu->dev = dev;
2084 
2085 	if (dev->of_node)
2086 		err = arm_smmu_device_dt_probe(pdev, smmu);
2087 	else
2088 		err = arm_smmu_device_acpi_probe(pdev, smmu);
2089 
2090 	if (err)
2091 		return err;
2092 
2093 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2094 	ioaddr = res->start;
2095 	smmu->base = devm_ioremap_resource(dev, res);
2096 	if (IS_ERR(smmu->base))
2097 		return PTR_ERR(smmu->base);
2098 	/*
2099 	 * The resource size should effectively match the value of SMMU_TOP;
2100 	 * stash that temporarily until we know PAGESIZE to validate it with.
2101 	 */
2102 	smmu->numpage = resource_size(res);
2103 
2104 	smmu = arm_smmu_impl_init(smmu);
2105 	if (IS_ERR(smmu))
2106 		return PTR_ERR(smmu);
2107 
2108 	num_irqs = 0;
2109 	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2110 		num_irqs++;
2111 		if (num_irqs > smmu->num_global_irqs)
2112 			smmu->num_context_irqs++;
2113 	}
2114 
2115 	if (!smmu->num_context_irqs) {
2116 		dev_err(dev, "found %d interrupts but expected at least %d\n",
2117 			num_irqs, smmu->num_global_irqs + 1);
2118 		return -ENODEV;
2119 	}
2120 
2121 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2122 				  GFP_KERNEL);
2123 	if (!smmu->irqs) {
2124 		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2125 		return -ENOMEM;
2126 	}
2127 
2128 	for (i = 0; i < num_irqs; ++i) {
2129 		int irq = platform_get_irq(pdev, i);
2130 
2131 		if (irq < 0)
2132 			return -ENODEV;
2133 		smmu->irqs[i] = irq;
2134 	}
2135 
2136 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2137 	if (err < 0) {
2138 		dev_err(dev, "failed to get clocks %d\n", err);
2139 		return err;
2140 	}
2141 	smmu->num_clks = err;
2142 
2143 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2144 	if (err)
2145 		return err;
2146 
2147 	err = arm_smmu_device_cfg_probe(smmu);
2148 	if (err)
2149 		return err;
2150 
2151 	if (smmu->version == ARM_SMMU_V2) {
2152 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2153 			dev_err(dev,
2154 			      "found only %d context irq(s) but %d required\n",
2155 			      smmu->num_context_irqs, smmu->num_context_banks);
2156 			return -ENODEV;
2157 		}
2158 
2159 		/* Ignore superfluous interrupts */
2160 		smmu->num_context_irqs = smmu->num_context_banks;
2161 	}
2162 
2163 	if (smmu->impl && smmu->impl->global_fault)
2164 		global_fault = smmu->impl->global_fault;
2165 	else
2166 		global_fault = arm_smmu_global_fault;
2167 
2168 	for (i = 0; i < smmu->num_global_irqs; ++i) {
2169 		err = devm_request_irq(smmu->dev, smmu->irqs[i],
2170 				       global_fault,
2171 				       IRQF_SHARED,
2172 				       "arm-smmu global fault",
2173 				       smmu);
2174 		if (err) {
2175 			dev_err(dev, "failed to request global IRQ %d (%u)\n",
2176 				i, smmu->irqs[i]);
2177 			return err;
2178 		}
2179 	}
2180 
2181 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2182 				     "smmu.%pa", &ioaddr);
2183 	if (err) {
2184 		dev_err(dev, "Failed to register iommu in sysfs\n");
2185 		return err;
2186 	}
2187 
2188 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2189 	if (err) {
2190 		dev_err(dev, "Failed to register iommu\n");
2191 		goto err_sysfs_remove;
2192 	}
2193 
2194 	platform_set_drvdata(pdev, smmu);
2195 	arm_smmu_device_reset(smmu);
2196 	arm_smmu_test_smr_masks(smmu);
2197 
2198 	/*
2199 	 * We want to avoid touching dev->power.lock in fastpaths unless
2200 	 * it's really going to do something useful - pm_runtime_enabled()
2201 	 * can serve as an ideal proxy for that decision. So, conditionally
2202 	 * enable pm_runtime.
2203 	 */
2204 	if (dev->pm_domain) {
2205 		pm_runtime_set_active(dev);
2206 		pm_runtime_enable(dev);
2207 	}
2208 
2209 	/*
2210 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2211 	 * any device which might need it, so we want the bus ops in place
2212 	 * ready to handle default domain setup as soon as any SMMU exists.
2213 	 */
2214 	if (!using_legacy_binding) {
2215 		err = arm_smmu_bus_init(&arm_smmu_ops);
2216 		if (err)
2217 			goto err_unregister_device;
2218 	}
2219 
2220 	return 0;
2221 
2222 err_unregister_device:
2223 	iommu_device_unregister(&smmu->iommu);
2224 err_sysfs_remove:
2225 	iommu_device_sysfs_remove(&smmu->iommu);
2226 	return err;
2227 }
2228 
2229 static int arm_smmu_device_remove(struct platform_device *pdev)
2230 {
2231 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2232 
2233 	if (!smmu)
2234 		return -ENODEV;
2235 
2236 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2237 		dev_notice(&pdev->dev, "disabling translation\n");
2238 
2239 	arm_smmu_bus_init(NULL);
2240 	iommu_device_unregister(&smmu->iommu);
2241 	iommu_device_sysfs_remove(&smmu->iommu);
2242 
2243 	arm_smmu_rpm_get(smmu);
2244 	/* Turn the thing off */
2245 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2246 	arm_smmu_rpm_put(smmu);
2247 
2248 	if (pm_runtime_enabled(smmu->dev))
2249 		pm_runtime_force_suspend(smmu->dev);
2250 	else
2251 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2252 
2253 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2254 	return 0;
2255 }
2256 
/* Shutdown hook: performs the same teardown as remove, discarding its result. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	(void)arm_smmu_device_remove(pdev);
}
2261 
2262 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2263 {
2264 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2265 	int ret;
2266 
2267 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2268 	if (ret)
2269 		return ret;
2270 
2271 	arm_smmu_device_reset(smmu);
2272 
2273 	return 0;
2274 }
2275 
2276 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2277 {
2278 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2279 
2280 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2281 
2282 	return 0;
2283 }
2284 
2285 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2286 {
2287 	int ret;
2288 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2289 
2290 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2291 	if (ret)
2292 		return ret;
2293 
2294 	if (pm_runtime_suspended(dev))
2295 		return 0;
2296 
2297 	ret = arm_smmu_runtime_resume(dev);
2298 	if (ret)
2299 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2300 
2301 	return ret;
2302 }
2303 
2304 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2305 {
2306 	int ret = 0;
2307 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2308 
2309 	if (pm_runtime_suspended(dev))
2310 		goto clk_unprepare;
2311 
2312 	ret = arm_smmu_runtime_suspend(dev);
2313 	if (ret)
2314 		return ret;
2315 
2316 clk_unprepare:
2317 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2318 	return ret;
2319 }
2320 
2321 static const struct dev_pm_ops arm_smmu_pm_ops = {
2322 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2323 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2324 			   arm_smmu_runtime_resume, NULL)
2325 };
2326 
2327 static struct platform_driver arm_smmu_driver = {
2328 	.driver	= {
2329 		.name			= "arm-smmu",
2330 		.of_match_table		= arm_smmu_of_match,
2331 		.pm			= &arm_smmu_pm_ops,
2332 		.suppress_bind_attrs    = true,
2333 	},
2334 	.probe	= arm_smmu_device_probe,
2335 	.remove	= arm_smmu_device_remove,
2336 	.shutdown = arm_smmu_device_shutdown,
2337 };
2338 module_platform_driver(arm_smmu_driver);
2339 
2340 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2341 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2342 MODULE_ALIAS("platform:arm-smmu");
2343 MODULE_LICENSE("GPL v2");
2344