1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <linux/pm_runtime.h>
37 #include <linux/ratelimit.h>
38 #include <linux/slab.h>
39 
40 #include <linux/amba/bus.h>
41 #include <linux/fsl/mc.h>
42 
43 #include "arm-smmu.h"
44 
45 /*
46  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
47  * global register space are still, in fact, using a hypervisor to mediate it
48  * by trapping and emulating register accesses. Sadly, some deployed versions
49  * of said trapping code have bugs wherein they go horribly wrong for stores
50  * using r31 (i.e. XZR/WZR) as the source register.
51  */
/* Non-zero dummy value for writes where XZR/WZR stores are broken (see above) */
#define QCOM_DUMMY_VAL -1

/*
 * Fixed IOVA window reserved for MSI doorbells — presumably reported to the
 * IOMMU core as a reserved region elsewhere in this driver (not visible here).
 */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

/* 0 = no forcing; 1 or 2 forces that translation stage (disables nesting) */
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/* Default comes from Kconfig; when true, unmatched streams fault instead of bypassing */
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

/* Initial S2CR state for unclaimed stream-map entries, per disable_bypass */
#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

/* Which firmware binding style is in use; set during probe (not visible here) */
static bool using_legacy_binding, using_generic_binding;
72 
73 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
74 {
75 	if (pm_runtime_enabled(smmu->dev))
76 		return pm_runtime_resume_and_get(smmu->dev);
77 
78 	return 0;
79 }
80 
81 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
82 {
83 	if (pm_runtime_enabled(smmu->dev))
84 		pm_runtime_put_autosuspend(smmu->dev);
85 }
86 
/* Convert a generic iommu_domain to the embedding arm_smmu_domain */
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

/* Forward declarations: needed by the legacy-binding code below */
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
94 
95 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
96 static int arm_smmu_bus_init(struct iommu_ops *ops);
97 
98 static struct device_node *dev_get_dev_node(struct device *dev)
99 {
100 	if (dev_is_pci(dev)) {
101 		struct pci_bus *bus = to_pci_dev(dev)->bus;
102 
103 		while (!pci_is_root_bus(bus))
104 			bus = bus->parent;
105 		return of_node_get(bus->bridge->parent->of_node);
106 	}
107 
108 	return of_node_get(dev->of_node);
109 }
110 
111 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
112 {
113 	*((__be32 *)data) = cpu_to_be32(alias);
114 	return 0; /* Continue walking */
115 }
116 
/*
 * driver_for_each_device() callback: scan @dev's "mmu-masters" phandle list
 * for an entry pointing at the node stashed in the iterator passed via @data.
 * On a match, *data is repurposed to return the matching SMMU device and the
 * iterator is left positioned on the matching entry (stream IDs readable via
 * of_phandle_iterator_args()). Returns 1 on match (stops iteration), 0 if no
 * match, or a negative error.
 */
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", -1)
		if (it->node == np) {
			/* Found it: smuggle the device back through @data */
			*(void **)data = dev;
			return 1;
		}
	/* Restore the target node clobbered by the iteration */
	it->node = np;
	/* -ENOENT just means no "mmu-masters" property here; keep looking */
	return err == -ENOENT ? 0 : err;
}
132 
/*
 * Resolve @dev's SMMU and Stream IDs via the legacy "mmu-masters" DT binding
 * and initialise its fwspec accordingly. On success, *smmu is set to the
 * owning SMMU's driver data and the discovered stream IDs are added to the
 * device's fwspec. Returns 0 on success, -ENODEV if @dev is not described by
 * any probed SMMU, or another negative errno.
 */
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	/* Only nodes with "#stream-id-cells" can appear in "mmu-masters" */
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	/* Search every probed SMMU for an "mmu-masters" entry naming @np */
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	/* On match the callback replaced @data with the SMMU's struct device */
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		/* Point the iterator at our single synthesised SID instead */
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	/* Copy the stream IDs out of the (still-positioned) iterator */
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
183 
184 /*
185  * With the legacy DT binding in play, we have no guarantees about
186  * probe order, but then we're also not doing default domains, so we can
187  * delay setting bus ops until we're sure every possible SMMU is ready,
188  * and that way ensure that no probe_device() calls get missed.
189  */
190 static int arm_smmu_legacy_bus_init(void)
191 {
192 	if (using_legacy_binding)
193 		return arm_smmu_bus_init(&arm_smmu_ops);
194 	return 0;
195 }
196 device_initcall_sync(arm_smmu_legacy_bus_init);
197 #else
/* Stub when legacy DT binding support is compiled out: never matches */
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
203 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
204 
/* Release a context bank / SME index back to its allocation bitmap */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
209 
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				int sync, int status)
{
	unsigned int spin_cnt, delay;
	u32 reg;

	/* Let an implementation quirk override the whole sequence */
	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
		return smmu->impl->tlb_sync(smmu, page, sync, status);

	/* Any write to the SYNC register kicks the sync; value is ignored
	 * (QCOM_DUMMY_VAL avoids the broken-XZR-store quirk, see top of file). */
	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
	/* Spin a little, then back off exponentially up to the timeout */
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			reg = arm_smmu_readl(smmu, page, status);
			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
233 
/* Issue and wait for a TLB sync in the global (GR0) register space */
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	unsigned long flags;

	/* Serialise against other global syncs on this SMMU */
	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
			    ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
243 
/* Issue and wait for a TLB sync scoped to the domain's context bank */
static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long flags;

	/* Serialise against other syncs on this context bank */
	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
254 
/* Invalidate all stage-1 TLB entries for the domain's ASID, then sync */
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	/*
	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
	 * current CPU are visible beforehand.
	 */
	wmb();
	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
	arm_smmu_tlb_sync_context(smmu_domain);
}
267 
/* Invalidate all stage-2 TLB entries for the domain's VMID, then sync.
 * TLBIVMID lives in global space, hence the global sync. */
static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	/* See above */
	wmb();
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
	arm_smmu_tlb_sync_global(smmu);
}
278 
/*
 * Issue per-page stage-1 TLB invalidations covering [iova, iova + size),
 * stepping by @granule, to the CB register @reg (TLBIVA or TLBIVAL).
 * Does not sync; callers are expected to follow up with a context sync.
 */
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int idx = cfg->cbndx;

	/* With a coherent walk there was no DMA sync to order the PTE writes */
	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
		/* AArch32 encoding: VA[31:12] in the top bits, ASID in the low bits */
		iova = (iova >> 12) << 12;
		iova |= cfg->asid;
		do {
			arm_smmu_cb_write(smmu, idx, reg, iova);
			iova += granule;
		} while (size -= granule);
	} else {
		/* AArch64 encoding: VA >> 12 in [43:0], ASID in [63:48] */
		iova >>= 12;
		iova |= (u64)cfg->asid << 48;
		do {
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
306 
/*
 * Issue per-page stage-2 (IPA) TLB invalidations covering [iova, iova + size)
 * to the CB register @reg (TLBIIPAS2 or TLBIIPAS2L). No trailing sync.
 */
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	/* See arm_smmu_tlb_inv_range_s1() */
	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	/* Register takes the address right-shifted by 12 in both formats */
	iova >>= 12;
	do {
		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
		else
			arm_smmu_cb_write(smmu, idx, reg, iova);
		iova += granule >> 12;
	} while (size -= granule);
}
326 
327 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328 				     size_t granule, void *cookie)
329 {
330 	struct arm_smmu_domain *smmu_domain = cookie;
331 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
332 
333 	if (cfg->flush_walk_prefer_tlbiasid) {
334 		arm_smmu_tlb_inv_context_s1(cookie);
335 	} else {
336 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
337 					  ARM_SMMU_CB_S1_TLBIVA);
338 		arm_smmu_tlb_sync_context(cookie);
339 	}
340 }
341 
/* io-pgtable tlb_add_page hook for stage-1: queue a single leaf (last-level)
 * invalidation; the sync happens later via iotlb_sync. */
static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
}
349 
/* io-pgtable tlb_flush_walk hook for stage-2 (SMMUv2): invalidate by IPA
 * range, including non-leaf entries, then sync the context bank. */
static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2);
	arm_smmu_tlb_sync_context(cookie);
}
357 
/* io-pgtable tlb_add_page hook for stage-2 (SMMUv2): queue a single leaf
 * invalidation; sync is deferred to iotlb_sync. */
static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
}
365 
/* SMMUv1 has no by-IPA stage-2 invalidation, so fall back to nuking the
 * whole VMID for any walk flush. */
static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
					size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
371 /*
372  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
373  * almost negligible, but the benefit of getting the first one in as far ahead
374  * of the sync as possible is significant, hence we don't just make this a
375  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
376  * think.
377  */
/* SMMUv1 stage-2 tlb_add_page: fire an (unsynced) TLBIVMID per page — see
 * the rationale comment above. */
static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
					unsigned long iova, size_t granule,
					void *cookie)
{
	/* See arm_smmu_tlb_inv_range_s1() */
	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
390 
/* Flush ops for stage-1 context banks */
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};

/* Flush ops for stage-2 context banks on SMMUv2 (has by-IPA invalidation) */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};

/* Flush ops for stage-2 context banks on SMMUv1 (VMID-wide only) */
static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};
408 
/*
 * Context-bank fault IRQ handler. Reads the fault syndrome registers,
 * gives upper layers a chance to handle the fault via report_iommu_fault(),
 * logs it otherwise, and clears the fault status. @dev is the iommu_domain
 * passed at devm_request_irq() time.
 */
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	int ret;

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	/* Shared IRQ line: not our fault if no fault bits are set */
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

	ret = report_iommu_fault(domain, NULL, iova,
		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

	/* -ENOSYS means no fault handler was registered; log it ourselves */
	if (ret == -ENOSYS)
		dev_err_ratelimited(smmu->dev,
		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	/* Write-to-clear the fault status so the next fault can be raised */
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
	return IRQ_HANDLED;
}
438 
/*
 * Global fault IRQ handler. Logs (rate-limited) the global fault syndrome,
 * with a friendlier hint for Unidentified Stream Faults caused by the
 * disable-bypass default, then clears the status. @dev is the SMMU device.
 */
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);

	/* Shared IRQ line: nothing pending means it wasn't us */
	if (!gfsr)
		return IRQ_NONE;

	if (__ratelimit(&rs)) {
		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
		    (gfsr & ARM_SMMU_sGFSR_USF))
			dev_err(smmu->dev,
				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
				(u16)gfsynr1);
		else
			dev_err(smmu->dev,
				"Unexpected global fault, this could be serious\n");
		dev_err(smmu->dev,
			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			gfsr, gfsynr0, gfsynr1, gfsynr2);
	}

	/* Write-to-clear the global fault status */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
	return IRQ_HANDLED;
}
471 
/*
 * Populate the software shadow (smmu->cbs[]) of a context bank's registers
 * from the io-pgtable configuration. Nothing is written to hardware here;
 * arm_smmu_write_context_bank() commits the shadow later.
 */
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* Mark the bank as in use; write_context_bank() keys off cb->cfg */
	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			/* Short-descriptor format: single 32-bit TTBCR */
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
			else
				/* AArch32 LPAE needs the extended-address bit */
				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
		}
	} else {
		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
			cb->ttbr[1] = 0;
		} else {
			/* Tag both TTBRs with the ASID; only one gets the
			 * actual table address depending on the TTBR1 quirk */
			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						 cfg->asid);
			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						 cfg->asid);

			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
			else
				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			/* Short-descriptor uses PRRR/NMRR in the MAIR slots */
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
		}
	}
}
528 
/*
 * Commit the software shadow of context bank @idx to hardware.
 * A bank with no cfg attached is simply disabled. Register write order
 * matters: CBA2R/CBAR before the bank registers, TCR2 before the TTBRs
 * (see inline comment), SCTLR last to enable translation.
 * Non-static: also called by implementation-specific code.
 */
void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
		return;
	}

	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = ARM_SMMU_CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);

		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
	}

	/* CBAR */
	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		/* SMMUv1 routes the context IRQ via CBAR */
		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
	}
	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		/* Short-descriptor: 32-bit TTBRs, ASID via CONTEXTIDR */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
	} else {
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		if (stage1)
			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
					   cb->ttbr[1]);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
	}

	/* SCTLR */
	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
	if (stage1)
		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= ARM_SMMU_SCTLR_E;

	/* Implementations may need to massage the final SCTLR write */
	if (smmu->impl && smmu->impl->write_sctlr)
		smmu->impl->write_sctlr(smmu, idx, reg);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
617 
618 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
619 				       struct arm_smmu_device *smmu,
620 				       struct device *dev, unsigned int start)
621 {
622 	if (smmu->impl && smmu->impl->alloc_context_bank)
623 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
624 
625 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
626 }
627 
/*
 * Finish initialising @domain against @smmu on first attach: choose the
 * translation stage and context format, allocate a context bank and IRQ,
 * build the io-pgtable, program the bank, and request the fault interrupt.
 * Idempotent under init_mutex — returns early if already initialised.
 * Returns 0 on success or a negative errno.
 */
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu,
					struct device *dev)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	irqreturn_t (*context_fault)(int irq, void *dev);

	mutex_lock(&smmu_domain->init_mutex);
	/* Already initialised by a previous attach */
	if (smmu_domain->smmu)
		goto out_unlock;

	/* Identity domains need no context bank, just the bypass marker */
	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Derive CBAR type, bank search start, address sizes and pgtable
	 * format from the chosen stage */
	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		/* S1 banks live above the S2-capable ones */
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
	if (ret < 0) {
		goto out_unlock;
	}

	smmu_domain->smmu = smmu;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		/* SMMUv1: context IRQs are shared, pick one round-robin */
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	/* VMID 0 is reserved, hence the +1 for stage-2 */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1;
	else
		cfg->asid = cfg->cbndx;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->flush_ops,
		.iommu_dev	= smmu->dev,
	};

	/* Give the implementation a chance to tweak the pgtable config */
	if (smmu->impl && smmu->impl->init_context) {
		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
		if (ret)
			goto out_clear_smmu;
	}

	if (smmu_domain->pgtbl_quirks)
		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	/* TTBR1 tables cover the top of the address space instead */
	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
		domain->geometry.aperture_start = ~0UL << ias;
		domain->geometry.aperture_end = ~0UL;
	} else {
		domain->geometry.aperture_end = (1UL << ias) - 1;
	}

	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[cfg->irptndx];

	if (smmu->impl && smmu->impl->context_fault)
		context_fault = smmu->impl->context_fault;
	else
		context_fault = arm_smmu_context_fault;

	ret = devm_request_irq(smmu->dev, irq, context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		/* Not fatal: carry on without a fault IRQ for this bank */
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
838 
/*
 * Tear down a domain's context bank: disable the bank, release its IRQ,
 * free the io-pgtable and return the bank index to the allocator.
 * No-op for identity domains or domains never attached to an SMMU.
 */
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	/* The SMMU must be powered for the register writes below */
	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/* Only free the IRQ if init actually managed to request one */
	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
		irq = smmu->irqs[cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
870 
871 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
872 {
873 	struct arm_smmu_domain *smmu_domain;
874 
875 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
876 		if (using_legacy_binding ||
877 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
878 			return NULL;
879 	}
880 	/*
881 	 * Allocate the domain and initialise some of its data structures.
882 	 * We can't really do anything meaningful until we've added a
883 	 * master.
884 	 */
885 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
886 	if (!smmu_domain)
887 		return NULL;
888 
889 	mutex_init(&smmu_domain->init_mutex);
890 	spin_lock_init(&smmu_domain->cb_lock);
891 
892 	return &smmu_domain->domain;
893 }
894 
/* iommu_ops domain_free hook: destroy the context and free the domain */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
906 
907 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
908 {
909 	struct arm_smmu_smr *smr = smmu->smrs + idx;
910 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
911 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
912 
913 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
914 		reg |= ARM_SMMU_SMR_VALID;
915 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
916 }
917 
/*
 * Commit the software state of Stream-to-Context Register @idx to hardware,
 * deferring entirely to the implementation hook when one exists. With EXIDS,
 * the corresponding SMR's valid bit is carried here as EXIDVALID.
 */
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg;

	if (smmu->impl && smmu->impl->write_s2cr) {
		smmu->impl->write_s2cr(smmu, idx);
		return;
	}

	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= ARM_SMMU_S2CR_EXIDVALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}
937 
938 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
939 {
940 	arm_smmu_write_s2cr(smmu, idx);
941 	if (smmu->smrs)
942 		arm_smmu_write_smr(smmu, idx);
943 }
944 
945 /*
946  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
947  * should be called after sCR0 is written.
948  */
949 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
950 {
951 	u32 smr;
952 	int i;
953 
954 	if (!smmu->smrs)
955 		return;
956 	/*
957 	 * If we've had to accommodate firmware memory regions, we may
958 	 * have live SMRs by now; tread carefully...
959 	 *
960 	 * Somewhat perversely, not having a free SMR for this test implies we
961 	 * can get away without it anyway, as we'll only be able to 'allocate'
962 	 * these SMRs for the ID/mask values we're already trusting to be OK.
963 	 */
964 	for (i = 0; i < smmu->num_mapping_groups; i++)
965 		if (!smmu->smrs[i].valid)
966 			goto smr_ok;
967 	return;
968 smr_ok:
969 	/*
970 	 * SMR.ID bits may not be preserved if the corresponding MASK
971 	 * bits are set, so check each one separately. We can reject
972 	 * masters later if they try to claim IDs outside these masks.
973 	 */
974 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
978 
979 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
980 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
981 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
982 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
983 }
984 
985 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
986 {
987 	struct arm_smmu_smr *smrs = smmu->smrs;
988 	int i, free_idx = -ENOSPC;
989 
990 	/* Stream indexing is blissfully easy */
991 	if (!smrs)
992 		return id;
993 
994 	/* Validating SMRs is... less so */
995 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
996 		if (!smrs[i].valid) {
997 			/*
998 			 * Note the first free entry we come across, which
999 			 * we'll claim in the end if nothing else matches.
1000 			 */
1001 			if (free_idx < 0)
1002 				free_idx = i;
1003 			continue;
1004 		}
1005 		/*
1006 		 * If the new entry is _entirely_ matched by an existing entry,
1007 		 * then reuse that, with the guarantee that there also cannot
1008 		 * be any subsequent conflicting entries. In normal use we'd
1009 		 * expect simply identical entries for this case, but there's
1010 		 * no harm in accommodating the generalisation.
1011 		 */
1012 		if ((mask & smrs[i].mask) == mask &&
1013 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1014 			return i;
1015 		/*
1016 		 * If the new entry has any other overlap with an existing one,
1017 		 * though, then there always exists at least one stream ID
1018 		 * which would cause a conflict, and we can't allow that risk.
1019 		 */
1020 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1021 			return -EINVAL;
1022 	}
1023 
1024 	return free_idx;
1025 }
1026 
1027 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1028 {
1029 	if (--smmu->s2crs[idx].count)
1030 		return false;
1031 
1032 	smmu->s2crs[idx] = s2cr_init_val;
1033 	if (smmu->smrs)
1034 		smmu->smrs[idx].valid = false;
1035 
1036 	return true;
1037 }
1038 
/*
 * Claim a stream map entry for every stream ID in @dev's fwspec, then
 * program all of them into the hardware. Returns 0 on success or a
 * negative errno; on failure, every entry claimed so far is released
 * again and the corresponding smendx slots are reset.
 */
static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(cfg, fwspec, i, idx) {
		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);

		/* A non-INVALID index means this slot was already claimed. */
		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		/* First user of this entry: record its ID/mask in the SMR. */
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(cfg, fwspec, i, idx)
		arm_smmu_write_sme(smmu, idx);

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	/* Unwind only the entries claimed before the failure (indices < i). */
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
1087 
1088 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1089 				      struct iommu_fwspec *fwspec)
1090 {
1091 	struct arm_smmu_device *smmu = cfg->smmu;
1092 	int i, idx;
1093 
1094 	mutex_lock(&smmu->stream_map_mutex);
1095 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1096 		if (arm_smmu_free_sme(smmu, idx))
1097 			arm_smmu_write_sme(smmu, idx);
1098 		cfg->smendx[i] = INVALID_SMENDX;
1099 	}
1100 	mutex_unlock(&smmu->stream_map_mutex);
1101 }
1102 
1103 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1104 				      struct arm_smmu_master_cfg *cfg,
1105 				      struct iommu_fwspec *fwspec)
1106 {
1107 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1108 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1109 	u8 cbndx = smmu_domain->cfg.cbndx;
1110 	enum arm_smmu_s2cr_type type;
1111 	int i, idx;
1112 
1113 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1114 		type = S2CR_TYPE_BYPASS;
1115 	else
1116 		type = S2CR_TYPE_TRANS;
1117 
1118 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1119 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1120 			continue;
1121 
1122 		s2cr[idx].type = type;
1123 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1124 		s2cr[idx].cbndx = cbndx;
1125 		arm_smmu_write_s2cr(smmu, idx);
1126 	}
1127 	return 0;
1128 }
1129 
/*
 * Attach @dev to @domain: finalise the domain on this device's SMMU if
 * necessary, verify both belong to the same SMMU instance, then route
 * the device's stream map entries at the domain's context bank.
 */
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and probe_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	cfg = dev_iommu_priv_get(dev);
	if (!cfg)
		return -ENODEV;

	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu, dev);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);

	/*
	 * Setup an autosuspend delay to avoid bouncing runpm state.
	 * Otherwise, if a driver for a suspended consumer device
	 * unmaps buffers, it will runpm resume/suspend for each one.
	 *
	 * For example, when used by a GPU device, when an application
	 * or game exits, it can trigger unmapping 100s or 1000s of
	 * buffers.  With a runpm cycle for each buffer, that adds up
	 * to 5-10sec worth of reprogramming the context bank, while
	 * the system appears to be locked up to the user.
	 */
	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
	pm_runtime_use_autosuspend(smmu->dev);

	/* Success and failure paths both drop the reference taken above. */
rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
1198 
1199 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1200 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1201 			      int prot, gfp_t gfp, size_t *mapped)
1202 {
1203 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1204 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1205 	int ret;
1206 
1207 	if (!ops)
1208 		return -ENODEV;
1209 
1210 	arm_smmu_rpm_get(smmu);
1211 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1212 	arm_smmu_rpm_put(smmu);
1213 
1214 	return ret;
1215 }
1216 
1217 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1218 				   size_t pgsize, size_t pgcount,
1219 				   struct iommu_iotlb_gather *iotlb_gather)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	size_t ret;
1224 
1225 	if (!ops)
1226 		return 0;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1236 {
1237 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1238 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1239 
1240 	if (smmu_domain->flush_ops) {
1241 		arm_smmu_rpm_get(smmu);
1242 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1243 		arm_smmu_rpm_put(smmu);
1244 	}
1245 }
1246 
1247 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1248 				struct iommu_iotlb_gather *gather)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (!smmu)
1254 		return;
1255 
1256 	arm_smmu_rpm_get(smmu);
1257 	if (smmu->version == ARM_SMMU_V2 ||
1258 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1259 		arm_smmu_tlb_sync_context(smmu_domain);
1260 	else
1261 		arm_smmu_tlb_sync_global(smmu);
1262 	arm_smmu_rpm_put(smmu);
1263 }
1264 
/*
 * Translate @iova to a physical address using the hardware ATS1PR
 * translation operation on the domain's context bank: kick off the
 * translation, poll ATSR until it completes, then read the result from
 * PAR. Falls back to a software page table walk if the hardware times
 * out, and returns 0 on resume failure or translation fault.
 */
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;
	phys_addr_t addr = 0;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1PR takes a page-aligned VA; the low 12 bits are re-added below. */
	va = iova & ~0xfffUL;
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	/* Poll ATSR.ACTIVE (5us interval, 50us total) for completion. */
	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
				      5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		arm_smmu_rpm_put(smmu);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & ARM_SMMU_CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		goto out;
	}

	/* Combine the PA from PAR with the page offset from the original IOVA. */
	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
out:
	arm_smmu_rpm_put(smmu);

	return addr;
}
1316 
1317 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1318 					dma_addr_t iova)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1321 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1322 
1323 	if (!ops)
1324 		return 0;
1325 
1326 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1327 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1328 		return arm_smmu_iova_to_phys_hard(domain, iova);
1329 
1330 	return ops->iova_to_phys(ops, iova);
1331 }
1332 
1333 static bool arm_smmu_capable(enum iommu_cap cap)
1334 {
1335 	switch (cap) {
1336 	case IOMMU_CAP_CACHE_COHERENCY:
1337 		/*
1338 		 * Return true here as the SMMU can always send out coherent
1339 		 * requests.
1340 		 */
1341 		return true;
1342 	case IOMMU_CAP_NOEXEC:
1343 		return true;
1344 	default:
1345 		return false;
1346 	}
1347 }
1348 
1349 static
1350 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1351 {
1352 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1353 							  fwnode);
1354 	put_device(dev);
1355 	return dev ? dev_get_drvdata(dev) : NULL;
1356 }
1357 
/*
 * Per-device probe: resolve the owning SMMU (legacy or generic binding),
 * validate the device's stream IDs/masks against the probed register
 * widths, allocate the per-master config and claim stream map entries.
 * Returns the SMMU's iommu_device on success or an ERR_PTR.
 */
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return ERR_PTR(-ENODEV);
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	/* i == fwspec->num_ids here, sizing the trailing smendx[] array. */
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	dev_iommu_priv_set(dev, cfg);
	/* Mark every smendx slot unclaimed before allocation below. */
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return &smmu->iommu;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ERR_PTR(ret);
}
1431 
/*
 * Undo arm_smmu_probe_device(): release the device's stream map entries
 * and free its per-master config.
 */
static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	int ret;

	/* The SMMU must be resumed to tear down stream map entries. */
	ret = arm_smmu_rpm_get(cfg->smmu);
	/*
	 * NOTE(review): bailing out here leaves cfg allocated and the
	 * SMEs claimed - presumably acceptable since resume failure
	 * leaves the hardware unreachable anyway; confirm.
	 */
	if (ret < 0)
		return;

	arm_smmu_master_free_smes(cfg, fwspec);

	arm_smmu_rpm_put(cfg->smmu);

	dev_iommu_priv_set(dev, NULL);
	kfree(cfg);
}
1449 
1450 static void arm_smmu_probe_finalize(struct device *dev)
1451 {
1452 	struct arm_smmu_master_cfg *cfg;
1453 	struct arm_smmu_device *smmu;
1454 
1455 	cfg = dev_iommu_priv_get(dev);
1456 	smmu = cfg->smmu;
1457 
1458 	if (smmu->impl && smmu->impl->probe_finalize)
1459 		smmu->impl->probe_finalize(smmu, dev);
1460 }
1461 
1462 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1463 {
1464 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1465 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1466 	struct arm_smmu_device *smmu = cfg->smmu;
1467 	struct iommu_group *group = NULL;
1468 	int i, idx;
1469 
1470 	mutex_lock(&smmu->stream_map_mutex);
1471 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1472 		if (group && smmu->s2crs[idx].group &&
1473 		    group != smmu->s2crs[idx].group) {
1474 			mutex_unlock(&smmu->stream_map_mutex);
1475 			return ERR_PTR(-EINVAL);
1476 		}
1477 
1478 		group = smmu->s2crs[idx].group;
1479 	}
1480 
1481 	if (group) {
1482 		mutex_unlock(&smmu->stream_map_mutex);
1483 		return iommu_group_ref_get(group);
1484 	}
1485 
1486 	if (dev_is_pci(dev))
1487 		group = pci_device_group(dev);
1488 	else if (dev_is_fsl_mc(dev))
1489 		group = fsl_mc_device_group(dev);
1490 	else
1491 		group = generic_device_group(dev);
1492 
1493 	/* Remember group for faster lookups */
1494 	if (!IS_ERR(group))
1495 		for_each_cfg_sme(cfg, fwspec, i, idx)
1496 			smmu->s2crs[idx].group = group;
1497 
1498 	mutex_unlock(&smmu->stream_map_mutex);
1499 	return group;
1500 }
1501 
1502 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1503 {
1504 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1505 	int ret = 0;
1506 
1507 	mutex_lock(&smmu_domain->init_mutex);
1508 	if (smmu_domain->smmu)
1509 		ret = -EPERM;
1510 	else
1511 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1512 	mutex_unlock(&smmu_domain->init_mutex);
1513 
1514 	return ret;
1515 }
1516 
1517 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1518 		unsigned long quirks)
1519 {
1520 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1521 	int ret = 0;
1522 
1523 	mutex_lock(&smmu_domain->init_mutex);
1524 	if (smmu_domain->smmu)
1525 		ret = -EPERM;
1526 	else
1527 		smmu_domain->pgtbl_quirks = quirks;
1528 	mutex_unlock(&smmu_domain->init_mutex);
1529 
1530 	return ret;
1531 }
1532 
1533 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1534 {
1535 	u32 mask, fwid = 0;
1536 
1537 	if (args->args_count > 0)
1538 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1539 
1540 	if (args->args_count > 1)
1541 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1542 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1543 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1544 
1545 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1546 }
1547 
1548 static void arm_smmu_get_resv_regions(struct device *dev,
1549 				      struct list_head *head)
1550 {
1551 	struct iommu_resv_region *region;
1552 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1553 
1554 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1555 					 prot, IOMMU_RESV_SW_MSI);
1556 	if (!region)
1557 		return;
1558 
1559 	list_add_tail(&region->list, head);
1560 
1561 	iommu_dma_get_resv_regions(dev, head);
1562 }
1563 
1564 static int arm_smmu_def_domain_type(struct device *dev)
1565 {
1566 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1567 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1568 
1569 	if (using_legacy_binding)
1570 		return IOMMU_DOMAIN_IDENTITY;
1571 
1572 	if (impl && impl->def_domain_type)
1573 		return impl->def_domain_type(dev);
1574 
1575 	return 0;
1576 }
1577 
/* IOMMU core callbacks for this driver, plus per-domain operations. */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.probe_finalize		= arm_smmu_probe_finalize,
	.device_group		= arm_smmu_device_group,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= arm_smmu_attach_dev,
		.map_pages		= arm_smmu_map_pages,
		.unmap_pages		= arm_smmu_unmap_pages,
		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
		.iotlb_sync		= arm_smmu_iotlb_sync,
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.enable_nesting		= arm_smmu_enable_nesting,
		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
		.free			= arm_smmu_domain_free,
	}
};
1602 
/*
 * Bring the SMMU to a known state: clear fault status, reset all stream
 * map entries and context banks, invalidate TLBs, then build and write
 * the global sCR0 configuration. The sCR0 write comes last, after a
 * global TLB sync.
 */
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int i;
	u32 reg;

	/* clear global FSR */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
	}

	/* Invalidate the TLB, just in case */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);

	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= ARM_SMMU_sCR0_USFCFG;
	else
		reg &= ~ARM_SMMU_sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~ARM_SMMU_sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(ARM_SMMU_sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= ARM_SMMU_sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= ARM_SMMU_sCR0_EXIDENABLE;

	/* Implementation-specific tweaks before the final sCR0 write. */
	if (smmu->impl && smmu->impl->reset)
		smmu->impl->reset(smmu);

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}
1664 
/*
 * Decode the ID-register address-size field into a bit width.
 * Encodings beyond the defined range (and 5) mean 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44, 48 };

	if (size < 0 || size >= (int)(sizeof(bits) / sizeof(bits[0])))
		return 48;

	return bits[size];
}
1683 
/*
 * Probe the hardware configuration from the ID0/ID1/ID2 registers:
 * translation stages, stream matching resources, context banks, page
 * sizes and address widths. Allocates the SMR/S2CR/context-bank arrays
 * and updates arm_smmu_ops.pgsize_bitmap. Returns 0 or a negative errno.
 */
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned int size;
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i, ret;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);

	if (id & ARM_SMMU_ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ARM_SMMU_ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ARM_SMMU_ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ARM_SMMU_ID0_S1TS) &&
	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ARM_SMMU_ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 ||
	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ARM_SMMU_ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ARM_SMMU_ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ARM_SMMU_ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ARM_SMMU_ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Let implementation quirks adjust the probed configuration. */
	if (smmu->impl && smmu->impl->cfg_probe) {
		ret = smmu->impl->cfg_probe(smmu);
		if (ret)
			return ret;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	/* The global ops bitmap is the intersection-friendly union of all SMMUs. */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);


	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
1887 
/* (version, model) pair selected by the DT match table below. */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

/* Define a named, const match-data instance in one line. */
#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1902 
/* Device tree compatibles, each mapped to its match data above. */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1915 
1916 #ifdef CONFIG_ACPI
1917 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1918 {
1919 	int ret = 0;
1920 
1921 	switch (model) {
1922 	case ACPI_IORT_SMMU_V1:
1923 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1924 		smmu->version = ARM_SMMU_V1;
1925 		smmu->model = GENERIC_SMMU;
1926 		break;
1927 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1928 		smmu->version = ARM_SMMU_V1_64K;
1929 		smmu->model = GENERIC_SMMU;
1930 		break;
1931 	case ACPI_IORT_SMMU_V2:
1932 		smmu->version = ARM_SMMU_V2;
1933 		smmu->model = GENERIC_SMMU;
1934 		break;
1935 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1936 		smmu->version = ARM_SMMU_V2;
1937 		smmu->model = ARM_MMU500;
1938 		break;
1939 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1940 		smmu->version = ARM_SMMU_V2;
1941 		smmu->model = CAVIUM_SMMUV2;
1942 		break;
1943 	default:
1944 		ret = -ENODEV;
1945 	}
1946 
1947 	return ret;
1948 }
1949 
/*
 * Probe SMMU description from ACPI: the IORT node is passed by the IORT
 * code as the platform device's platform data.  Fills in version/model,
 * the number of leading global/PMU IRQs in the platform IRQ list, and
 * the coherent-walk feature flag.
 */
static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
				      u32 *global_irqs, u32 *pmu_irqs)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	*global_irqs = 1;
	*pmu_irqs = 0;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
1975 #else
/* Stub for !CONFIG_ACPI builds: ACPI-described SMMUs cannot exist. */
static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
					     u32 *global_irqs, u32 *pmu_irqs)
{
	return -ENODEV;
}
1981 #endif
1982 
/*
 * Probe SMMU description from devicetree: read the global interrupt count,
 * pick up version/model from the match data, and arbitrate between the
 * deprecated "mmu-masters" binding and the generic iommus binding.  The
 * two bindings are mutually exclusive system-wide: once one style has been
 * seen, an SMMU using the other style refuses to probe.
 */
static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
				    u32 *global_irqs, u32 *pmu_irqs)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = smmu->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
		return dev_err_probe(dev, -ENODEV,
				     "missing #global-interrupts property\n");
	/* DT never describes PMU interrupts ahead of the context IRQs */
	*pmu_irqs = 0;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding) {
			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
		}
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
2018 
/*
 * Install (ops != NULL) or remove (ops == NULL) the SMMU's iommu_ops on
 * every bus type that may carry SMMU masters.  Only buses without an IOMMU
 * already present are touched; on failure, previously-set buses are unwound
 * in reverse order.
 */
static int arm_smmu_bus_init(struct iommu_ops *ops)
{
	int err;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type)) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			return err;
	}
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype)) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_platform_ops;
	}
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type)) {
		err = bus_set_iommu(&fsl_mc_bus_type, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	return 0;

	/*
	 * The "label: __maybe_unused;" null statements below keep the
	 * compiler quiet about unused labels when the corresponding
	 * CONFIG_* option (and hence the goto) is compiled out.
	 */
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
err_reset_amba_ops: __maybe_unused;
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_platform_ops: __maybe_unused;
	bus_set_iommu(&platform_bus_type, NULL);
	return err;
}
2064 
/*
 * Preserve firmware-established DMA mappings described by ACPI IORT RMR
 * (Reserved Memory Range) nodes: for each stream ID listed, claim a stream
 * mapping entry and program it for bypass so ongoing boot-time DMA (e.g.
 * from a graphics or storage controller) keeps working across SMMU reset.
 */
static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;
	int idx, cnt = 0;
	u32 reg;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	/*
	 * Rather than trying to look at existing mappings that
	 * are setup by the firmware and then invalidate the ones
	 * that do no have matching RMR entries, just disable the
	 * SMMU until it gets enabled again in the reset routine.
	 */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
	reg |= ARM_SMMU_sCR0_CLIENTPD;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			/* Reuse an existing entry for this SID, or grab a free one */
			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
			if (idx < 0)
				continue;

			/* Only initialise the SMR if this is a fresh entry */
			if (smmu->s2crs[idx].count == 0) {
				smmu->smrs[idx].id = rmr->sids[i];
				smmu->smrs[idx].mask = 0;
				smmu->smrs[idx].valid = true;
			}
			smmu->s2crs[idx].count++;
			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;

			cnt++;
		}
	}

	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
		   cnt == 1 ? "" : "s");
	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
2112 
/*
 * Probe an SMMUv1/v2 instance: parse the firmware description (DT or ACPI),
 * map the register space, enable clocks, detect hardware features, request
 * interrupts and register the device with the IOMMU core.
 */
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;
	u32 global_irqs, pmu_irqs;
	irqreturn_t (*global_fault)(int irq, void *dev);

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	/* Firmware tells us version/model and how the IRQ list is laid out */
	if (dev->of_node)
		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
	else
		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
	if (err)
		return err;

	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->ioaddr = res->start;

	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	/* Implementation-specific hooks may wrap or replace the smmu struct */
	smmu = arm_smmu_impl_init(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	num_irqs = platform_irq_count(pdev);

	/* Context IRQs follow the global and PMU IRQs in the platform list */
	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
	if (smmu->num_context_irqs <= 0)
		return dev_err_probe(dev, -ENODEV,
				"found %d interrupts but expected at least %d\n",
				num_irqs, global_irqs + pmu_irqs + 1);

	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
				  sizeof(*smmu->irqs), GFP_KERNEL);
	if (!smmu->irqs)
		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
				     smmu->num_context_irqs);

	for (i = 0; i < smmu->num_context_irqs; i++) {
		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);

		if (irq < 0)
			return irq;
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	/* Read ID registers: features, context banks, SMR count, etc. */
	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	/* Allow an implementation-specific global fault handler to override */
	if (smmu->impl && smmu->impl->global_fault)
		global_fault = smmu->impl->global_fault;
	else
		global_fault = arm_smmu_global_fault;

	for (i = 0; i < global_irqs; i++) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return irq;

		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
				       "arm-smmu global fault", smmu);
		if (err)
			return dev_err_probe(dev, err,
					"failed to request global IRQ %d (%u)\n",
					i, irq);
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &smmu->ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		goto err_sysfs_remove;
	}

	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass SMRs if any */
	arm_smmu_rmr_install_bypass_smr(smmu);

	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding) {
		err = arm_smmu_bus_init(&arm_smmu_ops);
		if (err)
			goto err_unregister_device;
	}

	return 0;

err_unregister_device:
	iommu_device_unregister(&smmu->iommu);
err_sysfs_remove:
	iommu_device_sysfs_remove(&smmu->iommu);
	return err;
}
2270 
/*
 * Tear down an SMMU instance: detach from the bus ops and IOMMU core,
 * put the hardware into client-port-disable (bypass off, translation off),
 * then release clocks — via runtime PM if it was enabled, directly
 * otherwise.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	/* Warn if any context banks are still allocated to domains */
	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_notice(&pdev->dev, "disabling translation\n");

	arm_smmu_bus_init(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}
2298 
/* On shutdown, do a full remove so the SMMU is quiesced before kexec/reboot. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
2303 
2304 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2305 {
2306 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2307 	int ret;
2308 
2309 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2310 	if (ret)
2311 		return ret;
2312 
2313 	arm_smmu_device_reset(smmu);
2314 
2315 	return 0;
2316 }
2317 
/* Runtime-suspend callback: just gate the clocks; they stay prepared. */
static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}
2326 
2327 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2328 {
2329 	int ret;
2330 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2331 
2332 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2333 	if (ret)
2334 		return ret;
2335 
2336 	if (pm_runtime_suspended(dev))
2337 		return 0;
2338 
2339 	ret = arm_smmu_runtime_resume(dev);
2340 	if (ret)
2341 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2342 
2343 	return ret;
2344 }
2345 
2346 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2347 {
2348 	int ret = 0;
2349 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2350 
2351 	if (pm_runtime_suspended(dev))
2352 		goto clk_unprepare;
2353 
2354 	ret = arm_smmu_runtime_suspend(dev);
2355 	if (ret)
2356 		return ret;
2357 
2358 clk_unprepare:
2359 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2360 	return ret;
2361 }
2362 
/* System sleep and runtime PM callbacks for the driver. */
static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
2368 
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		/* Unbinding an active IOMMU is unsafe; forbid manual unbind */
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");
2386