1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <linux/pm_runtime.h>
37 #include <linux/ratelimit.h>
38 #include <linux/slab.h>
39 
40 #include <linux/amba/bus.h>
41 #include <linux/fsl/mc.h>
42 
43 #include "arm-smmu.h"
44 
45 /*
46  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
47  * global register space are still, in fact, using a hypervisor to mediate it
48  * by trapping and emulating register accesses. Sadly, some deployed versions
49  * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register. Since the value written
 * is ignored anyway, QCOM_DUMMY_VAL is defined as -1 rather than 0 so that
 * the compiler never picks XZR/WZR as the source for those stores.
 */
52 #define QCOM_DUMMY_VAL -1
53 
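/*
 * Software-chosen IOVA window advertised to the IOMMU core as an
 * IOMMU_RESV_SW_MSI region (see arm_smmu_get_resv_regions()), within which
 * MSI doorbells are mapped for devices attached to a translation domain.
 */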
54 #define MSI_IOVA_BASE			0x8000000
55 #define MSI_IOVA_LENGTH			0x100000
56 
57 static int force_stage;
58 module_param(force_stage, int, S_IRUGO);
59 MODULE_PARM_DESC(force_stage,
60 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
61 static bool disable_bypass =
62 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
63 module_param(disable_bypass, bool, S_IRUGO);
64 MODULE_PARM_DESC(disable_bypass,
65 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
66 
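/*
 * Default state for an unused stream-to-context mapping entry: fault the
 * transaction if bypass is disabled, otherwise let it through untranslated.
 */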
67 #define s2cr_init_val (struct arm_smmu_s2cr){				\
68 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
69 }
70 
71 static bool using_legacy_binding, using_generic_binding;
72 
73 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
74 {
75 	if (pm_runtime_enabled(smmu->dev))
76 		return pm_runtime_resume_and_get(smmu->dev);
77 
78 	return 0;
79 }
80 
81 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
82 {
83 	if (pm_runtime_enabled(smmu->dev))
84 		pm_runtime_put_autosuspend(smmu->dev);
85 }
86 
87 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
88 {
89 	return container_of(dom, struct arm_smmu_domain, domain);
90 }
91 
92 static struct platform_driver arm_smmu_driver;
93 static struct iommu_ops arm_smmu_ops;
94 
95 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
96 static int arm_smmu_bus_init(struct iommu_ops *ops);
97 
98 static struct device_node *dev_get_dev_node(struct device *dev)
99 {
100 	if (dev_is_pci(dev)) {
101 		struct pci_bus *bus = to_pci_dev(dev)->bus;
102 
103 		while (!pci_is_root_bus(bus))
104 			bus = bus->parent;
105 		return of_node_get(bus->bridge->parent->of_node);
106 	}
107 
108 	return of_node_get(dev->of_node);
109 }
110 
111 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
112 {
113 	*((__be32 *)data) = cpu_to_be32(alias);
114 	return 0; /* Continue walking */
115 }
116 
117 static int __find_legacy_master_phandle(struct device *dev, void *data)
118 {
119 	struct of_phandle_iterator *it = *(void **)data;
120 	struct device_node *np = it->node;
121 	int err;
122 
123 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
124 			    "#stream-id-cells", -1)
125 		if (it->node == np) {
126 			*(void **)data = dev;
127 			return 1;
128 		}
129 	it->node = np;
130 	return err == -ENOENT ? 0 : err;
131 }
132 
133 static int arm_smmu_register_legacy_master(struct device *dev,
134 					   struct arm_smmu_device **smmu)
135 {
136 	struct device *smmu_dev;
137 	struct device_node *np;
138 	struct of_phandle_iterator it;
139 	void *data = &it;
140 	u32 *sids;
141 	__be32 pci_sid;
142 	int err;
143 
144 	np = dev_get_dev_node(dev);
145 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
146 		of_node_put(np);
147 		return -ENODEV;
148 	}
149 
150 	it.node = np;
151 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
152 				     __find_legacy_master_phandle);
153 	smmu_dev = data;
154 	of_node_put(np);
155 	if (err == 0)
156 		return -ENODEV;
157 	if (err < 0)
158 		return err;
159 
160 	if (dev_is_pci(dev)) {
161 		/* "mmu-masters" assumes Stream ID == Requester ID */
162 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
163 				       &pci_sid);
164 		it.cur = &pci_sid;
165 		it.cur_count = 1;
166 	}
167 
168 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
169 				&arm_smmu_ops);
170 	if (err)
171 		return err;
172 
173 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
174 	if (!sids)
175 		return -ENOMEM;
176 
177 	*smmu = dev_get_drvdata(smmu_dev);
178 	of_phandle_iterator_args(&it, sids, it.cur_count);
179 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
180 	kfree(sids);
181 	return err;
182 }
183 
184 /*
185  * With the legacy DT binding in play, we have no guarantees about
186  * probe order, but then we're also not doing default domains, so we can
187  * delay setting bus ops until we're sure every possible SMMU is ready,
188  * and that way ensure that no probe_device() calls get missed.
189  */
190 static int arm_smmu_legacy_bus_init(void)
191 {
192 	if (using_legacy_binding)
193 		return arm_smmu_bus_init(&arm_smmu_ops);
194 	return 0;
195 }
196 device_initcall_sync(arm_smmu_legacy_bus_init);
197 #else
198 static int arm_smmu_register_legacy_master(struct device *dev,
199 					   struct arm_smmu_device **smmu)
200 {
201 	return -ENODEV;
202 }
203 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
204 
205 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
206 {
207 	clear_bit(idx, map);
208 }
209 
210 /* Wait for any pending TLB invalidations to complete */
211 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
212 				int sync, int status)
213 {
214 	unsigned int spin_cnt, delay;
215 	u32 reg;
216 
217 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
218 		return smmu->impl->tlb_sync(smmu, page, sync, status);
219 
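	/*
	 * Issue the sync, then poll the status register in short bursts of
	 * TLB_SPIN_COUNT reads separated by exponentially growing delays,
	 * giving up once the delay reaches TLB_LOOP_TIMEOUT.
	 */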
220 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
221 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
222 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
223 			reg = arm_smmu_readl(smmu, page, status);
224 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
225 				return;
226 			cpu_relax();
227 		}
228 		udelay(delay);
229 	}
230 	dev_err_ratelimited(smmu->dev,
231 			    "TLB sync timed out -- SMMU may be deadlocked\n");
232 }
233 
234 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
235 {
236 	unsigned long flags;
237 
238 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
239 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
240 			    ARM_SMMU_GR0_sTLBGSTATUS);
241 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
242 }
243 
244 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
245 {
246 	struct arm_smmu_device *smmu = smmu_domain->smmu;
247 	unsigned long flags;
248 
249 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
250 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
251 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
252 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
253 }
254 
255 static void arm_smmu_tlb_inv_context_s1(void *cookie)
256 {
257 	struct arm_smmu_domain *smmu_domain = cookie;
258 	/*
259 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
260 	 * current CPU are visible beforehand.
261 	 */
262 	wmb();
263 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
264 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
265 	arm_smmu_tlb_sync_context(smmu_domain);
266 }
267 
268 static void arm_smmu_tlb_inv_context_s2(void *cookie)
269 {
270 	struct arm_smmu_domain *smmu_domain = cookie;
271 	struct arm_smmu_device *smmu = smmu_domain->smmu;
272 
273 	/* See above */
274 	wmb();
275 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
276 	arm_smmu_tlb_sync_global(smmu);
277 }
278 
279 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
280 				      size_t granule, void *cookie, int reg)
281 {
282 	struct arm_smmu_domain *smmu_domain = cookie;
283 	struct arm_smmu_device *smmu = smmu_domain->smmu;
284 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
285 	int idx = cfg->cbndx;
286 
287 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
288 		wmb();
289 
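	/*
	 * The invalidation address is page-aligned: the 32-bit formats carry
	 * the ASID in the low bits of the VA, while the 64-bit format takes
	 * the VA shifted right by 12 with the ASID in bits [63:48].
	 */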
290 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
291 		iova = (iova >> 12) << 12;
292 		iova |= cfg->asid;
293 		do {
294 			arm_smmu_cb_write(smmu, idx, reg, iova);
295 			iova += granule;
296 		} while (size -= granule);
297 	} else {
298 		iova >>= 12;
299 		iova |= (u64)cfg->asid << 48;
300 		do {
301 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
302 			iova += granule >> 12;
303 		} while (size -= granule);
304 	}
305 }
306 
307 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
308 				      size_t granule, void *cookie, int reg)
309 {
310 	struct arm_smmu_domain *smmu_domain = cookie;
311 	struct arm_smmu_device *smmu = smmu_domain->smmu;
312 	int idx = smmu_domain->cfg.cbndx;
313 
314 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
315 		wmb();
316 
317 	iova >>= 12;
318 	do {
319 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
320 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
321 		else
322 			arm_smmu_cb_write(smmu, idx, reg, iova);
323 		iova += granule >> 12;
324 	} while (size -= granule);
325 }
326 
327 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328 				     size_t granule, void *cookie)
329 {
330 	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
331 				  ARM_SMMU_CB_S1_TLBIVA);
332 	arm_smmu_tlb_sync_context(cookie);
333 }
334 
335 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
336 				     unsigned long iova, size_t granule,
337 				     void *cookie)
338 {
339 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
340 				  ARM_SMMU_CB_S1_TLBIVAL);
341 }
342 
343 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
344 				     size_t granule, void *cookie)
345 {
346 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
347 				  ARM_SMMU_CB_S2_TLBIIPAS2);
348 	arm_smmu_tlb_sync_context(cookie);
349 }
350 
351 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
352 				     unsigned long iova, size_t granule,
353 				     void *cookie)
354 {
355 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
356 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
357 }
358 
359 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
360 					size_t granule, void *cookie)
361 {
362 	arm_smmu_tlb_inv_context_s2(cookie);
363 }
364 /*
365  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
366  * almost negligible, but the benefit of getting the first one in as far ahead
367  * of the sync as possible is significant, hence we don't just make this a
368  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
369  * think.
370  */
371 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
372 					unsigned long iova, size_t granule,
373 					void *cookie)
374 {
375 	struct arm_smmu_domain *smmu_domain = cookie;
376 	struct arm_smmu_device *smmu = smmu_domain->smmu;
377 
378 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
379 		wmb();
380 
381 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
382 }
383 
384 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
385 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
386 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
387 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
388 };
389 
390 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
391 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
392 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
393 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
394 };
395 
396 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
397 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
398 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
399 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
400 };
401 
402 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
403 {
404 	u32 fsr, fsynr, cbfrsynra;
405 	unsigned long iova;
406 	struct iommu_domain *domain = dev;
407 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
408 	struct arm_smmu_device *smmu = smmu_domain->smmu;
409 	int idx = smmu_domain->cfg.cbndx;
410 	int ret;
411 
412 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
413 	if (!(fsr & ARM_SMMU_FSR_FAULT))
414 		return IRQ_NONE;
415 
416 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
417 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
418 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
419 
420 	ret = report_iommu_fault(domain, NULL, iova,
421 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
422 
423 	if (ret == -ENOSYS)
424 		dev_err_ratelimited(smmu->dev,
425 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
426 			    fsr, iova, fsynr, cbfrsynra, idx);
427 
428 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
429 	return IRQ_HANDLED;
430 }
431 
432 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
433 {
434 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
435 	struct arm_smmu_device *smmu = dev;
436 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
437 				      DEFAULT_RATELIMIT_BURST);
438 
439 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
440 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
441 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
442 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
443 
444 	if (!gfsr)
445 		return IRQ_NONE;
446 
447 	if (__ratelimit(&rs)) {
448 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
449 		    (gfsr & ARM_SMMU_sGFSR_USF))
450 			dev_err(smmu->dev,
451 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
452 				(u16)gfsynr1);
453 		else
454 			dev_err(smmu->dev,
455 				"Unexpected global fault, this could be serious\n");
456 		dev_err(smmu->dev,
457 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
458 			gfsr, gfsynr0, gfsynr1, gfsynr2);
459 	}
460 
461 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
462 	return IRQ_HANDLED;
463 }
464 
465 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
466 				       struct io_pgtable_cfg *pgtbl_cfg)
467 {
468 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
469 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
470 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
471 
472 	cb->cfg = cfg;
473 
474 	/* TCR */
475 	if (stage1) {
476 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
477 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
478 		} else {
479 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
480 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
481 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
482 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
483 			else
484 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
485 		}
486 	} else {
487 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
488 	}
489 
490 	/* TTBRs */
491 	if (stage1) {
492 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
493 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
494 			cb->ttbr[1] = 0;
495 		} else {
496 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
497 						 cfg->asid);
498 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
499 						 cfg->asid);
500 
501 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
502 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
503 			else
504 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
505 		}
506 	} else {
507 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
508 	}
509 
510 	/* MAIRs (stage-1 only) */
511 	if (stage1) {
512 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
513 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
514 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
515 		} else {
516 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
517 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
518 		}
519 	}
520 }
521 
522 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
523 {
524 	u32 reg;
525 	bool stage1;
526 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
527 	struct arm_smmu_cfg *cfg = cb->cfg;
528 
529 	/* Unassigned context banks only need disabling */
530 	if (!cfg) {
531 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
532 		return;
533 	}
534 
535 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
536 
537 	/* CBA2R */
538 	if (smmu->version > ARM_SMMU_V1) {
539 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
540 			reg = ARM_SMMU_CBA2R_VA64;
541 		else
542 			reg = 0;
543 		/* 16-bit VMIDs live in CBA2R */
544 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
545 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
546 
547 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
548 	}
549 
550 	/* CBAR */
551 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
552 	if (smmu->version < ARM_SMMU_V2)
553 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
554 
555 	/*
556 	 * Use the weakest shareability/memory types, so they are
557 	 * overridden by the ttbcr/pte.
558 	 */
559 	if (stage1) {
560 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
561 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
562 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
563 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
564 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
565 		/* 8-bit VMIDs live in CBAR */
566 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
567 	}
568 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
569 
570 	/*
571 	 * TCR
572 	 * We must write this before the TTBRs, since it determines the
573 	 * access behaviour of some fields (in particular, ASID[15:8]).
574 	 */
575 	if (stage1 && smmu->version > ARM_SMMU_V1)
576 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
577 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
578 
579 	/* TTBRs */
580 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
581 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
582 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
584 	} else {
585 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
586 		if (stage1)
587 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
588 					   cb->ttbr[1]);
589 	}
590 
591 	/* MAIRs (stage-1 only) */
592 	if (stage1) {
593 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
594 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
595 	}
596 
597 	/* SCTLR */
598 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
599 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
600 	if (stage1)
601 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
602 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
603 		reg |= ARM_SMMU_SCTLR_E;
604 
605 	if (smmu->impl && smmu->impl->write_sctlr)
606 		smmu->impl->write_sctlr(smmu, idx, reg);
607 	else
608 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
609 }
610 
611 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
612 				       struct arm_smmu_device *smmu,
613 				       struct device *dev, unsigned int start)
614 {
615 	if (smmu->impl && smmu->impl->alloc_context_bank)
616 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
617 
618 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
619 }
620 
621 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
622 					struct arm_smmu_device *smmu,
623 					struct device *dev)
624 {
625 	int irq, start, ret = 0;
626 	unsigned long ias, oas;
627 	struct io_pgtable_ops *pgtbl_ops;
628 	struct io_pgtable_cfg pgtbl_cfg;
629 	enum io_pgtable_fmt fmt;
630 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
631 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
632 	irqreturn_t (*context_fault)(int irq, void *dev);
633 
634 	mutex_lock(&smmu_domain->init_mutex);
635 	if (smmu_domain->smmu)
636 		goto out_unlock;
637 
638 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
639 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
640 		smmu_domain->smmu = smmu;
641 		goto out_unlock;
642 	}
643 
644 	/*
645 	 * Mapping the requested stage onto what we support is surprisingly
646 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
647 	 * support for nested translation. That means we end up with the
648 	 * following table:
649 	 *
650 	 * Requested        Supported        Actual
651 	 *     S1               N              S1
652 	 *     S1             S1+S2            S1
653 	 *     S1               S2             S2
654 	 *     S1               S1             S1
655 	 *     N                N              N
656 	 *     N              S1+S2            S2
657 	 *     N                S2             S2
658 	 *     N                S1             S1
659 	 *
660 	 * Note that you can't actually request stage-2 mappings.
661 	 */
662 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
663 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
664 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
665 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
666 
667 	/*
668 	 * Choosing a suitable context format is even more fiddly. Until we
669 	 * grow some way for the caller to express a preference, and/or move
670 	 * the decision into the io-pgtable code where it arguably belongs,
671 	 * just aim for the closest thing to the rest of the system, and hope
672 	 * that the hardware isn't esoteric enough that we can't assume AArch64
673 	 * support to be a superset of AArch32 support...
674 	 */
675 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
676 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
677 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
678 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
679 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
680 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
681 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
682 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
683 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
684 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
685 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
686 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
687 
688 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
689 		ret = -EINVAL;
690 		goto out_unlock;
691 	}
692 
693 	switch (smmu_domain->stage) {
694 	case ARM_SMMU_DOMAIN_S1:
695 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
696 		start = smmu->num_s2_context_banks;
697 		ias = smmu->va_size;
698 		oas = smmu->ipa_size;
699 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
700 			fmt = ARM_64_LPAE_S1;
701 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
702 			fmt = ARM_32_LPAE_S1;
703 			ias = min(ias, 32UL);
704 			oas = min(oas, 40UL);
705 		} else {
706 			fmt = ARM_V7S;
707 			ias = min(ias, 32UL);
708 			oas = min(oas, 32UL);
709 		}
710 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
711 		break;
712 	case ARM_SMMU_DOMAIN_NESTED:
713 		/*
714 		 * We will likely want to change this if/when KVM gets
715 		 * involved.
716 		 */
717 	case ARM_SMMU_DOMAIN_S2:
718 		cfg->cbar = CBAR_TYPE_S2_TRANS;
719 		start = 0;
720 		ias = smmu->ipa_size;
721 		oas = smmu->pa_size;
722 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
723 			fmt = ARM_64_LPAE_S2;
724 		} else {
725 			fmt = ARM_32_LPAE_S2;
726 			ias = min(ias, 40UL);
727 			oas = min(oas, 40UL);
728 		}
729 		if (smmu->version == ARM_SMMU_V2)
730 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
731 		else
732 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
733 		break;
734 	default:
735 		ret = -EINVAL;
736 		goto out_unlock;
737 	}
738 
739 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
	if (ret < 0)
		goto out_unlock;
743 
744 	smmu_domain->smmu = smmu;
745 
746 	cfg->cbndx = ret;
747 	if (smmu->version < ARM_SMMU_V2) {
748 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
749 		cfg->irptndx %= smmu->num_context_irqs;
750 	} else {
751 		cfg->irptndx = cfg->cbndx;
752 	}
753 
754 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
755 		cfg->vmid = cfg->cbndx + 1;
756 	else
757 		cfg->asid = cfg->cbndx;
758 
759 	pgtbl_cfg = (struct io_pgtable_cfg) {
760 		.pgsize_bitmap	= smmu->pgsize_bitmap,
761 		.ias		= ias,
762 		.oas		= oas,
763 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
764 		.tlb		= smmu_domain->flush_ops,
765 		.iommu_dev	= smmu->dev,
766 	};
767 
768 	if (!iommu_get_dma_strict(domain))
769 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
770 
771 	if (smmu->impl && smmu->impl->init_context) {
772 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
773 		if (ret)
774 			goto out_clear_smmu;
775 	}
776 
777 	if (smmu_domain->pgtbl_quirks)
778 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
779 
780 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
781 	if (!pgtbl_ops) {
782 		ret = -ENOMEM;
783 		goto out_clear_smmu;
784 	}
785 
786 	/* Update the domain's page sizes to reflect the page table format */
787 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
788 
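	/*
	 * A TTBR1 table serves the top of the IOVA space, so report an
	 * aperture of [~0UL << ias, ~0UL]; otherwise the usable range is
	 * [0, (1UL << ias) - 1] at the bottom.
	 */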
789 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
790 		domain->geometry.aperture_start = ~0UL << ias;
791 		domain->geometry.aperture_end = ~0UL;
792 	} else {
793 		domain->geometry.aperture_end = (1UL << ias) - 1;
794 	}
795 
796 	domain->geometry.force_aperture = true;
797 
798 	/* Initialise the context bank with our page table cfg */
799 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
800 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
801 
802 	/*
803 	 * Request context fault interrupt. Do this last to avoid the
804 	 * handler seeing a half-initialised domain state.
805 	 */
806 	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
807 
808 	if (smmu->impl && smmu->impl->context_fault)
809 		context_fault = smmu->impl->context_fault;
810 	else
811 		context_fault = arm_smmu_context_fault;
812 
813 	ret = devm_request_irq(smmu->dev, irq, context_fault,
814 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
815 	if (ret < 0) {
816 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
817 			cfg->irptndx, irq);
818 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
819 	}
820 
821 	mutex_unlock(&smmu_domain->init_mutex);
822 
823 	/* Publish page table ops for map/unmap */
824 	smmu_domain->pgtbl_ops = pgtbl_ops;
825 	return 0;
826 
827 out_clear_smmu:
828 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
829 	smmu_domain->smmu = NULL;
830 out_unlock:
831 	mutex_unlock(&smmu_domain->init_mutex);
832 	return ret;
833 }
834 
835 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
836 {
837 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
838 	struct arm_smmu_device *smmu = smmu_domain->smmu;
839 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
840 	int ret, irq;
841 
842 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
843 		return;
844 
845 	ret = arm_smmu_rpm_get(smmu);
846 	if (ret < 0)
847 		return;
848 
	/*
	 * Disable the context bank and free its page tables before
	 * returning it to the allocator.
	 */
853 	smmu->cbs[cfg->cbndx].cfg = NULL;
854 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
855 
856 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
857 		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
858 		devm_free_irq(smmu->dev, irq, domain);
859 	}
860 
861 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
862 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
863 
864 	arm_smmu_rpm_put(smmu);
865 }
866 
867 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
868 {
869 	struct arm_smmu_domain *smmu_domain;
870 
871 	if (type != IOMMU_DOMAIN_UNMANAGED &&
872 	    type != IOMMU_DOMAIN_DMA &&
873 	    type != IOMMU_DOMAIN_IDENTITY)
874 		return NULL;
875 	/*
876 	 * Allocate the domain and initialise some of its data structures.
877 	 * We can't really do anything meaningful until we've added a
878 	 * master.
879 	 */
880 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
881 	if (!smmu_domain)
882 		return NULL;
883 
884 	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
885 	    iommu_get_dma_cookie(&smmu_domain->domain))) {
886 		kfree(smmu_domain);
887 		return NULL;
888 	}
889 
890 	mutex_init(&smmu_domain->init_mutex);
891 	spin_lock_init(&smmu_domain->cb_lock);
892 
893 	return &smmu_domain->domain;
894 }
895 
896 static void arm_smmu_domain_free(struct iommu_domain *domain)
897 {
898 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
899 
900 	/*
901 	 * Free the domain resources. We assume that all devices have
902 	 * already been detached.
903 	 */
904 	iommu_put_dma_cookie(domain);
905 	arm_smmu_destroy_domain_context(domain);
906 	kfree(smmu_domain);
907 }
908 
909 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
910 {
911 	struct arm_smmu_smr *smr = smmu->smrs + idx;
912 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
913 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
914 
915 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
916 		reg |= ARM_SMMU_SMR_VALID;
917 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
918 }
919 
920 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
921 {
922 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
923 	u32 reg;
924 
925 	if (smmu->impl && smmu->impl->write_s2cr) {
926 		smmu->impl->write_s2cr(smmu, idx);
927 		return;
928 	}
929 
930 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
931 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
932 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
933 
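	/*
	 * With extended Stream IDs, the SMR VALID bit is repurposed as an
	 * extra ID/mask bit, so a valid entry is signalled through
	 * S2CR.EXIDVALID instead.
	 */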
934 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
935 	    smmu->smrs[idx].valid)
936 		reg |= ARM_SMMU_S2CR_EXIDVALID;
937 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
938 }
939 
940 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
941 {
942 	arm_smmu_write_s2cr(smmu, idx);
943 	if (smmu->smrs)
944 		arm_smmu_write_smr(smmu, idx);
945 }
946 
947 /*
948  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
949  * should be called after sCR0 is written.
950  */
951 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
952 {
953 	u32 smr;
954 	int i;
955 
956 	if (!smmu->smrs)
957 		return;
958 	/*
959 	 * If we've had to accommodate firmware memory regions, we may
960 	 * have live SMRs by now; tread carefully...
961 	 *
962 	 * Somewhat perversely, not having a free SMR for this test implies we
963 	 * can get away without it anyway, as we'll only be able to 'allocate'
964 	 * these SMRs for the ID/mask values we're already trusting to be OK.
965 	 */
966 	for (i = 0; i < smmu->num_mapping_groups; i++)
967 		if (!smmu->smrs[i].valid)
968 			goto smr_ok;
969 	return;
970 smr_ok:
971 	/*
972 	 * SMR.ID bits may not be preserved if the corresponding MASK
973 	 * bits are set, so check each one separately. We can reject
974 	 * masters later if they try to claim IDs outside these masks.
975 	 */
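	/*
	 * For example, if a written ID pattern of 0xffff reads back as
	 * 0x7fff, bit 15 of SMR.ID is not implemented and streamid_mask
	 * shrinks to match.
	 */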
976 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
977 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
978 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
979 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
980 
981 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
982 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
983 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
984 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
985 }
986 
987 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
988 {
989 	struct arm_smmu_smr *smrs = smmu->smrs;
990 	int i, free_idx = -ENOSPC;
991 
992 	/* Stream indexing is blissfully easy */
993 	if (!smrs)
994 		return id;
995 
996 	/* Validating SMRs is... less so */
997 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
998 		if (!smrs[i].valid) {
999 			/*
1000 			 * Note the first free entry we come across, which
1001 			 * we'll claim in the end if nothing else matches.
1002 			 */
1003 			if (free_idx < 0)
1004 				free_idx = i;
1005 			continue;
1006 		}
1007 		/*
1008 		 * If the new entry is _entirely_ matched by an existing entry,
1009 		 * then reuse that, with the guarantee that there also cannot
1010 		 * be any subsequent conflicting entries. In normal use we'd
1011 		 * expect simply identical entries for this case, but there's
1012 		 * no harm in accommodating the generalisation.
1013 		 */
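		/*
		 * For example, an existing entry {id 0x400, mask 0x0ff},
		 * matching IDs 0x400-0x4ff, entirely covers a new
		 * {id 0x420, mask 0x00f}, so its index can simply be reused.
		 */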
1014 		if ((mask & smrs[i].mask) == mask &&
1015 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1016 			return i;
1017 		/*
1018 		 * If the new entry has any other overlap with an existing one,
1019 		 * though, then there always exists at least one stream ID
1020 		 * which would cause a conflict, and we can't allow that risk.
1021 		 */
1022 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1023 			return -EINVAL;
1024 	}
1025 
1026 	return free_idx;
1027 }
1028 
1029 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1030 {
1031 	if (--smmu->s2crs[idx].count)
1032 		return false;
1033 
1034 	smmu->s2crs[idx] = s2cr_init_val;
1035 	if (smmu->smrs)
1036 		smmu->smrs[idx].valid = false;
1037 
1038 	return true;
1039 }
1040 
1041 static int arm_smmu_master_alloc_smes(struct device *dev)
1042 {
1043 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1044 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1045 	struct arm_smmu_device *smmu = cfg->smmu;
1046 	struct arm_smmu_smr *smrs = smmu->smrs;
1047 	int i, idx, ret;
1048 
1049 	mutex_lock(&smmu->stream_map_mutex);
1050 	/* Figure out a viable stream map entry allocation */
1051 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1052 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1053 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1054 
1055 		if (idx != INVALID_SMENDX) {
1056 			ret = -EEXIST;
1057 			goto out_err;
1058 		}
1059 
1060 		ret = arm_smmu_find_sme(smmu, sid, mask);
1061 		if (ret < 0)
1062 			goto out_err;
1063 
1064 		idx = ret;
1065 		if (smrs && smmu->s2crs[idx].count == 0) {
1066 			smrs[idx].id = sid;
1067 			smrs[idx].mask = mask;
1068 			smrs[idx].valid = true;
1069 		}
1070 		smmu->s2crs[idx].count++;
1071 		cfg->smendx[i] = (s16)idx;
1072 	}
1073 
1074 	/* It worked! Now, poke the actual hardware */
1075 	for_each_cfg_sme(cfg, fwspec, i, idx)
1076 		arm_smmu_write_sme(smmu, idx);
1077 
1078 	mutex_unlock(&smmu->stream_map_mutex);
1079 	return 0;
1080 
1081 out_err:
1082 	while (i--) {
1083 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1084 		cfg->smendx[i] = INVALID_SMENDX;
1085 	}
1086 	mutex_unlock(&smmu->stream_map_mutex);
1087 	return ret;
1088 }
1089 
1090 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1091 				      struct iommu_fwspec *fwspec)
1092 {
1093 	struct arm_smmu_device *smmu = cfg->smmu;
1094 	int i, idx;
1095 
1096 	mutex_lock(&smmu->stream_map_mutex);
1097 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1098 		if (arm_smmu_free_sme(smmu, idx))
1099 			arm_smmu_write_sme(smmu, idx);
1100 		cfg->smendx[i] = INVALID_SMENDX;
1101 	}
1102 	mutex_unlock(&smmu->stream_map_mutex);
1103 }
1104 
1105 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1106 				      struct arm_smmu_master_cfg *cfg,
1107 				      struct iommu_fwspec *fwspec)
1108 {
1109 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1110 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1111 	u8 cbndx = smmu_domain->cfg.cbndx;
1112 	enum arm_smmu_s2cr_type type;
1113 	int i, idx;
1114 
1115 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1116 		type = S2CR_TYPE_BYPASS;
1117 	else
1118 		type = S2CR_TYPE_TRANS;
1119 
1120 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1121 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1122 			continue;
1123 
1124 		s2cr[idx].type = type;
1125 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1126 		s2cr[idx].cbndx = cbndx;
1127 		arm_smmu_write_s2cr(smmu, idx);
1128 	}
1129 	return 0;
1130 }
1131 
1132 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1133 {
1134 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1135 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1136 	struct arm_smmu_master_cfg *cfg;
1137 	struct arm_smmu_device *smmu;
1138 	int ret;
1139 
1140 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1141 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1142 		return -ENXIO;
1143 	}
1144 
1145 	/*
1146 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1147 	 * domains between of_xlate() and probe_device() - we have no way to cope
1148 	 * with that, so until ARM gets converted to rely on groups and default
1149 	 * domains, just say no (but more politely than by dereferencing NULL).
1150 	 * This should be at least a WARN_ON once that's sorted.
1151 	 */
1152 	cfg = dev_iommu_priv_get(dev);
1153 	if (!cfg)
1154 		return -ENODEV;
1155 
1156 	smmu = cfg->smmu;
1157 
1158 	ret = arm_smmu_rpm_get(smmu);
1159 	if (ret < 0)
1160 		return ret;
1161 
1162 	/* Ensure that the domain is finalised */
1163 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1164 	if (ret < 0)
1165 		goto rpm_put;
1166 
1167 	/*
1168 	 * Sanity check the domain. We don't support domains across
1169 	 * different SMMUs.
1170 	 */
1171 	if (smmu_domain->smmu != smmu) {
1172 		dev_err(dev,
1173 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1174 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1175 		ret = -EINVAL;
1176 		goto rpm_put;
1177 	}
1178 
1179 	/* Looks ok, so add the device to the domain */
1180 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1181 
1182 	/*
1183 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1184 	 * Otherwise, if a driver for a suspended consumer device
1185 	 * unmaps buffers, it will runpm resume/suspend for each one.
1186 	 *
1187 	 * For example, when used by a GPU device, when an application
1188 	 * or game exits, it can trigger unmapping 100s or 1000s of
1189 	 * buffers.  With a runpm cycle for each buffer, that adds up
1190 	 * to 5-10sec worth of reprogramming the context bank, while
1191 	 * the system appears to be locked up to the user.
1192 	 */
1193 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1194 	pm_runtime_use_autosuspend(smmu->dev);
1195 
1196 rpm_put:
1197 	arm_smmu_rpm_put(smmu);
1198 	return ret;
1199 }
1200 
1201 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1202 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1203 {
1204 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1205 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1206 	int ret;
1207 
1208 	if (!ops)
1209 		return -ENODEV;
1210 
1211 	arm_smmu_rpm_get(smmu);
1212 	ret = ops->map(ops, iova, paddr, size, prot, gfp);
1213 	arm_smmu_rpm_put(smmu);
1214 
1215 	return ret;
1216 }
1217 
1218 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1219 			     size_t size, struct iommu_iotlb_gather *gather)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	size_t ret;
1224 
1225 	if (!ops)
1226 		return 0;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->unmap(ops, iova, size, gather);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1236 {
1237 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1238 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1239 
1240 	if (smmu_domain->flush_ops) {
1241 		arm_smmu_rpm_get(smmu);
1242 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1243 		arm_smmu_rpm_put(smmu);
1244 	}
1245 }
1246 
1247 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1248 				struct iommu_iotlb_gather *gather)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (!smmu)
1254 		return;
1255 
1256 	arm_smmu_rpm_get(smmu);
1257 	if (smmu->version == ARM_SMMU_V2 ||
1258 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1259 		arm_smmu_tlb_sync_context(smmu_domain);
1260 	else
1261 		arm_smmu_tlb_sync_global(smmu);
1262 	arm_smmu_rpm_put(smmu);
1263 }
1264 
1265 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1266 					      dma_addr_t iova)
1267 {
1268 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1269 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1270 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1272 	struct device *dev = smmu->dev;
1273 	void __iomem *reg;
1274 	u32 tmp;
1275 	u64 phys;
1276 	unsigned long va, flags;
1277 	int ret, idx = cfg->cbndx;
1278 	phys_addr_t addr = 0;
1279 
1280 	ret = arm_smmu_rpm_get(smmu);
1281 	if (ret < 0)
1282 		return 0;
1283 
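	/*
	 * Use the hardware address translation operation: write the VA to
	 * ATS1PR, poll ATSR until the walk completes, then read the result
	 * (or fault indication) back from PAR.
	 */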
1284 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1285 	va = iova & ~0xfffUL;
1286 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1287 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1288 	else
1289 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1290 
1291 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1292 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1293 				      5, 50)) {
1294 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1295 		dev_err(dev,
1296 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1297 			&iova);
1298 		arm_smmu_rpm_put(smmu);
1299 		return ops->iova_to_phys(ops, iova);
1300 	}
1301 
1302 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1303 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1304 	if (phys & ARM_SMMU_CB_PAR_F) {
1305 		dev_err(dev, "translation fault!\n");
1306 		dev_err(dev, "PAR = 0x%llx\n", phys);
1307 		goto out;
1308 	}
1309 
1310 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1311 out:
1312 	arm_smmu_rpm_put(smmu);
1313 
1314 	return addr;
1315 }
1316 
1317 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1318 					dma_addr_t iova)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1321 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1322 
1323 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
1324 		return iova;
1325 
1326 	if (!ops)
1327 		return 0;
1328 
1329 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1330 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1331 		return arm_smmu_iova_to_phys_hard(domain, iova);
1332 
1333 	return ops->iova_to_phys(ops, iova);
1334 }
1335 
1336 static bool arm_smmu_capable(enum iommu_cap cap)
1337 {
1338 	switch (cap) {
1339 	case IOMMU_CAP_CACHE_COHERENCY:
1340 		/*
1341 		 * Return true here as the SMMU can always send out coherent
1342 		 * requests.
1343 		 */
1344 		return true;
1345 	case IOMMU_CAP_NOEXEC:
1346 		return true;
1347 	default:
1348 		return false;
1349 	}
1350 }
1351 
1352 static
1353 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1354 {
1355 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1356 							  fwnode);
1357 	put_device(dev);
1358 	return dev ? dev_get_drvdata(dev) : NULL;
1359 }
1360 
1361 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1362 {
1363 	struct arm_smmu_device *smmu = NULL;
1364 	struct arm_smmu_master_cfg *cfg;
1365 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1366 	int i, ret;
1367 
1368 	if (using_legacy_binding) {
1369 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1370 
1371 		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1373 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1374 		 * later use.
1375 		 */
1376 		fwspec = dev_iommu_fwspec_get(dev);
1377 		if (ret)
1378 			goto out_free;
1379 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1380 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1381 	} else {
1382 		return ERR_PTR(-ENODEV);
1383 	}
1384 
1385 	ret = -EINVAL;
1386 	for (i = 0; i < fwspec->num_ids; i++) {
1387 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1388 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1389 
1390 		if (sid & ~smmu->streamid_mask) {
1391 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1392 				sid, smmu->streamid_mask);
1393 			goto out_free;
1394 		}
1395 		if (mask & ~smmu->smr_mask_mask) {
1396 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1397 				mask, smmu->smr_mask_mask);
1398 			goto out_free;
1399 		}
1400 	}
1401 
1402 	ret = -ENOMEM;
1403 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1404 		      GFP_KERNEL);
1405 	if (!cfg)
1406 		goto out_free;
1407 
1408 	cfg->smmu = smmu;
1409 	dev_iommu_priv_set(dev, cfg);
1410 	while (i--)
1411 		cfg->smendx[i] = INVALID_SMENDX;
1412 
1413 	ret = arm_smmu_rpm_get(smmu);
1414 	if (ret < 0)
1415 		goto out_cfg_free;
1416 
1417 	ret = arm_smmu_master_alloc_smes(dev);
1418 	arm_smmu_rpm_put(smmu);
1419 
1420 	if (ret)
1421 		goto out_cfg_free;
1422 
1423 	device_link_add(dev, smmu->dev,
1424 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1425 
1426 	return &smmu->iommu;
1427 
1428 out_cfg_free:
1429 	kfree(cfg);
1430 out_free:
1431 	iommu_fwspec_free(dev);
1432 	return ERR_PTR(ret);
1433 }
1434 
1435 static void arm_smmu_release_device(struct device *dev)
1436 {
1437 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1438 	struct arm_smmu_master_cfg *cfg;
1439 	struct arm_smmu_device *smmu;
1440 	int ret;
1441 
1442 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1443 		return;
1444 
1445 	cfg  = dev_iommu_priv_get(dev);
1446 	smmu = cfg->smmu;
1447 
1448 	ret = arm_smmu_rpm_get(smmu);
1449 	if (ret < 0)
1450 		return;
1451 
1452 	arm_smmu_master_free_smes(cfg, fwspec);
1453 
1454 	arm_smmu_rpm_put(smmu);
1455 
1456 	dev_iommu_priv_set(dev, NULL);
1457 	kfree(cfg);
1458 	iommu_fwspec_free(dev);
1459 }
1460 
1461 static void arm_smmu_probe_finalize(struct device *dev)
1462 {
1463 	struct arm_smmu_master_cfg *cfg;
1464 	struct arm_smmu_device *smmu;
1465 
1466 	cfg = dev_iommu_priv_get(dev);
1467 	smmu = cfg->smmu;
1468 
1469 	if (smmu->impl && smmu->impl->probe_finalize)
1470 		smmu->impl->probe_finalize(smmu, dev);
1471 }
1472 
1473 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1474 {
1475 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1476 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1477 	struct arm_smmu_device *smmu = cfg->smmu;
1478 	struct iommu_group *group = NULL;
1479 	int i, idx;
1480 
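	/*
	 * Masters whose Stream IDs alias onto the same stream map entry must
	 * share an IOMMU group, so reuse any group already recorded against
	 * our entries and reject impossible combinations.
	 */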
1481 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1482 		if (group && smmu->s2crs[idx].group &&
1483 		    group != smmu->s2crs[idx].group)
1484 			return ERR_PTR(-EINVAL);
1485 
1486 		group = smmu->s2crs[idx].group;
1487 	}
1488 
1489 	if (group)
1490 		return iommu_group_ref_get(group);
1491 
1492 	if (dev_is_pci(dev))
1493 		group = pci_device_group(dev);
1494 	else if (dev_is_fsl_mc(dev))
1495 		group = fsl_mc_device_group(dev);
1496 	else
1497 		group = generic_device_group(dev);
1498 
1499 	/* Remember group for faster lookups */
1500 	if (!IS_ERR(group))
1501 		for_each_cfg_sme(cfg, fwspec, i, idx)
1502 			smmu->s2crs[idx].group = group;
1503 
1504 	return group;
1505 }
1506 
1507 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1508 {
1509 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1510 	int ret = 0;
1511 
1512 	mutex_lock(&smmu_domain->init_mutex);
1513 	if (smmu_domain->smmu)
1514 		ret = -EPERM;
1515 	else
1516 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1517 	mutex_unlock(&smmu_domain->init_mutex);
1518 
1519 	return ret;
1520 }
1521 
1522 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1523 		unsigned long quirks)
1524 {
1525 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1526 	int ret = 0;
1527 
1528 	mutex_lock(&smmu_domain->init_mutex);
1529 	if (smmu_domain->smmu)
1530 		ret = -EPERM;
1531 	else
1532 		smmu_domain->pgtbl_quirks = quirks;
1533 	mutex_unlock(&smmu_domain->init_mutex);
1534 
1535 	return ret;
1536 }
1537 
1538 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1539 {
1540 	u32 mask, fwid = 0;
1541 
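	/*
	 * Pack the Stream ID and optional SMR mask into a single firmware ID,
	 * using the same field layout as the SMR register.
	 */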
1542 	if (args->args_count > 0)
1543 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1544 
1545 	if (args->args_count > 1)
1546 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1547 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1548 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1549 
1550 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1551 }
1552 
1553 static void arm_smmu_get_resv_regions(struct device *dev,
1554 				      struct list_head *head)
1555 {
1556 	struct iommu_resv_region *region;
1557 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1558 
1559 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1560 					 prot, IOMMU_RESV_SW_MSI);
1561 	if (!region)
1562 		return;
1563 
1564 	list_add_tail(&region->list, head);
1565 
1566 	iommu_dma_get_resv_regions(dev, head);
1567 }
1568 
1569 static int arm_smmu_def_domain_type(struct device *dev)
1570 {
1571 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1572 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1573 
1574 	if (impl && impl->def_domain_type)
1575 		return impl->def_domain_type(dev);
1576 
1577 	return 0;
1578 }
1579 
1580 static struct iommu_ops arm_smmu_ops = {
1581 	.capable		= arm_smmu_capable,
1582 	.domain_alloc		= arm_smmu_domain_alloc,
1583 	.domain_free		= arm_smmu_domain_free,
1584 	.attach_dev		= arm_smmu_attach_dev,
1585 	.map			= arm_smmu_map,
1586 	.unmap			= arm_smmu_unmap,
1587 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1588 	.iotlb_sync		= arm_smmu_iotlb_sync,
1589 	.iova_to_phys		= arm_smmu_iova_to_phys,
1590 	.probe_device		= arm_smmu_probe_device,
1591 	.release_device		= arm_smmu_release_device,
1592 	.probe_finalize		= arm_smmu_probe_finalize,
1593 	.device_group		= arm_smmu_device_group,
1594 	.enable_nesting		= arm_smmu_enable_nesting,
1595 	.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1596 	.of_xlate		= arm_smmu_of_xlate,
1597 	.get_resv_regions	= arm_smmu_get_resv_regions,
1598 	.put_resv_regions	= generic_iommu_put_resv_regions,
1599 	.def_domain_type	= arm_smmu_def_domain_type,
1600 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1601 	.owner			= THIS_MODULE,
1602 };
1603 
1604 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1605 {
1606 	int i;
1607 	u32 reg;
1608 
1609 	/* clear global FSR */
1610 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1611 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1612 
1613 	/*
1614 	 * Reset stream mapping groups: Initial values mark all SMRn as
1615 	 * invalid and all S2CRn as bypass unless overridden.
1616 	 */
1617 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1618 		arm_smmu_write_sme(smmu, i);
1619 
1620 	/* Make sure all context banks are disabled and clear CB_FSR  */
1621 	for (i = 0; i < smmu->num_context_banks; ++i) {
1622 		arm_smmu_write_context_bank(smmu, i);
1623 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1624 	}
1625 
1626 	/* Invalidate the TLB, just in case */
1627 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1628 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1629 
1630 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1631 
1632 	/* Enable fault reporting */
1633 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1634 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1635 
1636 	/* Disable TLB broadcasting. */
1637 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1638 
1639 	/* Enable client access, handling unmatched streams as appropriate */
1640 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1641 	if (disable_bypass)
1642 		reg |= ARM_SMMU_sCR0_USFCFG;
1643 	else
1644 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1645 
1646 	/* Disable forced broadcasting */
1647 	reg &= ~ARM_SMMU_sCR0_FB;
1648 
1649 	/* Don't upgrade barriers */
1650 	reg &= ~(ARM_SMMU_sCR0_BSU);
1651 
1652 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1653 		reg |= ARM_SMMU_sCR0_VMID16EN;
1654 
1655 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1656 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1657 
1658 	if (smmu->impl && smmu->impl->reset)
1659 		smmu->impl->reset(smmu);
1660 
1661 	/* Push the button */
1662 	arm_smmu_tlb_sync_global(smmu);
1663 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1664 }
1665 
1666 static int arm_smmu_id_size_to_bits(int size)
1667 {
1668 	switch (size) {
1669 	case 0:
1670 		return 32;
1671 	case 1:
1672 		return 36;
1673 	case 2:
1674 		return 40;
1675 	case 3:
1676 		return 42;
1677 	case 4:
1678 		return 44;
1679 	case 5:
1680 	default:
1681 		return 48;
1682 	}
1683 }
1684 
1685 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1686 {
1687 	unsigned int size;
1688 	u32 id;
1689 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1690 	int i, ret;
1691 
1692 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1693 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1694 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1695 
1696 	/* ID0 */
1697 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1698 
1699 	/* Restrict available stages based on module parameter */
1700 	if (force_stage == 1)
1701 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1702 	else if (force_stage == 2)
1703 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1704 
1705 	if (id & ARM_SMMU_ID0_S1TS) {
1706 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1707 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1708 	}
1709 
1710 	if (id & ARM_SMMU_ID0_S2TS) {
1711 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1712 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1713 	}
1714 
1715 	if (id & ARM_SMMU_ID0_NTS) {
1716 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1717 		dev_notice(smmu->dev, "\tnested translation\n");
1718 	}
1719 
1720 	if (!(smmu->features &
1721 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1722 		dev_err(smmu->dev, "\tno translation support!\n");
1723 		return -ENODEV;
1724 	}
1725 
1726 	if ((id & ARM_SMMU_ID0_S1TS) &&
1727 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1728 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1729 		dev_notice(smmu->dev, "\taddress translation ops\n");
1730 	}
1731 
1732 	/*
1733 	 * In order for DMA API calls to work properly, we must defer to what
1734 	 * the FW says about coherency, regardless of what the hardware claims.
1735 	 * Fortunately, this also opens up a workaround for systems where the
1736 	 * ID register value has ended up configured incorrectly.
1737 	 */
1738 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1739 	if (cttw_fw || cttw_reg)
1740 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1741 			   cttw_fw ? "" : "non-");
1742 	if (cttw_fw != cttw_reg)
1743 		dev_notice(smmu->dev,
1744 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1745 
1746 	/* Max. number of entries we have for stream matching/indexing */
1747 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1748 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1749 		size = 1 << 16;
1750 	} else {
1751 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1752 	}
1753 	smmu->streamid_mask = size - 1;
1754 	if (id & ARM_SMMU_ID0_SMS) {
1755 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1756 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1757 		if (size == 0) {
1758 			dev_err(smmu->dev,
1759 				"stream-matching supported, but no SMRs present!\n");
1760 			return -ENODEV;
1761 		}
1762 
1763 		/* Zero-initialised to mark as invalid */
1764 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1765 					  GFP_KERNEL);
1766 		if (!smmu->smrs)
1767 			return -ENOMEM;
1768 
		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups\n", size);
1771 	}
1772 	/* s2cr->type == 0 means translation, so initialise explicitly */
1773 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1774 					 GFP_KERNEL);
1775 	if (!smmu->s2crs)
1776 		return -ENOMEM;
1777 	for (i = 0; i < size; i++)
1778 		smmu->s2crs[i] = s2cr_init_val;
1779 
1780 	smmu->num_mapping_groups = size;
1781 	mutex_init(&smmu->stream_map_mutex);
1782 	spin_lock_init(&smmu->global_sync_lock);
1783 
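	/*
	 * SMMUv1, and any SMMUv2 without the PTFS_NO_AARCH32 ID bit set,
	 * supports the AArch32 long-descriptor format (and possibly the
	 * short-descriptor one too).
	 */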
1784 	if (smmu->version < ARM_SMMU_V2 ||
1785 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1786 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1787 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1788 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1789 	}
1790 
1791 	/* ID1 */
1792 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
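	/* IDR1.PAGESIZE selects 64KB or 4KB pages in the SMMU register map */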
1793 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1794 
1795 	/* Check for size mismatch of SMMU address space from mapped region */
1796 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1797 	if (smmu->numpage != 2 * size << smmu->pgshift)
1798 		dev_warn(smmu->dev,
1799 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1800 			2 * size << smmu->pgshift, smmu->numpage);
1801 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1802 	smmu->numpage = size;
1803 
1804 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1805 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1806 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1807 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1808 		return -ENODEV;
1809 	}
1810 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1811 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1812 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1813 				 sizeof(*smmu->cbs), GFP_KERNEL);
1814 	if (!smmu->cbs)
1815 		return -ENOMEM;
1816 
1817 	/* ID2 */
1818 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1819 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1820 	smmu->ipa_size = size;
1821 
1822 	/* The output mask is also applied for bypass */
1823 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1824 	smmu->pa_size = size;
1825 
1826 	if (id & ARM_SMMU_ID2_VMID16)
1827 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1828 
1829 	/*
1830 	 * What the page table walker can address actually depends on which
1831 	 * descriptor format is in use, but since a) we don't know that yet,
1832 	 * and b) it can vary per context bank, this will have to do...
1833 	 */
1834 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1835 		dev_warn(smmu->dev,
1836 			 "failed to set DMA mask for table walker\n");
1837 
1838 	if (smmu->version < ARM_SMMU_V2) {
1839 		smmu->va_size = smmu->ipa_size;
1840 		if (smmu->version == ARM_SMMU_V1_64K)
1841 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1842 	} else {
1843 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1844 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1845 		if (id & ARM_SMMU_ID2_PTFS_4K)
1846 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1847 		if (id & ARM_SMMU_ID2_PTFS_16K)
1848 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1849 		if (id & ARM_SMMU_ID2_PTFS_64K)
1850 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1851 	}
1852 
1853 	if (smmu->impl && smmu->impl->cfg_probe) {
1854 		ret = smmu->impl->cfg_probe(smmu);
1855 		if (ret)
1856 			return ret;
1857 	}
1858 
1859 	/* Now we've corralled the various formats, what'll it do? */
1860 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1861 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1862 	if (smmu->features &
1863 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1864 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1865 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1866 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1867 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1868 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1869 
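	/*
	 * arm_smmu_ops is shared by all SMMU instances, so accumulate the
	 * union of supported page sizes (-1UL means not yet initialised).
	 */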
1870 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1871 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1872 	else
1873 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1874 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1875 		   smmu->pgsize_bitmap);
1876 
1878 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1879 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1880 			   smmu->va_size, smmu->ipa_size);
1881 
1882 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1883 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1884 			   smmu->ipa_size, smmu->pa_size);
1885 
1886 	return 0;
1887 }
1888 
1889 struct arm_smmu_match_data {
1890 	enum arm_smmu_arch_version version;
1891 	enum arm_smmu_implementation model;
1892 };
1893 
1894 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1895 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1896 
1897 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1898 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1899 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1900 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1901 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1902 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1903 
1904 static const struct of_device_id arm_smmu_of_match[] = {
1905 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1906 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1907 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1908 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1909 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1910 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1911 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1912 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1913 	{ },
1914 };
1915 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1916 
1917 #ifdef CONFIG_ACPI
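/* Map the IORT node's model field onto the driver's version/implementation */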
1918 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1919 {
1920 	int ret = 0;
1921 
1922 	switch (model) {
1923 	case ACPI_IORT_SMMU_V1:
1924 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1925 		smmu->version = ARM_SMMU_V1;
1926 		smmu->model = GENERIC_SMMU;
1927 		break;
1928 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1929 		smmu->version = ARM_SMMU_V1_64K;
1930 		smmu->model = GENERIC_SMMU;
1931 		break;
1932 	case ACPI_IORT_SMMU_V2:
1933 		smmu->version = ARM_SMMU_V2;
1934 		smmu->model = GENERIC_SMMU;
1935 		break;
1936 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1937 		smmu->version = ARM_SMMU_V2;
1938 		smmu->model = ARM_MMU500;
1939 		break;
1940 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1941 		smmu->version = ARM_SMMU_V2;
1942 		smmu->model = CAVIUM_SMMUV2;
1943 		break;
1944 	default:
1945 		ret = -ENODEV;
1946 	}
1947 
1948 	return ret;
1949 }
1950 
1951 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1952 				      struct arm_smmu_device *smmu)
1953 {
1954 	struct device *dev = smmu->dev;
1955 	struct acpi_iort_node *node =
1956 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1957 	struct acpi_iort_smmu *iort_smmu;
1958 	int ret;
1959 
	/* Retrieve SMMUv1/v2-specific data from the IORT node */
1961 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1962 
1963 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1964 	if (ret < 0)
1965 		return ret;
1966 
1967 	/* Ignore the configuration access interrupt */
1968 	smmu->num_global_irqs = 1;
1969 
1970 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1971 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1972 
1973 	return 0;
1974 }
1975 #else
1976 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1977 					     struct arm_smmu_device *smmu)
1978 {
1979 	return -ENODEV;
1980 }
1981 #endif
1982 
1983 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1984 				    struct arm_smmu_device *smmu)
1985 {
1986 	const struct arm_smmu_match_data *data;
1987 	struct device *dev = &pdev->dev;
1988 	bool legacy_binding;
1989 
1990 	if (of_property_read_u32(dev->of_node, "#global-interrupts",
1991 				 &smmu->num_global_irqs)) {
1992 		dev_err(dev, "missing #global-interrupts property\n");
1993 		return -ENODEV;
1994 	}
1995 
1996 	data = of_device_get_match_data(dev);
1997 	smmu->version = data->version;
1998 	smmu->model = data->model;
1999 
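	/*
	 * The legacy "mmu-masters" binding and the generic "iommus" binding
	 * cannot be mixed system-wide: refuse to probe if we have already
	 * seen the other flavour.
	 */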
2000 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2001 	if (legacy_binding && !using_generic_binding) {
2002 		if (!using_legacy_binding) {
2003 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2004 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2005 		}
2006 		using_legacy_binding = true;
2007 	} else if (!legacy_binding && !using_legacy_binding) {
2008 		using_generic_binding = true;
2009 	} else {
2010 		dev_err(dev, "not probing due to mismatched DT properties\n");
2011 		return -ENODEV;
2012 	}
2013 
2014 	if (of_dma_is_coherent(dev->of_node))
2015 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2016 
2017 	return 0;
2018 }
2019 
2020 static int arm_smmu_bus_init(struct iommu_ops *ops)
2021 {
2022 	int err;
2023 
2024 	/* Oh, for a proper bus abstraction */
2025 	if (!iommu_present(&platform_bus_type)) {
2026 		err = bus_set_iommu(&platform_bus_type, ops);
2027 		if (err)
2028 			return err;
2029 	}
2030 #ifdef CONFIG_ARM_AMBA
2031 	if (!iommu_present(&amba_bustype)) {
2032 		err = bus_set_iommu(&amba_bustype, ops);
2033 		if (err)
2034 			goto err_reset_platform_ops;
2035 	}
2036 #endif
2037 #ifdef CONFIG_PCI
2038 	if (!iommu_present(&pci_bus_type)) {
2039 		err = bus_set_iommu(&pci_bus_type, ops);
2040 		if (err)
2041 			goto err_reset_amba_ops;
2042 	}
2043 #endif
2044 #ifdef CONFIG_FSL_MC_BUS
2045 	if (!iommu_present(&fsl_mc_bus_type)) {
2046 		err = bus_set_iommu(&fsl_mc_bus_type, ops);
2047 		if (err)
2048 			goto err_reset_pci_ops;
2049 	}
2050 #endif
2051 	return 0;
2052 
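/*
 * The __maybe_unused label attributes keep -Wunused-label quiet when the
 * corresponding bus support is configured out and nothing jumps here.
 */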
2053 err_reset_pci_ops: __maybe_unused;
2054 #ifdef CONFIG_PCI
2055 	bus_set_iommu(&pci_bus_type, NULL);
2056 #endif
2057 err_reset_amba_ops: __maybe_unused;
2058 #ifdef CONFIG_ARM_AMBA
2059 	bus_set_iommu(&amba_bustype, NULL);
2060 #endif
2061 err_reset_platform_ops: __maybe_unused;
2062 	bus_set_iommu(&platform_bus_type, NULL);
2063 	return err;
2064 }
2065 
2066 static int arm_smmu_device_probe(struct platform_device *pdev)
2067 {
2068 	struct resource *res;
2069 	resource_size_t ioaddr;
2070 	struct arm_smmu_device *smmu;
2071 	struct device *dev = &pdev->dev;
2072 	int num_irqs, i, err;
2073 	irqreturn_t (*global_fault)(int irq, void *dev);
2074 
2075 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2076 	if (!smmu) {
2077 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2078 		return -ENOMEM;
2079 	}
2080 	smmu->dev = dev;
2081 
2082 	if (dev->of_node)
2083 		err = arm_smmu_device_dt_probe(pdev, smmu);
2084 	else
2085 		err = arm_smmu_device_acpi_probe(pdev, smmu);
2086 
2087 	if (err)
2088 		return err;
2089 
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	ioaddr = res->start;
2095 	/*
2096 	 * The resource size should effectively match the value of SMMU_TOP;
2097 	 * stash that temporarily until we know PAGESIZE to validate it with.
2098 	 */
2099 	smmu->numpage = resource_size(res);
2100 
2101 	smmu = arm_smmu_impl_init(smmu);
2102 	if (IS_ERR(smmu))
2103 		return PTR_ERR(smmu);
2104 
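	/*
	 * Count all IRQ resources: anything beyond the global fault
	 * interrupts is treated as a context-bank interrupt.
	 */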
2105 	num_irqs = 0;
2106 	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2107 		num_irqs++;
2108 		if (num_irqs > smmu->num_global_irqs)
2109 			smmu->num_context_irqs++;
2110 	}
2111 
2112 	if (!smmu->num_context_irqs) {
2113 		dev_err(dev, "found %d interrupts but expected at least %d\n",
2114 			num_irqs, smmu->num_global_irqs + 1);
2115 		return -ENODEV;
2116 	}
2117 
2118 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2119 				  GFP_KERNEL);
2120 	if (!smmu->irqs) {
2121 		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2122 		return -ENOMEM;
2123 	}
2124 
2125 	for (i = 0; i < num_irqs; ++i) {
2126 		int irq = platform_get_irq(pdev, i);
2127 
2128 		if (irq < 0)
2129 			return -ENODEV;
2130 		smmu->irqs[i] = irq;
2131 	}
2132 
2133 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2134 	if (err < 0) {
2135 		dev_err(dev, "failed to get clocks %d\n", err);
2136 		return err;
2137 	}
2138 	smmu->num_clks = err;
2139 
2140 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2141 	if (err)
2142 		return err;
2143 
2144 	err = arm_smmu_device_cfg_probe(smmu);
2145 	if (err)
2146 		return err;
2147 
2148 	if (smmu->version == ARM_SMMU_V2) {
2149 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2150 			dev_err(dev,
2151 			      "found only %d context irq(s) but %d required\n",
2152 			      smmu->num_context_irqs, smmu->num_context_banks);
2153 			return -ENODEV;
2154 		}
2155 
2156 		/* Ignore superfluous interrupts */
2157 		smmu->num_context_irqs = smmu->num_context_banks;
2158 	}
2159 
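	/* Implementations may provide their own global fault handler */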
2160 	if (smmu->impl && smmu->impl->global_fault)
2161 		global_fault = smmu->impl->global_fault;
2162 	else
2163 		global_fault = arm_smmu_global_fault;
2164 
2165 	for (i = 0; i < smmu->num_global_irqs; ++i) {
2166 		err = devm_request_irq(smmu->dev, smmu->irqs[i],
2167 				       global_fault,
2168 				       IRQF_SHARED,
2169 				       "arm-smmu global fault",
2170 				       smmu);
2171 		if (err) {
2172 			dev_err(dev, "failed to request global IRQ %d (%u)\n",
2173 				i, smmu->irqs[i]);
2174 			return err;
2175 		}
2176 	}
2177 
2178 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2179 				     "smmu.%pa", &ioaddr);
2180 	if (err) {
2181 		dev_err(dev, "Failed to register iommu in sysfs\n");
2182 		return err;
2183 	}
2184 
2185 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2186 	if (err) {
2187 		dev_err(dev, "Failed to register iommu\n");
2188 		goto err_sysfs_remove;
2189 	}
2190 
2191 	platform_set_drvdata(pdev, smmu);
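	/* Bring the hardware to a known state and probe the usable SMR bits */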
2192 	arm_smmu_device_reset(smmu);
2193 	arm_smmu_test_smr_masks(smmu);
2194 
2195 	/*
2196 	 * We want to avoid touching dev->power.lock in fastpaths unless
2197 	 * it's really going to do something useful - pm_runtime_enabled()
2198 	 * can serve as an ideal proxy for that decision. So, conditionally
2199 	 * enable pm_runtime.
2200 	 */
2201 	if (dev->pm_domain) {
2202 		pm_runtime_set_active(dev);
2203 		pm_runtime_enable(dev);
2204 	}
2205 
2206 	/*
2207 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2208 	 * any device which might need it, so we want the bus ops in place
2209 	 * ready to handle default domain setup as soon as any SMMU exists.
2210 	 */
2211 	if (!using_legacy_binding) {
2212 		err = arm_smmu_bus_init(&arm_smmu_ops);
2213 		if (err)
2214 			goto err_unregister_device;
2215 	}
2216 
2217 	return 0;
2218 
2219 err_unregister_device:
2220 	iommu_device_unregister(&smmu->iommu);
2221 err_sysfs_remove:
2222 	iommu_device_sysfs_remove(&smmu->iommu);
2223 	return err;
2224 }
2225 
2226 static int arm_smmu_device_remove(struct platform_device *pdev)
2227 {
2228 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2229 
2230 	if (!smmu)
2231 		return -ENODEV;
2232 
2233 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2234 		dev_notice(&pdev->dev, "disabling translation\n");
2235 
2236 	arm_smmu_bus_init(NULL);
2237 	iommu_device_unregister(&smmu->iommu);
2238 	iommu_device_sysfs_remove(&smmu->iommu);
2239 
2240 	arm_smmu_rpm_get(smmu);
2241 	/* Turn the thing off */
2242 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2243 	arm_smmu_rpm_put(smmu);
2244 
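	/*
	 * pm_runtime_force_suspend() disables the clocks via the runtime
	 * suspend callback; otherwise do it by hand before unpreparing.
	 */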
2245 	if (pm_runtime_enabled(smmu->dev))
2246 		pm_runtime_force_suspend(smmu->dev);
2247 	else
2248 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2249 
2250 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2251 	return 0;
2252 }
2253 
2254 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2255 {
2256 	arm_smmu_device_remove(pdev);
2257 }
2258 
2259 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2260 {
2261 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2262 	int ret;
2263 
2264 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2265 	if (ret)
2266 		return ret;
2267 
2268 	arm_smmu_device_reset(smmu);
2269 
2270 	return 0;
2271 }
2272 
2273 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2274 {
2275 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2276 
2277 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2278 
2279 	return 0;
2280 }
2281 
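/*
 * For system sleep we only need to act if the device is not already
 * runtime-suspended, in which case the clocks are still enabled.
 */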
2282 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2283 {
2284 	if (pm_runtime_suspended(dev))
2285 		return 0;
2286 
2287 	return arm_smmu_runtime_resume(dev);
2288 }
2289 
2290 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2291 {
2292 	if (pm_runtime_suspended(dev))
2293 		return 0;
2294 
2295 	return arm_smmu_runtime_suspend(dev);
2296 }
2297 
2298 static const struct dev_pm_ops arm_smmu_pm_ops = {
2299 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2300 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2301 			   arm_smmu_runtime_resume, NULL)
2302 };
2303 
2304 static struct platform_driver arm_smmu_driver = {
2305 	.driver	= {
2306 		.name			= "arm-smmu",
2307 		.of_match_table		= arm_smmu_of_match,
2308 		.pm			= &arm_smmu_pm_ops,
2309 		.suppress_bind_attrs    = true,
2310 	},
2311 	.probe	= arm_smmu_device_probe,
2312 	.remove	= arm_smmu_device_remove,
2313 	.shutdown = arm_smmu_device_shutdown,
2314 };
2315 module_platform_driver(arm_smmu_driver);
2316 
2317 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2318 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2319 MODULE_ALIAS("platform:arm-smmu");
2320 MODULE_LICENSE("GPL v2");
2321