1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/of_device.h>
33 #include <linux/pci.h>
34 #include <linux/platform_device.h>
35 #include <linux/pm_runtime.h>
36 #include <linux/ratelimit.h>
37 #include <linux/slab.h>
38 
39 #include <linux/fsl/mc.h>
40 
41 #include "arm-smmu.h"
42 #include "../../dma-iommu.h"
43 
44 /*
45  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
46  * global register space are still, in fact, using a hypervisor to mediate it
47  * by trapping and emulating register accesses. Sadly, some deployed versions
48  * of said trapping code have bugs wherein they go horribly wrong for stores
49  * using r31 (i.e. XZR/WZR) as the source register.
50  */
51 #define QCOM_DUMMY_VAL -1
52 
53 #define MSI_IOVA_BASE			0x8000000
54 #define MSI_IOVA_LENGTH			0x100000
55 
56 static int force_stage;
57 module_param(force_stage, int, S_IRUGO);
58 MODULE_PARM_DESC(force_stage,
59 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
60 static bool disable_bypass =
61 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
62 module_param(disable_bypass, bool, S_IRUGO);
63 MODULE_PARM_DESC(disable_bypass,
64 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
65 
66 #define s2cr_init_val (struct arm_smmu_s2cr){				\
67 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
68 }
69 
70 static bool using_legacy_binding, using_generic_binding;
71 
72 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
73 {
74 	if (pm_runtime_enabled(smmu->dev))
75 		return pm_runtime_resume_and_get(smmu->dev);
76 
77 	return 0;
78 }
79 
80 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
81 {
82 	if (pm_runtime_enabled(smmu->dev))
83 		pm_runtime_put_autosuspend(smmu->dev);
84 }
85 
86 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
87 {
88 	return container_of(dom, struct arm_smmu_domain, domain);
89 }
90 
91 static struct platform_driver arm_smmu_driver;
92 static struct iommu_ops arm_smmu_ops;
93 
94 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
95 static struct device_node *dev_get_dev_node(struct device *dev)
96 {
97 	if (dev_is_pci(dev)) {
98 		struct pci_bus *bus = to_pci_dev(dev)->bus;
99 
100 		while (!pci_is_root_bus(bus))
101 			bus = bus->parent;
102 		return of_node_get(bus->bridge->parent->of_node);
103 	}
104 
105 	return of_node_get(dev->of_node);
106 }
107 
108 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
109 {
110 	*((__be32 *)data) = cpu_to_be32(alias);
111 	return 0; /* Continue walking */
112 }
113 
114 static int __find_legacy_master_phandle(struct device *dev, void *data)
115 {
116 	struct of_phandle_iterator *it = *(void **)data;
117 	struct device_node *np = it->node;
118 	int err;
119 
120 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
121 			    "#stream-id-cells", -1)
122 		if (it->node == np) {
123 			*(void **)data = dev;
124 			return 1;
125 		}
126 	it->node = np;
127 	return err == -ENOENT ? 0 : err;
128 }
129 
130 static int arm_smmu_register_legacy_master(struct device *dev,
131 					   struct arm_smmu_device **smmu)
132 {
133 	struct device *smmu_dev;
134 	struct device_node *np;
135 	struct of_phandle_iterator it;
136 	void *data = &it;
137 	u32 *sids;
138 	__be32 pci_sid;
139 	int err;
140 
141 	np = dev_get_dev_node(dev);
142 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
143 		of_node_put(np);
144 		return -ENODEV;
145 	}
146 
147 	it.node = np;
148 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
149 				     __find_legacy_master_phandle);
150 	smmu_dev = data;
151 	of_node_put(np);
152 	if (err == 0)
153 		return -ENODEV;
154 	if (err < 0)
155 		return err;
156 
157 	if (dev_is_pci(dev)) {
158 		/* "mmu-masters" assumes Stream ID == Requester ID */
159 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
160 				       &pci_sid);
161 		it.cur = &pci_sid;
162 		it.cur_count = 1;
163 	}
164 
165 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
166 				&arm_smmu_ops);
167 	if (err)
168 		return err;
169 
170 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
171 	if (!sids)
172 		return -ENOMEM;
173 
174 	*smmu = dev_get_drvdata(smmu_dev);
175 	of_phandle_iterator_args(&it, sids, it.cur_count);
176 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
177 	kfree(sids);
178 	return err;
179 }
180 #else
181 static int arm_smmu_register_legacy_master(struct device *dev,
182 					   struct arm_smmu_device **smmu)
183 {
184 	return -ENODEV;
185 }
186 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
187 
188 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
189 {
190 	clear_bit(idx, map);
191 }
192 
193 /* Wait for any pending TLB invalidations to complete */
194 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
195 				int sync, int status)
196 {
197 	unsigned int spin_cnt, delay;
198 	u32 reg;
199 
200 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
201 		return smmu->impl->tlb_sync(smmu, page, sync, status);
202 
203 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
204 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
205 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
206 			reg = arm_smmu_readl(smmu, page, status);
207 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
208 				return;
209 			cpu_relax();
210 		}
211 		udelay(delay);
212 	}
213 	dev_err_ratelimited(smmu->dev,
214 			    "TLB sync timed out -- SMMU may be deadlocked\n");
215 }
216 
217 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
218 {
219 	unsigned long flags;
220 
221 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
222 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
223 			    ARM_SMMU_GR0_sTLBGSTATUS);
224 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
225 }
226 
227 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
228 {
229 	struct arm_smmu_device *smmu = smmu_domain->smmu;
230 	unsigned long flags;
231 
232 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
233 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
234 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
235 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
236 }
237 
238 static void arm_smmu_tlb_inv_context_s1(void *cookie)
239 {
240 	struct arm_smmu_domain *smmu_domain = cookie;
241 	/*
242 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
243 	 * current CPU are visible beforehand.
244 	 */
245 	wmb();
246 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
247 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
248 	arm_smmu_tlb_sync_context(smmu_domain);
249 }
250 
251 static void arm_smmu_tlb_inv_context_s2(void *cookie)
252 {
253 	struct arm_smmu_domain *smmu_domain = cookie;
254 	struct arm_smmu_device *smmu = smmu_domain->smmu;
255 
256 	/* See above */
257 	wmb();
258 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
259 	arm_smmu_tlb_sync_global(smmu);
260 }
261 
262 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
263 				      size_t granule, void *cookie, int reg)
264 {
265 	struct arm_smmu_domain *smmu_domain = cookie;
266 	struct arm_smmu_device *smmu = smmu_domain->smmu;
267 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
268 	int idx = cfg->cbndx;
269 
270 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
271 		wmb();
272 
273 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
274 		iova = (iova >> 12) << 12;
275 		iova |= cfg->asid;
276 		do {
277 			arm_smmu_cb_write(smmu, idx, reg, iova);
278 			iova += granule;
279 		} while (size -= granule);
280 	} else {
281 		iova >>= 12;
282 		iova |= (u64)cfg->asid << 48;
283 		do {
284 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
285 			iova += granule >> 12;
286 		} while (size -= granule);
287 	}
288 }
289 
290 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
291 				      size_t granule, void *cookie, int reg)
292 {
293 	struct arm_smmu_domain *smmu_domain = cookie;
294 	struct arm_smmu_device *smmu = smmu_domain->smmu;
295 	int idx = smmu_domain->cfg.cbndx;
296 
297 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
298 		wmb();
299 
300 	iova >>= 12;
301 	do {
302 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
303 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
304 		else
305 			arm_smmu_cb_write(smmu, idx, reg, iova);
306 		iova += granule >> 12;
307 	} while (size -= granule);
308 }
309 
310 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
311 				     size_t granule, void *cookie)
312 {
313 	struct arm_smmu_domain *smmu_domain = cookie;
314 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
315 
316 	if (cfg->flush_walk_prefer_tlbiasid) {
317 		arm_smmu_tlb_inv_context_s1(cookie);
318 	} else {
319 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
320 					  ARM_SMMU_CB_S1_TLBIVA);
321 		arm_smmu_tlb_sync_context(cookie);
322 	}
323 }
324 
325 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
326 				     unsigned long iova, size_t granule,
327 				     void *cookie)
328 {
329 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
330 				  ARM_SMMU_CB_S1_TLBIVAL);
331 }
332 
333 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
334 				     size_t granule, void *cookie)
335 {
336 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
337 				  ARM_SMMU_CB_S2_TLBIIPAS2);
338 	arm_smmu_tlb_sync_context(cookie);
339 }
340 
341 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
350 					size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_context_s2(cookie);
353 }
354 /*
355  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
356  * almost negligible, but the benefit of getting the first one in as far ahead
357  * of the sync as possible is significant, hence we don't just make this a
358  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
359  * think.
360  */
361 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
362 					unsigned long iova, size_t granule,
363 					void *cookie)
364 {
365 	struct arm_smmu_domain *smmu_domain = cookie;
366 	struct arm_smmu_device *smmu = smmu_domain->smmu;
367 
368 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
369 		wmb();
370 
371 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
372 }
373 
374 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
375 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
376 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
377 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
378 };
379 
380 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
381 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
382 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
383 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
384 };
385 
386 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
387 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
388 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
389 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
390 };
391 
392 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
393 {
394 	u32 fsr, fsynr, cbfrsynra;
395 	unsigned long iova;
396 	struct iommu_domain *domain = dev;
397 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
398 	struct arm_smmu_device *smmu = smmu_domain->smmu;
399 	int idx = smmu_domain->cfg.cbndx;
400 	int ret;
401 
402 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
403 	if (!(fsr & ARM_SMMU_FSR_FAULT))
404 		return IRQ_NONE;
405 
406 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
407 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
408 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
409 
410 	ret = report_iommu_fault(domain, NULL, iova,
411 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
412 
413 	if (ret == -ENOSYS)
414 		dev_err_ratelimited(smmu->dev,
415 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
416 			    fsr, iova, fsynr, cbfrsynra, idx);
417 
418 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
419 	return IRQ_HANDLED;
420 }
421 
422 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
423 {
424 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
425 	struct arm_smmu_device *smmu = dev;
426 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
427 				      DEFAULT_RATELIMIT_BURST);
428 
429 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
430 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
431 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
432 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
433 
434 	if (!gfsr)
435 		return IRQ_NONE;
436 
437 	if (__ratelimit(&rs)) {
438 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
439 		    (gfsr & ARM_SMMU_sGFSR_USF))
440 			dev_err(smmu->dev,
441 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
442 				(u16)gfsynr1);
443 		else
444 			dev_err(smmu->dev,
445 				"Unexpected global fault, this could be serious\n");
446 		dev_err(smmu->dev,
447 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
448 			gfsr, gfsynr0, gfsynr1, gfsynr2);
449 	}
450 
451 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
452 	return IRQ_HANDLED;
453 }
454 
455 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
456 				       struct io_pgtable_cfg *pgtbl_cfg)
457 {
458 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
459 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
460 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
461 
462 	cb->cfg = cfg;
463 
464 	/* TCR */
465 	if (stage1) {
466 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
467 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
468 		} else {
469 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
470 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
471 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
472 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
473 			else
474 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
475 		}
476 	} else {
477 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
478 	}
479 
480 	/* TTBRs */
481 	if (stage1) {
482 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
483 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
484 			cb->ttbr[1] = 0;
485 		} else {
486 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
487 						 cfg->asid);
488 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
489 						 cfg->asid);
490 
491 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
492 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
493 			else
494 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
495 		}
496 	} else {
497 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
498 	}
499 
500 	/* MAIRs (stage-1 only) */
501 	if (stage1) {
502 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
503 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
504 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
505 		} else {
506 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
507 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
508 		}
509 	}
510 }
511 
512 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
513 {
514 	u32 reg;
515 	bool stage1;
516 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
517 	struct arm_smmu_cfg *cfg = cb->cfg;
518 
519 	/* Unassigned context banks only need disabling */
520 	if (!cfg) {
521 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
522 		return;
523 	}
524 
525 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
526 
527 	/* CBA2R */
528 	if (smmu->version > ARM_SMMU_V1) {
529 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
530 			reg = ARM_SMMU_CBA2R_VA64;
531 		else
532 			reg = 0;
533 		/* 16-bit VMIDs live in CBA2R */
534 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
535 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
536 
537 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
538 	}
539 
540 	/* CBAR */
541 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
542 	if (smmu->version < ARM_SMMU_V2)
543 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
544 
545 	/*
546 	 * Use the weakest shareability/memory types, so they are
547 	 * overridden by the ttbcr/pte.
548 	 */
549 	if (stage1) {
550 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
551 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
552 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
553 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
554 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
555 		/* 8-bit VMIDs live in CBAR */
556 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
557 	}
558 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
559 
560 	/*
561 	 * TCR
562 	 * We must write this before the TTBRs, since it determines the
563 	 * access behaviour of some fields (in particular, ASID[15:8]).
564 	 */
565 	if (stage1 && smmu->version > ARM_SMMU_V1)
566 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
567 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
568 
569 	/* TTBRs */
570 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
571 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
572 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
573 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
574 	} else {
575 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
576 		if (stage1)
577 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
578 					   cb->ttbr[1]);
579 	}
580 
581 	/* MAIRs (stage-1 only) */
582 	if (stage1) {
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
584 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
585 	}
586 
587 	/* SCTLR */
588 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
589 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
590 	if (stage1)
591 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
592 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
593 		reg |= ARM_SMMU_SCTLR_E;
594 
595 	if (smmu->impl && smmu->impl->write_sctlr)
596 		smmu->impl->write_sctlr(smmu, idx, reg);
597 	else
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
599 }
600 
601 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
602 				       struct arm_smmu_device *smmu,
603 				       struct device *dev, unsigned int start)
604 {
605 	if (smmu->impl && smmu->impl->alloc_context_bank)
606 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
607 
608 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
609 }
610 
611 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
612 					struct arm_smmu_device *smmu,
613 					struct device *dev)
614 {
615 	int irq, start, ret = 0;
616 	unsigned long ias, oas;
617 	struct io_pgtable_ops *pgtbl_ops;
618 	struct io_pgtable_cfg pgtbl_cfg;
619 	enum io_pgtable_fmt fmt;
620 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
621 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
622 	irqreturn_t (*context_fault)(int irq, void *dev);
623 
624 	mutex_lock(&smmu_domain->init_mutex);
625 	if (smmu_domain->smmu)
626 		goto out_unlock;
627 
628 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
629 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
630 		smmu_domain->smmu = smmu;
631 		goto out_unlock;
632 	}
633 
634 	/*
635 	 * Mapping the requested stage onto what we support is surprisingly
636 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
637 	 * support for nested translation. That means we end up with the
638 	 * following table:
639 	 *
640 	 * Requested        Supported        Actual
641 	 *     S1               N              S1
642 	 *     S1             S1+S2            S1
643 	 *     S1               S2             S2
644 	 *     S1               S1             S1
645 	 *     N                N              N
646 	 *     N              S1+S2            S2
647 	 *     N                S2             S2
648 	 *     N                S1             S1
649 	 *
650 	 * Note that you can't actually request stage-2 mappings.
651 	 */
652 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
653 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
654 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
655 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
656 
657 	/*
658 	 * Choosing a suitable context format is even more fiddly. Until we
659 	 * grow some way for the caller to express a preference, and/or move
660 	 * the decision into the io-pgtable code where it arguably belongs,
661 	 * just aim for the closest thing to the rest of the system, and hope
662 	 * that the hardware isn't esoteric enough that we can't assume AArch64
663 	 * support to be a superset of AArch32 support...
664 	 */
665 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
666 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
667 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
668 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
669 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
670 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
671 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
672 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
673 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
674 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
675 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
676 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
677 
678 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
679 		ret = -EINVAL;
680 		goto out_unlock;
681 	}
682 
683 	switch (smmu_domain->stage) {
684 	case ARM_SMMU_DOMAIN_S1:
685 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
686 		start = smmu->num_s2_context_banks;
687 		ias = smmu->va_size;
688 		oas = smmu->ipa_size;
689 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
690 			fmt = ARM_64_LPAE_S1;
691 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
692 			fmt = ARM_32_LPAE_S1;
693 			ias = min(ias, 32UL);
694 			oas = min(oas, 40UL);
695 		} else {
696 			fmt = ARM_V7S;
697 			ias = min(ias, 32UL);
698 			oas = min(oas, 32UL);
699 		}
700 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
701 		break;
702 	case ARM_SMMU_DOMAIN_NESTED:
703 		/*
704 		 * We will likely want to change this if/when KVM gets
705 		 * involved.
706 		 */
707 	case ARM_SMMU_DOMAIN_S2:
708 		cfg->cbar = CBAR_TYPE_S2_TRANS;
709 		start = 0;
710 		ias = smmu->ipa_size;
711 		oas = smmu->pa_size;
712 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
713 			fmt = ARM_64_LPAE_S2;
714 		} else {
715 			fmt = ARM_32_LPAE_S2;
716 			ias = min(ias, 40UL);
717 			oas = min(oas, 40UL);
718 		}
719 		if (smmu->version == ARM_SMMU_V2)
720 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
721 		else
722 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
723 		break;
724 	default:
725 		ret = -EINVAL;
726 		goto out_unlock;
727 	}
728 
729 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
730 	if (ret < 0) {
731 		goto out_unlock;
732 	}
733 
734 	smmu_domain->smmu = smmu;
735 
736 	cfg->cbndx = ret;
737 	if (smmu->version < ARM_SMMU_V2) {
738 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
739 		cfg->irptndx %= smmu->num_context_irqs;
740 	} else {
741 		cfg->irptndx = cfg->cbndx;
742 	}
743 
744 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
745 		cfg->vmid = cfg->cbndx + 1;
746 	else
747 		cfg->asid = cfg->cbndx;
748 
749 	pgtbl_cfg = (struct io_pgtable_cfg) {
750 		.pgsize_bitmap	= smmu->pgsize_bitmap,
751 		.ias		= ias,
752 		.oas		= oas,
753 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
754 		.tlb		= smmu_domain->flush_ops,
755 		.iommu_dev	= smmu->dev,
756 	};
757 
758 	if (smmu->impl && smmu->impl->init_context) {
759 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
760 		if (ret)
761 			goto out_clear_smmu;
762 	}
763 
764 	if (smmu_domain->pgtbl_quirks)
765 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
766 
767 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
768 	if (!pgtbl_ops) {
769 		ret = -ENOMEM;
770 		goto out_clear_smmu;
771 	}
772 
773 	/* Update the domain's page sizes to reflect the page table format */
774 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
775 
776 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
777 		domain->geometry.aperture_start = ~0UL << ias;
778 		domain->geometry.aperture_end = ~0UL;
779 	} else {
780 		domain->geometry.aperture_end = (1UL << ias) - 1;
781 	}
782 
783 	domain->geometry.force_aperture = true;
784 
785 	/* Initialise the context bank with our page table cfg */
786 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
787 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
788 
789 	/*
790 	 * Request context fault interrupt. Do this last to avoid the
791 	 * handler seeing a half-initialised domain state.
792 	 */
793 	irq = smmu->irqs[cfg->irptndx];
794 
795 	if (smmu->impl && smmu->impl->context_fault)
796 		context_fault = smmu->impl->context_fault;
797 	else
798 		context_fault = arm_smmu_context_fault;
799 
800 	ret = devm_request_irq(smmu->dev, irq, context_fault,
801 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
802 	if (ret < 0) {
803 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
804 			cfg->irptndx, irq);
805 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
806 	}
807 
808 	mutex_unlock(&smmu_domain->init_mutex);
809 
810 	/* Publish page table ops for map/unmap */
811 	smmu_domain->pgtbl_ops = pgtbl_ops;
812 	return 0;
813 
814 out_clear_smmu:
815 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
816 	smmu_domain->smmu = NULL;
817 out_unlock:
818 	mutex_unlock(&smmu_domain->init_mutex);
819 	return ret;
820 }
821 
822 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
823 {
824 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
825 	struct arm_smmu_device *smmu = smmu_domain->smmu;
826 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
827 	int ret, irq;
828 
829 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
830 		return;
831 
832 	ret = arm_smmu_rpm_get(smmu);
833 	if (ret < 0)
834 		return;
835 
836 	/*
837 	 * Disable the context bank and free the page tables before freeing
838 	 * it.
839 	 */
840 	smmu->cbs[cfg->cbndx].cfg = NULL;
841 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
842 
843 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
844 		irq = smmu->irqs[cfg->irptndx];
845 		devm_free_irq(smmu->dev, irq, domain);
846 	}
847 
848 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
849 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
850 
851 	arm_smmu_rpm_put(smmu);
852 }
853 
854 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
855 {
856 	struct arm_smmu_domain *smmu_domain;
857 
858 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
859 		if (using_legacy_binding ||
860 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
861 			return NULL;
862 	}
863 	/*
864 	 * Allocate the domain and initialise some of its data structures.
865 	 * We can't really do anything meaningful until we've added a
866 	 * master.
867 	 */
868 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
869 	if (!smmu_domain)
870 		return NULL;
871 
872 	mutex_init(&smmu_domain->init_mutex);
873 	spin_lock_init(&smmu_domain->cb_lock);
874 
875 	return &smmu_domain->domain;
876 }
877 
878 static void arm_smmu_domain_free(struct iommu_domain *domain)
879 {
880 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
881 
882 	/*
883 	 * Free the domain resources. We assume that all devices have
884 	 * already been detached.
885 	 */
886 	arm_smmu_destroy_domain_context(domain);
887 	kfree(smmu_domain);
888 }
889 
890 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
891 {
892 	struct arm_smmu_smr *smr = smmu->smrs + idx;
893 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
894 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
895 
896 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
897 		reg |= ARM_SMMU_SMR_VALID;
898 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
899 }
900 
901 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
902 {
903 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
904 	u32 reg;
905 
906 	if (smmu->impl && smmu->impl->write_s2cr) {
907 		smmu->impl->write_s2cr(smmu, idx);
908 		return;
909 	}
910 
911 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
912 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
913 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
914 
915 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
916 	    smmu->smrs[idx].valid)
917 		reg |= ARM_SMMU_S2CR_EXIDVALID;
918 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
919 }
920 
921 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
922 {
923 	arm_smmu_write_s2cr(smmu, idx);
924 	if (smmu->smrs)
925 		arm_smmu_write_smr(smmu, idx);
926 }
927 
928 /*
929  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
930  * should be called after sCR0 is written.
931  */
932 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
933 {
934 	u32 smr;
935 	int i;
936 
937 	if (!smmu->smrs)
938 		return;
939 	/*
940 	 * If we've had to accommodate firmware memory regions, we may
941 	 * have live SMRs by now; tread carefully...
942 	 *
943 	 * Somewhat perversely, not having a free SMR for this test implies we
944 	 * can get away without it anyway, as we'll only be able to 'allocate'
945 	 * these SMRs for the ID/mask values we're already trusting to be OK.
946 	 */
947 	for (i = 0; i < smmu->num_mapping_groups; i++)
948 		if (!smmu->smrs[i].valid)
949 			goto smr_ok;
950 	return;
951 smr_ok:
952 	/*
953 	 * SMR.ID bits may not be preserved if the corresponding MASK
954 	 * bits are set, so check each one separately. We can reject
955 	 * masters later if they try to claim IDs outside these masks.
956 	 */
957 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
958 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
959 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
960 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
961 
962 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
963 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
964 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
965 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
966 }
967 
968 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
969 {
970 	struct arm_smmu_smr *smrs = smmu->smrs;
971 	int i, free_idx = -ENOSPC;
972 
973 	/* Stream indexing is blissfully easy */
974 	if (!smrs)
975 		return id;
976 
977 	/* Validating SMRs is... less so */
978 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
979 		if (!smrs[i].valid) {
980 			/*
981 			 * Note the first free entry we come across, which
982 			 * we'll claim in the end if nothing else matches.
983 			 */
984 			if (free_idx < 0)
985 				free_idx = i;
986 			continue;
987 		}
988 		/*
989 		 * If the new entry is _entirely_ matched by an existing entry,
990 		 * then reuse that, with the guarantee that there also cannot
991 		 * be any subsequent conflicting entries. In normal use we'd
992 		 * expect simply identical entries for this case, but there's
993 		 * no harm in accommodating the generalisation.
994 		 */
995 		if ((mask & smrs[i].mask) == mask &&
996 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
997 			return i;
998 		/*
999 		 * If the new entry has any other overlap with an existing one,
1000 		 * though, then there always exists at least one stream ID
1001 		 * which would cause a conflict, and we can't allow that risk.
1002 		 */
1003 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1004 			return -EINVAL;
1005 	}
1006 
1007 	return free_idx;
1008 }
1009 
1010 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1011 {
1012 	if (--smmu->s2crs[idx].count)
1013 		return false;
1014 
1015 	smmu->s2crs[idx] = s2cr_init_val;
1016 	if (smmu->smrs)
1017 		smmu->smrs[idx].valid = false;
1018 
1019 	return true;
1020 }
1021 
1022 static int arm_smmu_master_alloc_smes(struct device *dev)
1023 {
1024 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1025 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1026 	struct arm_smmu_device *smmu = cfg->smmu;
1027 	struct arm_smmu_smr *smrs = smmu->smrs;
1028 	int i, idx, ret;
1029 
1030 	mutex_lock(&smmu->stream_map_mutex);
1031 	/* Figure out a viable stream map entry allocation */
1032 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1033 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1034 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1035 
1036 		if (idx != INVALID_SMENDX) {
1037 			ret = -EEXIST;
1038 			goto out_err;
1039 		}
1040 
1041 		ret = arm_smmu_find_sme(smmu, sid, mask);
1042 		if (ret < 0)
1043 			goto out_err;
1044 
1045 		idx = ret;
1046 		if (smrs && smmu->s2crs[idx].count == 0) {
1047 			smrs[idx].id = sid;
1048 			smrs[idx].mask = mask;
1049 			smrs[idx].valid = true;
1050 		}
1051 		smmu->s2crs[idx].count++;
1052 		cfg->smendx[i] = (s16)idx;
1053 	}
1054 
1055 	/* It worked! Now, poke the actual hardware */
1056 	for_each_cfg_sme(cfg, fwspec, i, idx)
1057 		arm_smmu_write_sme(smmu, idx);
1058 
1059 	mutex_unlock(&smmu->stream_map_mutex);
1060 	return 0;
1061 
1062 out_err:
1063 	while (i--) {
1064 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1065 		cfg->smendx[i] = INVALID_SMENDX;
1066 	}
1067 	mutex_unlock(&smmu->stream_map_mutex);
1068 	return ret;
1069 }
1070 
1071 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1072 				      struct iommu_fwspec *fwspec)
1073 {
1074 	struct arm_smmu_device *smmu = cfg->smmu;
1075 	int i, idx;
1076 
1077 	mutex_lock(&smmu->stream_map_mutex);
1078 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1079 		if (arm_smmu_free_sme(smmu, idx))
1080 			arm_smmu_write_sme(smmu, idx);
1081 		cfg->smendx[i] = INVALID_SMENDX;
1082 	}
1083 	mutex_unlock(&smmu->stream_map_mutex);
1084 }
1085 
1086 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1087 				      struct arm_smmu_master_cfg *cfg,
1088 				      struct iommu_fwspec *fwspec)
1089 {
1090 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1091 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1092 	u8 cbndx = smmu_domain->cfg.cbndx;
1093 	enum arm_smmu_s2cr_type type;
1094 	int i, idx;
1095 
1096 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1097 		type = S2CR_TYPE_BYPASS;
1098 	else
1099 		type = S2CR_TYPE_TRANS;
1100 
1101 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1102 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1103 			continue;
1104 
1105 		s2cr[idx].type = type;
1106 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1107 		s2cr[idx].cbndx = cbndx;
1108 		arm_smmu_write_s2cr(smmu, idx);
1109 	}
1110 	return 0;
1111 }
1112 
1113 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1114 {
1115 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1116 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1117 	struct arm_smmu_master_cfg *cfg;
1118 	struct arm_smmu_device *smmu;
1119 	int ret;
1120 
1121 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1122 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1123 		return -ENXIO;
1124 	}
1125 
1126 	/*
1127 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1128 	 * domains between of_xlate() and probe_device() - we have no way to cope
1129 	 * with that, so until ARM gets converted to rely on groups and default
1130 	 * domains, just say no (but more politely than by dereferencing NULL).
1131 	 * This should be at least a WARN_ON once that's sorted.
1132 	 */
1133 	cfg = dev_iommu_priv_get(dev);
1134 	if (!cfg)
1135 		return -ENODEV;
1136 
1137 	smmu = cfg->smmu;
1138 
1139 	ret = arm_smmu_rpm_get(smmu);
1140 	if (ret < 0)
1141 		return ret;
1142 
1143 	/* Ensure that the domain is finalised */
1144 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1145 	if (ret < 0)
1146 		goto rpm_put;
1147 
1148 	/*
1149 	 * Sanity check the domain. We don't support domains across
1150 	 * different SMMUs.
1151 	 */
1152 	if (smmu_domain->smmu != smmu) {
1153 		dev_err(dev,
1154 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1155 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1156 		ret = -EINVAL;
1157 		goto rpm_put;
1158 	}
1159 
1160 	/* Looks ok, so add the device to the domain */
1161 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1162 
1163 	/*
1164 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1165 	 * Otherwise, if a driver for a suspended consumer device
1166 	 * unmaps buffers, it will runpm resume/suspend for each one.
1167 	 *
1168 	 * For example, when used by a GPU device, when an application
1169 	 * or game exits, it can trigger unmapping 100s or 1000s of
1170 	 * buffers.  With a runpm cycle for each buffer, that adds up
1171 	 * to 5-10sec worth of reprogramming the context bank, while
1172 	 * the system appears to be locked up to the user.
1173 	 */
1174 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1175 	pm_runtime_use_autosuspend(smmu->dev);
1176 
1177 rpm_put:
1178 	arm_smmu_rpm_put(smmu);
1179 	return ret;
1180 }
1181 
1182 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1183 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1184 			      int prot, gfp_t gfp, size_t *mapped)
1185 {
1186 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1187 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1188 	int ret;
1189 
1190 	if (!ops)
1191 		return -ENODEV;
1192 
1193 	arm_smmu_rpm_get(smmu);
1194 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1195 	arm_smmu_rpm_put(smmu);
1196 
1197 	return ret;
1198 }
1199 
1200 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1201 				   size_t pgsize, size_t pgcount,
1202 				   struct iommu_iotlb_gather *iotlb_gather)
1203 {
1204 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1205 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1206 	size_t ret;
1207 
1208 	if (!ops)
1209 		return 0;
1210 
1211 	arm_smmu_rpm_get(smmu);
1212 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1213 	arm_smmu_rpm_put(smmu);
1214 
1215 	return ret;
1216 }
1217 
1218 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1219 {
1220 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1221 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1222 
1223 	if (smmu_domain->flush_ops) {
1224 		arm_smmu_rpm_get(smmu);
1225 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1226 		arm_smmu_rpm_put(smmu);
1227 	}
1228 }
1229 
1230 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1231 				struct iommu_iotlb_gather *gather)
1232 {
1233 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1234 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1235 
1236 	if (!smmu)
1237 		return;
1238 
1239 	arm_smmu_rpm_get(smmu);
1240 	if (smmu->version == ARM_SMMU_V2 ||
1241 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1242 		arm_smmu_tlb_sync_context(smmu_domain);
1243 	else
1244 		arm_smmu_tlb_sync_global(smmu);
1245 	arm_smmu_rpm_put(smmu);
1246 }
1247 
1248 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1249 					      dma_addr_t iova)
1250 {
1251 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1252 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1253 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1254 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1255 	struct device *dev = smmu->dev;
1256 	void __iomem *reg;
1257 	u32 tmp;
1258 	u64 phys;
1259 	unsigned long va, flags;
1260 	int ret, idx = cfg->cbndx;
1261 	phys_addr_t addr = 0;
1262 
1263 	ret = arm_smmu_rpm_get(smmu);
1264 	if (ret < 0)
1265 		return 0;
1266 
1267 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1268 	va = iova & ~0xfffUL;
1269 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1270 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1271 	else
1272 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1273 
1274 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1275 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1276 				      5, 50)) {
1277 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1278 		dev_err(dev,
1279 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1280 			&iova);
1281 		arm_smmu_rpm_put(smmu);
1282 		return ops->iova_to_phys(ops, iova);
1283 	}
1284 
1285 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1286 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1287 	if (phys & ARM_SMMU_CB_PAR_F) {
1288 		dev_err(dev, "translation fault!\n");
1289 		dev_err(dev, "PAR = 0x%llx\n", phys);
1290 		goto out;
1291 	}
1292 
1293 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1294 out:
1295 	arm_smmu_rpm_put(smmu);
1296 
1297 	return addr;
1298 }
1299 
1300 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1301 					dma_addr_t iova)
1302 {
1303 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1304 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1305 
1306 	if (!ops)
1307 		return 0;
1308 
1309 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1310 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1311 		return arm_smmu_iova_to_phys_hard(domain, iova);
1312 
1313 	return ops->iova_to_phys(ops, iova);
1314 }
1315 
1316 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1317 {
1318 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1319 
1320 	switch (cap) {
1321 	case IOMMU_CAP_CACHE_COHERENCY:
1322 		/* Assume that a coherent TCU implies coherent TBUs */
1323 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1324 	case IOMMU_CAP_NOEXEC:
1325 		return true;
1326 	default:
1327 		return false;
1328 	}
1329 }
1330 
1331 static
1332 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1333 {
1334 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1335 							  fwnode);
1336 	put_device(dev);
1337 	return dev ? dev_get_drvdata(dev) : NULL;
1338 }
1339 
1340 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1341 {
1342 	struct arm_smmu_device *smmu = NULL;
1343 	struct arm_smmu_master_cfg *cfg;
1344 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1345 	int i, ret;
1346 
1347 	if (using_legacy_binding) {
1348 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1349 
1350 		/*
1351 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1352 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1353 		 * later use.
1354 		 */
1355 		fwspec = dev_iommu_fwspec_get(dev);
1356 		if (ret)
1357 			goto out_free;
1358 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1359 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1360 	} else {
1361 		return ERR_PTR(-ENODEV);
1362 	}
1363 
1364 	ret = -EINVAL;
1365 	for (i = 0; i < fwspec->num_ids; i++) {
1366 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1367 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1368 
1369 		if (sid & ~smmu->streamid_mask) {
1370 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1371 				sid, smmu->streamid_mask);
1372 			goto out_free;
1373 		}
1374 		if (mask & ~smmu->smr_mask_mask) {
1375 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1376 				mask, smmu->smr_mask_mask);
1377 			goto out_free;
1378 		}
1379 	}
1380 
1381 	ret = -ENOMEM;
1382 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1383 		      GFP_KERNEL);
1384 	if (!cfg)
1385 		goto out_free;
1386 
1387 	cfg->smmu = smmu;
1388 	dev_iommu_priv_set(dev, cfg);
1389 	while (i--)
1390 		cfg->smendx[i] = INVALID_SMENDX;
1391 
1392 	ret = arm_smmu_rpm_get(smmu);
1393 	if (ret < 0)
1394 		goto out_cfg_free;
1395 
1396 	ret = arm_smmu_master_alloc_smes(dev);
1397 	arm_smmu_rpm_put(smmu);
1398 
1399 	if (ret)
1400 		goto out_cfg_free;
1401 
1402 	device_link_add(dev, smmu->dev,
1403 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1404 
1405 	return &smmu->iommu;
1406 
1407 out_cfg_free:
1408 	kfree(cfg);
1409 out_free:
1410 	iommu_fwspec_free(dev);
1411 	return ERR_PTR(ret);
1412 }
1413 
1414 static void arm_smmu_release_device(struct device *dev)
1415 {
1416 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1417 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1418 	int ret;
1419 
1420 	ret = arm_smmu_rpm_get(cfg->smmu);
1421 	if (ret < 0)
1422 		return;
1423 
1424 	arm_smmu_master_free_smes(cfg, fwspec);
1425 
1426 	arm_smmu_rpm_put(cfg->smmu);
1427 
1428 	dev_iommu_priv_set(dev, NULL);
1429 	kfree(cfg);
1430 }
1431 
1432 static void arm_smmu_probe_finalize(struct device *dev)
1433 {
1434 	struct arm_smmu_master_cfg *cfg;
1435 	struct arm_smmu_device *smmu;
1436 
1437 	cfg = dev_iommu_priv_get(dev);
1438 	smmu = cfg->smmu;
1439 
1440 	if (smmu->impl && smmu->impl->probe_finalize)
1441 		smmu->impl->probe_finalize(smmu, dev);
1442 }
1443 
1444 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1445 {
1446 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1447 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1448 	struct arm_smmu_device *smmu = cfg->smmu;
1449 	struct iommu_group *group = NULL;
1450 	int i, idx;
1451 
1452 	mutex_lock(&smmu->stream_map_mutex);
1453 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1454 		if (group && smmu->s2crs[idx].group &&
1455 		    group != smmu->s2crs[idx].group) {
1456 			mutex_unlock(&smmu->stream_map_mutex);
1457 			return ERR_PTR(-EINVAL);
1458 		}
1459 
1460 		group = smmu->s2crs[idx].group;
1461 	}
1462 
1463 	if (group) {
1464 		mutex_unlock(&smmu->stream_map_mutex);
1465 		return iommu_group_ref_get(group);
1466 	}
1467 
1468 	if (dev_is_pci(dev))
1469 		group = pci_device_group(dev);
1470 	else if (dev_is_fsl_mc(dev))
1471 		group = fsl_mc_device_group(dev);
1472 	else
1473 		group = generic_device_group(dev);
1474 
1475 	/* Remember group for faster lookups */
1476 	if (!IS_ERR(group))
1477 		for_each_cfg_sme(cfg, fwspec, i, idx)
1478 			smmu->s2crs[idx].group = group;
1479 
1480 	mutex_unlock(&smmu->stream_map_mutex);
1481 	return group;
1482 }
1483 
1484 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1485 {
1486 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1487 	int ret = 0;
1488 
1489 	mutex_lock(&smmu_domain->init_mutex);
1490 	if (smmu_domain->smmu)
1491 		ret = -EPERM;
1492 	else
1493 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1494 	mutex_unlock(&smmu_domain->init_mutex);
1495 
1496 	return ret;
1497 }
1498 
1499 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1500 		unsigned long quirks)
1501 {
1502 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1503 	int ret = 0;
1504 
1505 	mutex_lock(&smmu_domain->init_mutex);
1506 	if (smmu_domain->smmu)
1507 		ret = -EPERM;
1508 	else
1509 		smmu_domain->pgtbl_quirks = quirks;
1510 	mutex_unlock(&smmu_domain->init_mutex);
1511 
1512 	return ret;
1513 }
1514 
1515 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1516 {
1517 	u32 mask, fwid = 0;
1518 
1519 	if (args->args_count > 0)
1520 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1521 
1522 	if (args->args_count > 1)
1523 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1524 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1525 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1526 
1527 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1528 }
1529 
1530 static void arm_smmu_get_resv_regions(struct device *dev,
1531 				      struct list_head *head)
1532 {
1533 	struct iommu_resv_region *region;
1534 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1535 
1536 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1537 					 prot, IOMMU_RESV_SW_MSI);
1538 	if (!region)
1539 		return;
1540 
1541 	list_add_tail(&region->list, head);
1542 
1543 	iommu_dma_get_resv_regions(dev, head);
1544 }
1545 
1546 static int arm_smmu_def_domain_type(struct device *dev)
1547 {
1548 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1549 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1550 
1551 	if (using_legacy_binding)
1552 		return IOMMU_DOMAIN_IDENTITY;
1553 
1554 	if (impl && impl->def_domain_type)
1555 		return impl->def_domain_type(dev);
1556 
1557 	return 0;
1558 }
1559 
1560 static struct iommu_ops arm_smmu_ops = {
1561 	.capable		= arm_smmu_capable,
1562 	.domain_alloc		= arm_smmu_domain_alloc,
1563 	.probe_device		= arm_smmu_probe_device,
1564 	.release_device		= arm_smmu_release_device,
1565 	.probe_finalize		= arm_smmu_probe_finalize,
1566 	.device_group		= arm_smmu_device_group,
1567 	.of_xlate		= arm_smmu_of_xlate,
1568 	.get_resv_regions	= arm_smmu_get_resv_regions,
1569 	.def_domain_type	= arm_smmu_def_domain_type,
1570 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1571 	.owner			= THIS_MODULE,
1572 	.default_domain_ops = &(const struct iommu_domain_ops) {
1573 		.attach_dev		= arm_smmu_attach_dev,
1574 		.map_pages		= arm_smmu_map_pages,
1575 		.unmap_pages		= arm_smmu_unmap_pages,
1576 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1577 		.iotlb_sync		= arm_smmu_iotlb_sync,
1578 		.iova_to_phys		= arm_smmu_iova_to_phys,
1579 		.enable_nesting		= arm_smmu_enable_nesting,
1580 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1581 		.free			= arm_smmu_domain_free,
1582 	}
1583 };
1584 
1585 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1586 {
1587 	int i;
1588 	u32 reg;
1589 
1590 	/* clear global FSR */
1591 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1592 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1593 
1594 	/*
1595 	 * Reset stream mapping groups: Initial values mark all SMRn as
1596 	 * invalid and all S2CRn as bypass unless overridden.
1597 	 */
1598 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1599 		arm_smmu_write_sme(smmu, i);
1600 
1601 	/* Make sure all context banks are disabled and clear CB_FSR  */
1602 	for (i = 0; i < smmu->num_context_banks; ++i) {
1603 		arm_smmu_write_context_bank(smmu, i);
1604 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1605 	}
1606 
1607 	/* Invalidate the TLB, just in case */
1608 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1609 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1610 
1611 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1612 
1613 	/* Enable fault reporting */
1614 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1615 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1616 
1617 	/* Disable TLB broadcasting. */
1618 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1619 
1620 	/* Enable client access, handling unmatched streams as appropriate */
1621 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1622 	if (disable_bypass)
1623 		reg |= ARM_SMMU_sCR0_USFCFG;
1624 	else
1625 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1626 
1627 	/* Disable forced broadcasting */
1628 	reg &= ~ARM_SMMU_sCR0_FB;
1629 
1630 	/* Don't upgrade barriers */
1631 	reg &= ~(ARM_SMMU_sCR0_BSU);
1632 
1633 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1634 		reg |= ARM_SMMU_sCR0_VMID16EN;
1635 
1636 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1637 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1638 
1639 	if (smmu->impl && smmu->impl->reset)
1640 		smmu->impl->reset(smmu);
1641 
1642 	/* Push the button */
1643 	arm_smmu_tlb_sync_global(smmu);
1644 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1645 }
1646 
1647 static int arm_smmu_id_size_to_bits(int size)
1648 {
1649 	switch (size) {
1650 	case 0:
1651 		return 32;
1652 	case 1:
1653 		return 36;
1654 	case 2:
1655 		return 40;
1656 	case 3:
1657 		return 42;
1658 	case 4:
1659 		return 44;
1660 	case 5:
1661 	default:
1662 		return 48;
1663 	}
1664 }
1665 
1666 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1667 {
1668 	unsigned int size;
1669 	u32 id;
1670 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1671 	int i, ret;
1672 
1673 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1674 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1675 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1676 
1677 	/* ID0 */
1678 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1679 
1680 	/* Restrict available stages based on module parameter */
1681 	if (force_stage == 1)
1682 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1683 	else if (force_stage == 2)
1684 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1685 
1686 	if (id & ARM_SMMU_ID0_S1TS) {
1687 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1688 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1689 	}
1690 
1691 	if (id & ARM_SMMU_ID0_S2TS) {
1692 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1693 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1694 	}
1695 
1696 	if (id & ARM_SMMU_ID0_NTS) {
1697 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1698 		dev_notice(smmu->dev, "\tnested translation\n");
1699 	}
1700 
1701 	if (!(smmu->features &
1702 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1703 		dev_err(smmu->dev, "\tno translation support!\n");
1704 		return -ENODEV;
1705 	}
1706 
1707 	if ((id & ARM_SMMU_ID0_S1TS) &&
1708 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1709 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1710 		dev_notice(smmu->dev, "\taddress translation ops\n");
1711 	}
1712 
1713 	/*
1714 	 * In order for DMA API calls to work properly, we must defer to what
1715 	 * the FW says about coherency, regardless of what the hardware claims.
1716 	 * Fortunately, this also opens up a workaround for systems where the
1717 	 * ID register value has ended up configured incorrectly.
1718 	 */
1719 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1720 	if (cttw_fw || cttw_reg)
1721 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1722 			   cttw_fw ? "" : "non-");
1723 	if (cttw_fw != cttw_reg)
1724 		dev_notice(smmu->dev,
1725 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1726 
1727 	/* Max. number of entries we have for stream matching/indexing */
1728 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1729 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1730 		size = 1 << 16;
1731 	} else {
1732 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1733 	}
1734 	smmu->streamid_mask = size - 1;
1735 	if (id & ARM_SMMU_ID0_SMS) {
1736 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1737 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1738 		if (size == 0) {
1739 			dev_err(smmu->dev,
1740 				"stream-matching supported, but no SMRs present!\n");
1741 			return -ENODEV;
1742 		}
1743 
1744 		/* Zero-initialised to mark as invalid */
1745 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1746 					  GFP_KERNEL);
1747 		if (!smmu->smrs)
1748 			return -ENOMEM;
1749 
1750 		dev_notice(smmu->dev,
1751 			   "\tstream matching with %u register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

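	/*
	 * AArch32 page table formats: assumed present on SMMUv1; on v2 the
	 * ID0.PTFS_NO_AARCH32* bits advertise which AArch32 formats, if any,
	 * are missing.
	 */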
	if (smmu->version < ARM_SMMU_V2 ||
	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
	smmu->pa_size = size;

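	/* ID2.VMID16 advertises support for 16-bit VMIDs. */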
	if (id & ARM_SMMU_ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

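	/*
	 * For SMMUv1 the input address range is limited to the IPA size;
	 * SMMUv2 reports the upstream bus size in ID2.UBS and the supported
	 * AArch64 granules in the ID2.PTFS_* bits.
	 */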
	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ARM_SMMU_ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ARM_SMMU_ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ARM_SMMU_ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	if (smmu->impl && smmu->impl->cfg_probe) {
		ret = smmu->impl->cfg_probe(smmu);
		if (ret)
			return ret;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}

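/* Per-compatible match data selecting the architecture version and implementation. */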
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
				      u32 *global_irqs, u32 *pmu_irqs)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	*global_irqs = 1;
	*pmu_irqs = 0;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
					     u32 *global_irqs, u32 *pmu_irqs)
{
	return -ENODEV;
}
#endif

static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
				    u32 *global_irqs, u32 *pmu_irqs)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = smmu->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
		return dev_err_probe(dev, -ENODEV,
				     "missing #global-interrupts property\n");
	*pmu_irqs = 0;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding) {
			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
		}
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}

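/*
 * Reserve bypass SMEs for any stream IDs described by IORT RMR nodes, so
 * that DMA set up by the firmware continues to work across the SMMU reset.
 */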
static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;
	int idx, cnt = 0;
	u32 reg;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	/*
	 * Rather than trying to look at existing mappings that
	 * are set up by the firmware and then invalidate the ones
	 * that do not have matching RMR entries, just disable the
	 * SMMU until it gets enabled again in the reset routine.
	 */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
	reg |= ARM_SMMU_sCR0_CLIENTPD;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
			if (idx < 0)
				continue;

			if (smmu->s2crs[idx].count == 0) {
				smmu->smrs[idx].id = rmr->sids[i];
				smmu->smrs[idx].mask = 0;
				smmu->smrs[idx].valid = true;
			}
			smmu->s2crs[idx].count++;
			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;

			cnt++;
		}
	}

	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
		   cnt == 1 ? "" : "s");
	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}

static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;
	u32 global_irqs, pmu_irqs;
	irqreturn_t (*global_fault)(int irq, void *dev);

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
	else
		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
	if (err)
		return err;

	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->ioaddr = res->start;

	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	smmu = arm_smmu_impl_init(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

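	/*
	 * Interrupt lines are expected in the order: global fault IRQs,
	 * then PMU IRQs, with everything remaining treated as context IRQs.
	 */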
	num_irqs = platform_irq_count(pdev);

	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
	if (smmu->num_context_irqs <= 0)
		return dev_err_probe(dev, -ENODEV,
				"found %d interrupts but expected at least %d\n",
				num_irqs, global_irqs + pmu_irqs + 1);

	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
				  sizeof(*smmu->irqs), GFP_KERNEL);
	if (!smmu->irqs)
		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
				     smmu->num_context_irqs);

	for (i = 0; i < smmu->num_context_irqs; i++) {
		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);

		if (irq < 0)
			return irq;
		smmu->irqs[i] = irq;
	}

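	/* Acquire and enable all described clocks before touching the hardware. */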
	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

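	/* Implementations may override the global fault handler. */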
	if (smmu->impl && smmu->impl->global_fault)
		global_fault = smmu->impl->global_fault;
	else
		global_fault = arm_smmu_global_fault;

	for (i = 0; i < global_irqs; i++) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return irq;

		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
				       "arm-smmu global fault", smmu);
		if (err)
			return dev_err_probe(dev, err,
					"failed to request global IRQ %d (%u)\n",
					i, irq);
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &smmu->ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		iommu_device_sysfs_remove(&smmu->iommu);
		return err;
	}

	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass SMRs if any */
	arm_smmu_rmr_install_bypass_smr(smmu);

	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	return 0;
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_notice(&pdev->dev, "disabling translation\n");

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}

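/*
 * Shutdown mirrors removal so that translation is disabled before a
 * reboot or kexec.
 */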
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}

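/*
 * Runtime resume re-enables the (already prepared) clocks and restores
 * register state with a full reset, as it may have been lost while the
 * SMMU was suspended.
 */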
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

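/*
 * Runtime suspend only gates the clocks; hardware state is recreated by
 * the reset in the resume path.
 */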
static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

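/*
 * System resume re-prepares the clocks; if the device was runtime
 * suspended, leave it that way and let runtime PM bring it back up on
 * first use.
 */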
static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	int ret;
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	if (pm_runtime_suspended(dev))
		return 0;

	ret = arm_smmu_runtime_resume(dev);
	if (ret)
		clk_bulk_unprepare(smmu->num_clks, smmu->clks);

	return ret;
}

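/*
 * System suspend: runtime-suspend the hardware if it is still active,
 * then unprepare the clocks.
 */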
static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	int ret = 0;
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	if (pm_runtime_suspended(dev))
		goto clk_unprepare;

	ret = arm_smmu_runtime_suspend(dev);
	if (ret)
		return ret;

clk_unprepare:
	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return ret;
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");