1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/of_device.h>
33 #include <linux/pci.h>
34 #include <linux/platform_device.h>
35 #include <linux/pm_runtime.h>
36 #include <linux/ratelimit.h>
37 #include <linux/slab.h>
38 
39 #include <linux/fsl/mc.h>
40 
41 #include "arm-smmu.h"
42 #include "../../dma-iommu.h"
43 
44 /*
45  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
46  * global register space are still, in fact, using a hypervisor to mediate it
47  * by trapping and emulating register accesses. Sadly, some deployed versions
48  * of said trapping code have bugs wherein they go horribly wrong for stores
49  * using r31 (i.e. XZR/WZR) as the source register.
50  */
51 #define QCOM_DUMMY_VAL -1
52 
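/*
 * Fixed IOVA window reported to the IOMMU core as a software MSI region
 * (see arm_smmu_get_resv_regions() below), within which MSI doorbells get
 * mapped for devices behind the SMMU.
 */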
53 #define MSI_IOVA_BASE			0x8000000
54 #define MSI_IOVA_LENGTH			0x100000
55 
56 static int force_stage;
57 module_param(force_stage, int, S_IRUGO);
58 MODULE_PARM_DESC(force_stage,
59 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
60 static bool disable_bypass =
61 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
62 module_param(disable_bypass, bool, S_IRUGO);
63 MODULE_PARM_DESC(disable_bypass,
64 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
65 
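/*
 * Reset/default value for an S2CR entry: fault incoming transactions when
 * bypass is disabled, otherwise let them bypass translation.
 */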
66 #define s2cr_init_val (struct arm_smmu_s2cr){				\
67 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
68 }
69 
70 static bool using_legacy_binding, using_generic_binding;
71 
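/*
 * Only touch runtime PM if it has actually been enabled for this SMMU
 * (typically when it sits in its own power domain); otherwise these
 * helpers are no-ops.
 */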
72 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
73 {
74 	if (pm_runtime_enabled(smmu->dev))
75 		return pm_runtime_resume_and_get(smmu->dev);
76 
77 	return 0;
78 }
79 
80 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
81 {
82 	if (pm_runtime_enabled(smmu->dev))
83 		pm_runtime_put_autosuspend(smmu->dev);
84 }
85 
86 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
87 {
88 	return container_of(dom, struct arm_smmu_domain, domain);
89 }
90 
91 static struct platform_driver arm_smmu_driver;
92 static struct iommu_ops arm_smmu_ops;
93 
94 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
95 static struct device_node *dev_get_dev_node(struct device *dev)
96 {
97 	if (dev_is_pci(dev)) {
98 		struct pci_bus *bus = to_pci_dev(dev)->bus;
99 
100 		while (!pci_is_root_bus(bus))
101 			bus = bus->parent;
102 		return of_node_get(bus->bridge->parent->of_node);
103 	}
104 
105 	return of_node_get(dev->of_node);
106 }
107 
108 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
109 {
110 	*((__be32 *)data) = cpu_to_be32(alias);
111 	return 0; /* Continue walking */
112 }
113 
114 static int __find_legacy_master_phandle(struct device *dev, void *data)
115 {
116 	struct of_phandle_iterator *it = *(void **)data;
117 	struct device_node *np = it->node;
118 	int err;
119 
120 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
121 			    "#stream-id-cells", -1)
122 		if (it->node == np) {
123 			*(void **)data = dev;
124 			return 1;
125 		}
126 	it->node = np;
127 	return err == -ENOENT ? 0 : err;
128 }
129 
130 static int arm_smmu_register_legacy_master(struct device *dev,
131 					   struct arm_smmu_device **smmu)
132 {
133 	struct device *smmu_dev;
134 	struct device_node *np;
135 	struct of_phandle_iterator it;
136 	void *data = &it;
137 	u32 *sids;
138 	__be32 pci_sid;
139 	int err;
140 
141 	np = dev_get_dev_node(dev);
142 	if (!np || !of_property_present(np, "#stream-id-cells")) {
143 		of_node_put(np);
144 		return -ENODEV;
145 	}
146 
147 	it.node = np;
148 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
149 				     __find_legacy_master_phandle);
150 	smmu_dev = data;
151 	of_node_put(np);
152 	if (err == 0)
153 		return -ENODEV;
154 	if (err < 0)
155 		return err;
156 
157 	if (dev_is_pci(dev)) {
158 		/* "mmu-masters" assumes Stream ID == Requester ID */
159 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
160 				       &pci_sid);
161 		it.cur = &pci_sid;
162 		it.cur_count = 1;
163 	}
164 
165 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
166 				&arm_smmu_ops);
167 	if (err)
168 		return err;
169 
170 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
171 	if (!sids)
172 		return -ENOMEM;
173 
174 	*smmu = dev_get_drvdata(smmu_dev);
175 	of_phandle_iterator_args(&it, sids, it.cur_count);
176 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
177 	kfree(sids);
178 	return err;
179 }
180 #else
181 static int arm_smmu_register_legacy_master(struct device *dev,
182 					   struct arm_smmu_device **smmu)
183 {
184 	return -ENODEV;
185 }
186 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
187 
188 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
189 {
190 	clear_bit(idx, map);
191 }
192 
193 /* Wait for any pending TLB invalidations to complete */
194 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
195 				int sync, int status)
196 {
197 	unsigned int spin_cnt, delay;
198 	u32 reg;
199 
200 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
201 		return smmu->impl->tlb_sync(smmu, page, sync, status);
202 
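	/*
	 * Kick off the sync by poking the sync register (with the arbitrary
	 * QCOM_DUMMY_VAL, see above), then poll the status register: a short
	 * busy-wait, backed off with exponentially increasing udelay()s up
	 * to TLB_LOOP_TIMEOUT.
	 */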
203 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
204 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
205 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
206 			reg = arm_smmu_readl(smmu, page, status);
207 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
208 				return;
209 			cpu_relax();
210 		}
211 		udelay(delay);
212 	}
213 	dev_err_ratelimited(smmu->dev,
214 			    "TLB sync timed out -- SMMU may be deadlocked\n");
215 }
216 
217 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
218 {
219 	unsigned long flags;
220 
221 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
222 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
223 			    ARM_SMMU_GR0_sTLBGSTATUS);
224 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
225 }
226 
227 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
228 {
229 	struct arm_smmu_device *smmu = smmu_domain->smmu;
230 	unsigned long flags;
231 
232 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
233 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
234 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
235 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
236 }
237 
238 static void arm_smmu_tlb_inv_context_s1(void *cookie)
239 {
240 	struct arm_smmu_domain *smmu_domain = cookie;
241 	/*
242 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
243 	 * current CPU are visible beforehand.
244 	 */
245 	wmb();
246 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
247 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
248 	arm_smmu_tlb_sync_context(smmu_domain);
249 }
250 
251 static void arm_smmu_tlb_inv_context_s2(void *cookie)
252 {
253 	struct arm_smmu_domain *smmu_domain = cookie;
254 	struct arm_smmu_device *smmu = smmu_domain->smmu;
255 
256 	/* See above */
257 	wmb();
258 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
259 	arm_smmu_tlb_sync_global(smmu);
260 }
261 
262 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
263 				      size_t granule, void *cookie, int reg)
264 {
265 	struct arm_smmu_domain *smmu_domain = cookie;
266 	struct arm_smmu_device *smmu = smmu_domain->smmu;
267 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
268 	int idx = cfg->cbndx;
269 
270 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
271 		wmb();
272 
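	/*
	 * For the AArch32 formats the ASID is carried in the low bits of the
	 * (4K-aligned) VA written to the TLBI register; for AArch64 the VA is
	 * written in units of 4K with the ASID in bits [63:48].
	 */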
273 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
274 		iova = (iova >> 12) << 12;
275 		iova |= cfg->asid;
276 		do {
277 			arm_smmu_cb_write(smmu, idx, reg, iova);
278 			iova += granule;
279 		} while (size -= granule);
280 	} else {
281 		iova >>= 12;
282 		iova |= (u64)cfg->asid << 48;
283 		do {
284 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
285 			iova += granule >> 12;
286 		} while (size -= granule);
287 	}
288 }
289 
290 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
291 				      size_t granule, void *cookie, int reg)
292 {
293 	struct arm_smmu_domain *smmu_domain = cookie;
294 	struct arm_smmu_device *smmu = smmu_domain->smmu;
295 	int idx = smmu_domain->cfg.cbndx;
296 
297 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
298 		wmb();
299 
300 	iova >>= 12;
301 	do {
302 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
303 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
304 		else
305 			arm_smmu_cb_write(smmu, idx, reg, iova);
306 		iova += granule >> 12;
307 	} while (size -= granule);
308 }
309 
310 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
311 				     size_t granule, void *cookie)
312 {
313 	struct arm_smmu_domain *smmu_domain = cookie;
314 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
315 
316 	if (cfg->flush_walk_prefer_tlbiasid) {
317 		arm_smmu_tlb_inv_context_s1(cookie);
318 	} else {
319 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
320 					  ARM_SMMU_CB_S1_TLBIVA);
321 		arm_smmu_tlb_sync_context(cookie);
322 	}
323 }
324 
325 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
326 				     unsigned long iova, size_t granule,
327 				     void *cookie)
328 {
329 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
330 				  ARM_SMMU_CB_S1_TLBIVAL);
331 }
332 
333 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
334 				     size_t granule, void *cookie)
335 {
336 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
337 				  ARM_SMMU_CB_S2_TLBIIPAS2);
338 	arm_smmu_tlb_sync_context(cookie);
339 }
340 
341 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
350 					size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_context_s2(cookie);
353 }
354 /*
355  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
356  * almost negligible, but the benefit of getting the first one in as far ahead
357  * of the sync as possible is significant, hence we don't just make this a
358  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
359  * think.
360  */
361 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
362 					unsigned long iova, size_t granule,
363 					void *cookie)
364 {
365 	struct arm_smmu_domain *smmu_domain = cookie;
366 	struct arm_smmu_device *smmu = smmu_domain->smmu;
367 
368 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
369 		wmb();
370 
371 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
372 }
373 
374 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
375 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
376 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
377 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
378 };
379 
380 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
381 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
382 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
383 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
384 };
385 
386 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
387 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
388 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
389 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
390 };
391 
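/*
 * Context (translation) fault handler: report the fault upstream via
 * report_iommu_fault() and, if nobody handles it, log the syndrome
 * registers. The FSR is written back to clear the fault either way.
 */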
392 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
393 {
394 	u32 fsr, fsynr, cbfrsynra;
395 	unsigned long iova;
396 	struct iommu_domain *domain = dev;
397 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
398 	struct arm_smmu_device *smmu = smmu_domain->smmu;
399 	int idx = smmu_domain->cfg.cbndx;
400 	int ret;
401 
402 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
403 	if (!(fsr & ARM_SMMU_FSR_FAULT))
404 		return IRQ_NONE;
405 
406 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
407 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
408 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
409 
410 	ret = report_iommu_fault(domain, NULL, iova,
411 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
412 
413 	if (ret == -ENOSYS)
414 		dev_err_ratelimited(smmu->dev,
415 			"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
416 			fsr, iova, fsynr, cbfrsynra, idx);
417 
418 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
419 	return IRQ_HANDLED;
420 }
421 
422 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
423 {
424 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
425 	struct arm_smmu_device *smmu = dev;
426 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
427 				      DEFAULT_RATELIMIT_BURST);
428 
429 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
430 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
431 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
432 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
433 
434 	if (!gfsr)
435 		return IRQ_NONE;
436 
437 	if (__ratelimit(&rs)) {
438 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
439 		    (gfsr & ARM_SMMU_sGFSR_USF))
440 			dev_err(smmu->dev,
441 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
442 				(u16)gfsynr1);
443 		else
444 			dev_err(smmu->dev,
445 				"Unexpected global fault, this could be serious\n");
446 		dev_err(smmu->dev,
447 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
448 			gfsr, gfsynr0, gfsynr1, gfsynr2);
449 	}
450 
451 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
452 	return IRQ_HANDLED;
453 }
454 
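/*
 * Capture the io-pgtable configuration into the software shadow of the
 * context bank registers (TCR/TTBR/MAIR); nothing touches the hardware
 * until arm_smmu_write_context_bank().
 */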
455 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
456 				       struct io_pgtable_cfg *pgtbl_cfg)
457 {
458 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
459 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
460 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
461 
462 	cb->cfg = cfg;
463 
464 	/* TCR */
465 	if (stage1) {
466 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
467 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
468 		} else {
469 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
470 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
471 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
472 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
473 			else
474 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
475 		}
476 	} else {
477 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
478 	}
479 
480 	/* TTBRs */
481 	if (stage1) {
482 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
483 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
484 			cb->ttbr[1] = 0;
485 		} else {
486 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
487 						 cfg->asid);
488 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
489 						 cfg->asid);
490 
491 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
492 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
493 			else
494 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
495 		}
496 	} else {
497 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
498 	}
499 
500 	/* MAIRs (stage-1 only) */
501 	if (stage1) {
502 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
503 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
504 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
505 		} else {
506 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
507 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
508 		}
509 	}
510 }
511 
512 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
513 {
514 	u32 reg;
515 	bool stage1;
516 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
517 	struct arm_smmu_cfg *cfg = cb->cfg;
518 
519 	/* Unassigned context banks only need disabling */
520 	if (!cfg) {
521 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
522 		return;
523 	}
524 
525 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
526 
527 	/* CBA2R */
528 	if (smmu->version > ARM_SMMU_V1) {
529 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
530 			reg = ARM_SMMU_CBA2R_VA64;
531 		else
532 			reg = 0;
533 		/* 16-bit VMIDs live in CBA2R */
534 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
535 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
536 
537 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
538 	}
539 
540 	/* CBAR */
541 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
542 	if (smmu->version < ARM_SMMU_V2)
543 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
544 
545 	/*
546 	 * Use the weakest shareability/memory types, so they are
547 	 * overridden by the ttbcr/pte.
548 	 */
549 	if (stage1) {
550 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
551 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
552 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
553 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
554 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
555 		/* 8-bit VMIDs live in CBAR */
556 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
557 	}
558 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
559 
560 	/*
561 	 * TCR
562 	 * We must write this before the TTBRs, since it determines the
563 	 * access behaviour of some fields (in particular, ASID[15:8]).
564 	 */
565 	if (stage1 && smmu->version > ARM_SMMU_V1)
566 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
567 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
568 
569 	/* TTBRs */
570 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
571 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
572 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
573 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
574 	} else {
575 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
576 		if (stage1)
577 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
578 					   cb->ttbr[1]);
579 	}
580 
581 	/* MAIRs (stage-1 only) */
582 	if (stage1) {
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
584 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
585 	}
586 
587 	/* SCTLR */
588 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
589 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
590 	if (stage1)
591 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
592 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
593 		reg |= ARM_SMMU_SCTLR_E;
594 
595 	if (smmu->impl && smmu->impl->write_sctlr)
596 		smmu->impl->write_sctlr(smmu, idx, reg);
597 	else
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
599 }
600 
601 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
602 				       struct arm_smmu_device *smmu,
603 				       struct device *dev, unsigned int start)
604 {
605 	if (smmu->impl && smmu->impl->alloc_context_bank)
606 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
607 
608 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
609 }
610 
611 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
612 					struct arm_smmu_device *smmu,
613 					struct device *dev)
614 {
615 	int irq, start, ret = 0;
616 	unsigned long ias, oas;
617 	struct io_pgtable_ops *pgtbl_ops;
618 	struct io_pgtable_cfg pgtbl_cfg;
619 	enum io_pgtable_fmt fmt;
620 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
621 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
622 	irqreturn_t (*context_fault)(int irq, void *dev);
623 
624 	mutex_lock(&smmu_domain->init_mutex);
625 	if (smmu_domain->smmu)
626 		goto out_unlock;
627 
628 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
629 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
630 		smmu_domain->smmu = smmu;
631 		goto out_unlock;
632 	}
633 
634 	/*
635 	 * Mapping the requested stage onto what we support is surprisingly
636 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
637 	 * support for nested translation. That means we end up with the
638 	 * following table:
639 	 *
640 	 * Requested        Supported        Actual
641 	 *     S1               N              S1
642 	 *     S1             S1+S2            S1
643 	 *     S1               S2             S2
644 	 *     S1               S1             S1
645 	 *     N                N              N
646 	 *     N              S1+S2            S2
647 	 *     N                S2             S2
648 	 *     N                S1             S1
649 	 *
650 	 * Note that you can't actually request stage-2 mappings.
651 	 */
652 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
653 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
654 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
655 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
656 
657 	/*
658 	 * Choosing a suitable context format is even more fiddly. Until we
659 	 * grow some way for the caller to express a preference, and/or move
660 	 * the decision into the io-pgtable code where it arguably belongs,
661 	 * just aim for the closest thing to the rest of the system, and hope
662 	 * that the hardware isn't esoteric enough that we can't assume AArch64
663 	 * support to be a superset of AArch32 support...
664 	 */
665 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
666 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
667 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
668 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
669 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
670 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
671 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
672 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
673 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
674 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
675 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
676 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
677 
678 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
679 		ret = -EINVAL;
680 		goto out_unlock;
681 	}
682 
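	/*
	 * Pick the context bank pool, address sizes and page-table format for
	 * the chosen stage: stage 1 translates VA->IPA (so the output size is
	 * the IPA size), stage 2 translates IPA->PA.
	 */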
683 	switch (smmu_domain->stage) {
684 	case ARM_SMMU_DOMAIN_S1:
685 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
686 		start = smmu->num_s2_context_banks;
687 		ias = smmu->va_size;
688 		oas = smmu->ipa_size;
689 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
690 			fmt = ARM_64_LPAE_S1;
691 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
692 			fmt = ARM_32_LPAE_S1;
693 			ias = min(ias, 32UL);
694 			oas = min(oas, 40UL);
695 		} else {
696 			fmt = ARM_V7S;
697 			ias = min(ias, 32UL);
698 			oas = min(oas, 32UL);
699 		}
700 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
701 		break;
702 	case ARM_SMMU_DOMAIN_NESTED:
703 		/*
704 		 * We will likely want to change this if/when KVM gets
705 		 * involved.
706 		 */
707 	case ARM_SMMU_DOMAIN_S2:
708 		cfg->cbar = CBAR_TYPE_S2_TRANS;
709 		start = 0;
710 		ias = smmu->ipa_size;
711 		oas = smmu->pa_size;
712 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
713 			fmt = ARM_64_LPAE_S2;
714 		} else {
715 			fmt = ARM_32_LPAE_S2;
716 			ias = min(ias, 40UL);
717 			oas = min(oas, 40UL);
718 		}
719 		if (smmu->version == ARM_SMMU_V2)
720 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
721 		else
722 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
723 		break;
724 	default:
725 		ret = -EINVAL;
726 		goto out_unlock;
727 	}
728 
729 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
730 	if (ret < 0)
731 		goto out_unlock;
733 
734 	smmu_domain->smmu = smmu;
735 
736 	cfg->cbndx = ret;
737 	if (smmu->version < ARM_SMMU_V2) {
738 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
739 		cfg->irptndx %= smmu->num_context_irqs;
740 	} else {
741 		cfg->irptndx = cfg->cbndx;
742 	}
743 
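	/*
	 * Stage-2 contexts get VMID = context bank index + 1, keeping VMID 0
	 * out of use for translation; stage-1 contexts simply reuse the bank
	 * index as their ASID (the per-bank ASID space is private, see
	 * ARM_SMMU_SCTLR_S1_ASIDPNE in arm_smmu_write_context_bank()).
	 */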
744 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
745 		cfg->vmid = cfg->cbndx + 1;
746 	else
747 		cfg->asid = cfg->cbndx;
748 
749 	pgtbl_cfg = (struct io_pgtable_cfg) {
750 		.pgsize_bitmap	= smmu->pgsize_bitmap,
751 		.ias		= ias,
752 		.oas		= oas,
753 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
754 		.tlb		= smmu_domain->flush_ops,
755 		.iommu_dev	= smmu->dev,
756 	};
757 
758 	if (smmu->impl && smmu->impl->init_context) {
759 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
760 		if (ret)
761 			goto out_clear_smmu;
762 	}
763 
764 	if (smmu_domain->pgtbl_quirks)
765 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
766 
767 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
768 	if (!pgtbl_ops) {
769 		ret = -ENOMEM;
770 		goto out_clear_smmu;
771 	}
772 
773 	/* Update the domain's page sizes to reflect the page table format */
774 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
775 
776 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
777 		domain->geometry.aperture_start = ~0UL << ias;
778 		domain->geometry.aperture_end = ~0UL;
779 	} else {
780 		domain->geometry.aperture_end = (1UL << ias) - 1;
781 	}
782 
783 	domain->geometry.force_aperture = true;
784 
785 	/* Initialise the context bank with our page table cfg */
786 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
787 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
788 
789 	/*
790 	 * Request context fault interrupt. Do this last to avoid the
791 	 * handler seeing a half-initialised domain state.
792 	 */
793 	irq = smmu->irqs[cfg->irptndx];
794 
795 	if (smmu->impl && smmu->impl->context_fault)
796 		context_fault = smmu->impl->context_fault;
797 	else
798 		context_fault = arm_smmu_context_fault;
799 
800 	ret = devm_request_irq(smmu->dev, irq, context_fault,
801 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
802 	if (ret < 0) {
803 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
804 			cfg->irptndx, irq);
805 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
806 	}
807 
808 	mutex_unlock(&smmu_domain->init_mutex);
809 
810 	/* Publish page table ops for map/unmap */
811 	smmu_domain->pgtbl_ops = pgtbl_ops;
812 	return 0;
813 
814 out_clear_smmu:
815 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
816 	smmu_domain->smmu = NULL;
817 out_unlock:
818 	mutex_unlock(&smmu_domain->init_mutex);
819 	return ret;
820 }
821 
822 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
823 {
824 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
825 	struct arm_smmu_device *smmu = smmu_domain->smmu;
826 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
827 	int ret, irq;
828 
829 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
830 		return;
831 
832 	ret = arm_smmu_rpm_get(smmu);
833 	if (ret < 0)
834 		return;
835 
836 	/*
837 	 * Disable the context bank and free the page tables before
838 	 * releasing the context bank index itself.
839 	 */
840 	smmu->cbs[cfg->cbndx].cfg = NULL;
841 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
842 
843 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
844 		irq = smmu->irqs[cfg->irptndx];
845 		devm_free_irq(smmu->dev, irq, domain);
846 	}
847 
848 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
849 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
850 
851 	arm_smmu_rpm_put(smmu);
852 }
853 
854 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
855 {
856 	struct arm_smmu_domain *smmu_domain;
857 
858 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
859 		if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
860 			return NULL;
861 	}
862 	/*
863 	 * Allocate the domain and initialise some of its data structures.
864 	 * We can't really do anything meaningful until we've added a
865 	 * master.
866 	 */
867 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
868 	if (!smmu_domain)
869 		return NULL;
870 
871 	mutex_init(&smmu_domain->init_mutex);
872 	spin_lock_init(&smmu_domain->cb_lock);
873 
874 	return &smmu_domain->domain;
875 }
876 
877 static void arm_smmu_domain_free(struct iommu_domain *domain)
878 {
879 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
880 
881 	/*
882 	 * Free the domain resources. We assume that all devices have
883 	 * already been detached.
884 	 */
885 	arm_smmu_destroy_domain_context(domain);
886 	kfree(smmu_domain);
887 }
888 
889 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
890 {
891 	struct arm_smmu_smr *smr = smmu->smrs + idx;
892 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
893 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
894 
895 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
896 		reg |= ARM_SMMU_SMR_VALID;
897 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
898 }
899 
900 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
901 {
902 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
903 	u32 reg;
904 
905 	if (smmu->impl && smmu->impl->write_s2cr) {
906 		smmu->impl->write_s2cr(smmu, idx);
907 		return;
908 	}
909 
910 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
911 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
912 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
913 
914 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
915 	    smmu->smrs[idx].valid)
916 		reg |= ARM_SMMU_S2CR_EXIDVALID;
917 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
918 }
919 
920 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
921 {
922 	arm_smmu_write_s2cr(smmu, idx);
923 	if (smmu->smrs)
924 		arm_smmu_write_smr(smmu, idx);
925 }
926 
927 /*
928  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
929  * should be called after sCR0 is written.
930  */
931 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
932 {
933 	u32 smr;
934 	int i;
935 
936 	if (!smmu->smrs)
937 		return;
938 	/*
939 	 * If we've had to accommodate firmware memory regions, we may
940 	 * have live SMRs by now; tread carefully...
941 	 *
942 	 * Somewhat perversely, not having a free SMR for this test implies we
943 	 * can get away without it anyway, as we'll only be able to 'allocate'
944 	 * these SMRs for the ID/mask values we're already trusting to be OK.
945 	 */
946 	for (i = 0; i < smmu->num_mapping_groups; i++)
947 		if (!smmu->smrs[i].valid)
948 			goto smr_ok;
949 	return;
950 smr_ok:
951 	/*
952 	 * SMR.ID bits may not be preserved if the corresponding MASK
953 	 * bits are set, so check each one separately. We can reject
954 	 * masters later if they try to claim IDs outside these masks.
955 	 */
956 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
957 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
958 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
959 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
960 
961 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
962 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
963 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
964 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
965 }
966 
967 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
968 {
969 	struct arm_smmu_smr *smrs = smmu->smrs;
970 	int i, free_idx = -ENOSPC;
971 
972 	/* Stream indexing is blissfully easy */
973 	if (!smrs)
974 		return id;
975 
976 	/* Validating SMRs is... less so */
977 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
978 		if (!smrs[i].valid) {
979 			/*
980 			 * Note the first free entry we come across, which
981 			 * we'll claim in the end if nothing else matches.
982 			 */
983 			if (free_idx < 0)
984 				free_idx = i;
985 			continue;
986 		}
987 		/*
988 		 * If the new entry is _entirely_ matched by an existing entry,
989 		 * then reuse that, with the guarantee that there also cannot
990 		 * be any subsequent conflicting entries. In normal use we'd
991 		 * expect simply identical entries for this case, but there's
992 		 * no harm in accommodating the generalisation.
993 		 */
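		/*
		 * For example, an existing entry {id = 0x400, mask = 0xff}
		 * already matches every stream that a new {id = 0x4c0,
		 * mask = 0x3f} entry could, so the latter can reuse index i.
		 */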
994 		if ((mask & smrs[i].mask) == mask &&
995 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
996 			return i;
997 		/*
998 		 * If the new entry has any other overlap with an existing one,
999 		 * though, then there always exists at least one stream ID
1000 		 * which would cause a conflict, and we can't allow that risk.
1001 		 */
1002 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1003 			return -EINVAL;
1004 	}
1005 
1006 	return free_idx;
1007 }
1008 
1009 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1010 {
1011 	if (--smmu->s2crs[idx].count)
1012 		return false;
1013 
1014 	smmu->s2crs[idx] = s2cr_init_val;
1015 	if (smmu->smrs)
1016 		smmu->smrs[idx].valid = false;
1017 
1018 	return true;
1019 }
1020 
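/*
 * Allocate stream map entries for all of a master's Stream IDs under the
 * stream_map_mutex, and only write the hardware once the whole set has
 * been claimed successfully, so a mid-way failure can be unwound without
 * ever having touched the SMMU.
 */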
1021 static int arm_smmu_master_alloc_smes(struct device *dev)
1022 {
1023 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1024 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1025 	struct arm_smmu_device *smmu = cfg->smmu;
1026 	struct arm_smmu_smr *smrs = smmu->smrs;
1027 	int i, idx, ret;
1028 
1029 	mutex_lock(&smmu->stream_map_mutex);
1030 	/* Figure out a viable stream map entry allocation */
1031 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1032 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1033 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1034 
1035 		if (idx != INVALID_SMENDX) {
1036 			ret = -EEXIST;
1037 			goto out_err;
1038 		}
1039 
1040 		ret = arm_smmu_find_sme(smmu, sid, mask);
1041 		if (ret < 0)
1042 			goto out_err;
1043 
1044 		idx = ret;
1045 		if (smrs && smmu->s2crs[idx].count == 0) {
1046 			smrs[idx].id = sid;
1047 			smrs[idx].mask = mask;
1048 			smrs[idx].valid = true;
1049 		}
1050 		smmu->s2crs[idx].count++;
1051 		cfg->smendx[i] = (s16)idx;
1052 	}
1053 
1054 	/* It worked! Now, poke the actual hardware */
1055 	for_each_cfg_sme(cfg, fwspec, i, idx)
1056 		arm_smmu_write_sme(smmu, idx);
1057 
1058 	mutex_unlock(&smmu->stream_map_mutex);
1059 	return 0;
1060 
1061 out_err:
1062 	while (i--) {
1063 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1064 		cfg->smendx[i] = INVALID_SMENDX;
1065 	}
1066 	mutex_unlock(&smmu->stream_map_mutex);
1067 	return ret;
1068 }
1069 
1070 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1071 				      struct iommu_fwspec *fwspec)
1072 {
1073 	struct arm_smmu_device *smmu = cfg->smmu;
1074 	int i, idx;
1075 
1076 	mutex_lock(&smmu->stream_map_mutex);
1077 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1078 		if (arm_smmu_free_sme(smmu, idx))
1079 			arm_smmu_write_sme(smmu, idx);
1080 		cfg->smendx[i] = INVALID_SMENDX;
1081 	}
1082 	mutex_unlock(&smmu->stream_map_mutex);
1083 }
1084 
1085 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1086 				      struct arm_smmu_master_cfg *cfg,
1087 				      struct iommu_fwspec *fwspec)
1088 {
1089 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1090 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1091 	u8 cbndx = smmu_domain->cfg.cbndx;
1092 	enum arm_smmu_s2cr_type type;
1093 	int i, idx;
1094 
1095 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1096 		type = S2CR_TYPE_BYPASS;
1097 	else
1098 		type = S2CR_TYPE_TRANS;
1099 
1100 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1101 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1102 			continue;
1103 
1104 		s2cr[idx].type = type;
1105 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1106 		s2cr[idx].cbndx = cbndx;
1107 		arm_smmu_write_s2cr(smmu, idx);
1108 	}
1109 	return 0;
1110 }
1111 
1112 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1113 {
1114 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1115 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1116 	struct arm_smmu_master_cfg *cfg;
1117 	struct arm_smmu_device *smmu;
1118 	int ret;
1119 
1120 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1121 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1122 		return -ENXIO;
1123 	}
1124 
1125 	/*
1126 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1127 	 * domains between of_xlate() and probe_device() - we have no way to cope
1128 	 * with that, so until ARM gets converted to rely on groups and default
1129 	 * domains, just say no (but more politely than by dereferencing NULL).
1130 	 * This should be at least a WARN_ON once that's sorted.
1131 	 */
1132 	cfg = dev_iommu_priv_get(dev);
1133 	if (!cfg)
1134 		return -ENODEV;
1135 
1136 	smmu = cfg->smmu;
1137 
1138 	ret = arm_smmu_rpm_get(smmu);
1139 	if (ret < 0)
1140 		return ret;
1141 
1142 	/* Ensure that the domain is finalised */
1143 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1144 	if (ret < 0)
1145 		goto rpm_put;
1146 
1147 	/*
1148 	 * Sanity check the domain. We don't support domains across
1149 	 * different SMMUs.
1150 	 */
1151 	if (smmu_domain->smmu != smmu) {
1152 		ret = -EINVAL;
1153 		goto rpm_put;
1154 	}
1155 
1156 	/* Looks ok, so add the device to the domain */
1157 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1158 
1159 	/*
1160 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1161 	 * Otherwise, if a driver for a suspended consumer device
1162 	 * unmaps buffers, it will runpm resume/suspend for each one.
1163 	 *
1164 	 * For example, when used by a GPU device, when an application
1165 	 * or game exits, it can trigger unmapping 100s or 1000s of
1166 	 * buffers.  With a runpm cycle for each buffer, that adds up
1167 	 * to 5-10sec worth of reprogramming the context bank, while
1168 	 * the system appears to be locked up to the user.
1169 	 */
1170 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1171 	pm_runtime_use_autosuspend(smmu->dev);
1172 
1173 rpm_put:
1174 	arm_smmu_rpm_put(smmu);
1175 	return ret;
1176 }
1177 
1178 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1179 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1180 			      int prot, gfp_t gfp, size_t *mapped)
1181 {
1182 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1183 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1184 	int ret;
1185 
1186 	if (!ops)
1187 		return -ENODEV;
1188 
1189 	arm_smmu_rpm_get(smmu);
1190 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1191 	arm_smmu_rpm_put(smmu);
1192 
1193 	return ret;
1194 }
1195 
1196 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1197 				   size_t pgsize, size_t pgcount,
1198 				   struct iommu_iotlb_gather *iotlb_gather)
1199 {
1200 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1201 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1202 	size_t ret;
1203 
1204 	if (!ops)
1205 		return 0;
1206 
1207 	arm_smmu_rpm_get(smmu);
1208 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1209 	arm_smmu_rpm_put(smmu);
1210 
1211 	return ret;
1212 }
1213 
1214 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1215 {
1216 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1217 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1218 
1219 	if (smmu_domain->flush_ops) {
1220 		arm_smmu_rpm_get(smmu);
1221 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1222 		arm_smmu_rpm_put(smmu);
1223 	}
1224 }
1225 
1226 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1227 				struct iommu_iotlb_gather *gather)
1228 {
1229 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1230 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1231 
1232 	if (!smmu)
1233 		return;
1234 
1235 	arm_smmu_rpm_get(smmu);
1236 	if (smmu->version == ARM_SMMU_V2 ||
1237 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1238 		arm_smmu_tlb_sync_context(smmu_domain);
1239 	else
1240 		arm_smmu_tlb_sync_global(smmu);
1241 	arm_smmu_rpm_put(smmu);
1242 }
1243 
1244 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1245 					      dma_addr_t iova)
1246 {
1247 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1248 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1249 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1250 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1251 	struct device *dev = smmu->dev;
1252 	void __iomem *reg;
1253 	u32 tmp;
1254 	u64 phys;
1255 	unsigned long va, flags;
1256 	int ret, idx = cfg->cbndx;
1257 	phys_addr_t addr = 0;
1258 
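	/*
	 * Use the hardware stage-1 address translation operation: write the
	 * page-aligned VA to ATS1PR, poll ATSR until the walk completes, then
	 * read the result from PAR. Time-outs fall back to a software table
	 * walk; a fault reported in PAR yields a zero address.
	 */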
1259 	ret = arm_smmu_rpm_get(smmu);
1260 	if (ret < 0)
1261 		return 0;
1262 
1263 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1264 	va = iova & ~0xfffUL;
1265 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1266 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1267 	else
1268 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1269 
1270 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1271 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1272 				      5, 50)) {
1273 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1274 		dev_err(dev,
1275 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1276 			&iova);
1277 		arm_smmu_rpm_put(smmu);
1278 		return ops->iova_to_phys(ops, iova);
1279 	}
1280 
1281 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1282 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1283 	if (phys & ARM_SMMU_CB_PAR_F) {
1284 		dev_err(dev, "translation fault!\n");
1285 		dev_err(dev, "PAR = 0x%llx\n", phys);
1286 		goto out;
1287 	}
1288 
1289 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1290 out:
1291 	arm_smmu_rpm_put(smmu);
1292 
1293 	return addr;
1294 }
1295 
1296 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1297 					dma_addr_t iova)
1298 {
1299 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1300 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1301 
1302 	if (!ops)
1303 		return 0;
1304 
1305 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1306 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1307 		return arm_smmu_iova_to_phys_hard(domain, iova);
1308 
1309 	return ops->iova_to_phys(ops, iova);
1310 }
1311 
1312 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1313 {
1314 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1315 
1316 	switch (cap) {
1317 	case IOMMU_CAP_CACHE_COHERENCY:
1318 		/*
1319 		 * It's overwhelmingly the case in practice that when the pagetable
1320 		 * walk interface is connected to a coherent interconnect, all the
1321 		 * translation interfaces are too. Furthermore if the device is
1322 		 * natively coherent, then its translation interface must also be.
1323 		 */
1324 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
1325 			device_get_dma_attr(dev) == DEV_DMA_COHERENT;
1326 	case IOMMU_CAP_NOEXEC:
1327 	case IOMMU_CAP_DEFERRED_FLUSH:
1328 		return true;
1329 	default:
1330 		return false;
1331 	}
1332 }
1333 
1334 static
1335 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1336 {
1337 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1338 							  fwnode);
1339 	put_device(dev);
1340 	return dev ? dev_get_drvdata(dev) : NULL;
1341 }
1342 
1343 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1344 {
1345 	struct arm_smmu_device *smmu = NULL;
1346 	struct arm_smmu_master_cfg *cfg;
1347 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1348 	int i, ret;
1349 
1350 	if (using_legacy_binding) {
1351 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1352 
1353 		/*
1354 		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1355 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1356 		 * later use.
1357 		 */
1358 		fwspec = dev_iommu_fwspec_get(dev);
1359 		if (ret)
1360 			goto out_free;
1361 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1362 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1363 	} else {
1364 		return ERR_PTR(-ENODEV);
1365 	}
1366 
1367 	ret = -EINVAL;
1368 	for (i = 0; i < fwspec->num_ids; i++) {
1369 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1370 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1371 
1372 		if (sid & ~smmu->streamid_mask) {
1373 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1374 				sid, smmu->streamid_mask);
1375 			goto out_free;
1376 		}
1377 		if (mask & ~smmu->smr_mask_mask) {
1378 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1379 				mask, smmu->smr_mask_mask);
1380 			goto out_free;
1381 		}
1382 	}
1383 
1384 	ret = -ENOMEM;
1385 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1386 		      GFP_KERNEL);
1387 	if (!cfg)
1388 		goto out_free;
1389 
1390 	cfg->smmu = smmu;
1391 	dev_iommu_priv_set(dev, cfg);
1392 	while (i--)
1393 		cfg->smendx[i] = INVALID_SMENDX;
1394 
1395 	ret = arm_smmu_rpm_get(smmu);
1396 	if (ret < 0)
1397 		goto out_cfg_free;
1398 
1399 	ret = arm_smmu_master_alloc_smes(dev);
1400 	arm_smmu_rpm_put(smmu);
1401 
1402 	if (ret)
1403 		goto out_cfg_free;
1404 
1405 	device_link_add(dev, smmu->dev,
1406 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1407 
1408 	return &smmu->iommu;
1409 
1410 out_cfg_free:
1411 	kfree(cfg);
1412 out_free:
1413 	iommu_fwspec_free(dev);
1414 	return ERR_PTR(ret);
1415 }
1416 
1417 static void arm_smmu_release_device(struct device *dev)
1418 {
1419 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1420 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1421 	int ret;
1422 
1423 	ret = arm_smmu_rpm_get(cfg->smmu);
1424 	if (ret < 0)
1425 		return;
1426 
1427 	arm_smmu_master_free_smes(cfg, fwspec);
1428 
1429 	arm_smmu_rpm_put(cfg->smmu);
1430 
1431 	dev_iommu_priv_set(dev, NULL);
1432 	kfree(cfg);
1433 }
1434 
1435 static void arm_smmu_probe_finalize(struct device *dev)
1436 {
1437 	struct arm_smmu_master_cfg *cfg;
1438 	struct arm_smmu_device *smmu;
1439 
1440 	cfg = dev_iommu_priv_get(dev);
1441 	smmu = cfg->smmu;
1442 
1443 	if (smmu->impl && smmu->impl->probe_finalize)
1444 		smmu->impl->probe_finalize(smmu, dev);
1445 }
1446 
1447 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1448 {
1449 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1450 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1451 	struct arm_smmu_device *smmu = cfg->smmu;
1452 	struct iommu_group *group = NULL;
1453 	int i, idx;
1454 
1455 	mutex_lock(&smmu->stream_map_mutex);
1456 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1457 		if (group && smmu->s2crs[idx].group &&
1458 		    group != smmu->s2crs[idx].group) {
1459 			mutex_unlock(&smmu->stream_map_mutex);
1460 			return ERR_PTR(-EINVAL);
1461 		}
1462 
1463 		group = smmu->s2crs[idx].group;
1464 	}
1465 
1466 	if (group) {
1467 		mutex_unlock(&smmu->stream_map_mutex);
1468 		return iommu_group_ref_get(group);
1469 	}
1470 
1471 	if (dev_is_pci(dev))
1472 		group = pci_device_group(dev);
1473 	else if (dev_is_fsl_mc(dev))
1474 		group = fsl_mc_device_group(dev);
1475 	else
1476 		group = generic_device_group(dev);
1477 
1478 	/* Remember group for faster lookups */
1479 	if (!IS_ERR(group))
1480 		for_each_cfg_sme(cfg, fwspec, i, idx)
1481 			smmu->s2crs[idx].group = group;
1482 
1483 	mutex_unlock(&smmu->stream_map_mutex);
1484 	return group;
1485 }
1486 
1487 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1488 {
1489 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1490 	int ret = 0;
1491 
1492 	mutex_lock(&smmu_domain->init_mutex);
1493 	if (smmu_domain->smmu)
1494 		ret = -EPERM;
1495 	else
1496 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1497 	mutex_unlock(&smmu_domain->init_mutex);
1498 
1499 	return ret;
1500 }
1501 
1502 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1503 		unsigned long quirks)
1504 {
1505 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1506 	int ret = 0;
1507 
1508 	mutex_lock(&smmu_domain->init_mutex);
1509 	if (smmu_domain->smmu)
1510 		ret = -EPERM;
1511 	else
1512 		smmu_domain->pgtbl_quirks = quirks;
1513 	mutex_unlock(&smmu_domain->init_mutex);
1514 
1515 	return ret;
1516 }
1517 
1518 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1519 {
1520 	u32 mask, fwid = 0;
1521 
1522 	if (args->args_count > 0)
1523 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1524 
1525 	if (args->args_count > 1)
1526 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1527 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1528 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1529 
1530 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1531 }
1532 
1533 static void arm_smmu_get_resv_regions(struct device *dev,
1534 				      struct list_head *head)
1535 {
1536 	struct iommu_resv_region *region;
1537 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1538 
1539 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1540 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
1541 	if (!region)
1542 		return;
1543 
1544 	list_add_tail(&region->list, head);
1545 
1546 	iommu_dma_get_resv_regions(dev, head);
1547 }
1548 
1549 static int arm_smmu_def_domain_type(struct device *dev)
1550 {
1551 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1552 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1553 
1554 	if (using_legacy_binding)
1555 		return IOMMU_DOMAIN_IDENTITY;
1556 
1557 	if (impl && impl->def_domain_type)
1558 		return impl->def_domain_type(dev);
1559 
1560 	return 0;
1561 }
1562 
1563 static struct iommu_ops arm_smmu_ops = {
1564 	.capable		= arm_smmu_capable,
1565 	.domain_alloc		= arm_smmu_domain_alloc,
1566 	.probe_device		= arm_smmu_probe_device,
1567 	.release_device		= arm_smmu_release_device,
1568 	.probe_finalize		= arm_smmu_probe_finalize,
1569 	.device_group		= arm_smmu_device_group,
1570 	.of_xlate		= arm_smmu_of_xlate,
1571 	.get_resv_regions	= arm_smmu_get_resv_regions,
1572 	.def_domain_type	= arm_smmu_def_domain_type,
1573 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1574 	.owner			= THIS_MODULE,
1575 	.default_domain_ops = &(const struct iommu_domain_ops) {
1576 		.attach_dev		= arm_smmu_attach_dev,
1577 		.map_pages		= arm_smmu_map_pages,
1578 		.unmap_pages		= arm_smmu_unmap_pages,
1579 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1580 		.iotlb_sync		= arm_smmu_iotlb_sync,
1581 		.iova_to_phys		= arm_smmu_iova_to_phys,
1582 		.enable_nesting		= arm_smmu_enable_nesting,
1583 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1584 		.free			= arm_smmu_domain_free,
1585 	}
1586 };
1587 
1588 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1589 {
1590 	int i;
1591 	u32 reg;
1592 
1593 	/* clear global FSR */
1594 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1595 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1596 
1597 	/*
1598 	 * Reset stream mapping groups: Initial values mark all SMRn as invalid
1599 	 * and all S2CRn as bypass or fault (per disable_bypass) unless overridden.
1600 	 */
1601 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1602 		arm_smmu_write_sme(smmu, i);
1603 
1604 	/* Make sure all context banks are disabled and clear CB_FSR  */
1605 	for (i = 0; i < smmu->num_context_banks; ++i) {
1606 		arm_smmu_write_context_bank(smmu, i);
1607 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1608 	}
1609 
1610 	/* Invalidate the TLB, just in case */
1611 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1612 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1613 
1614 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1615 
1616 	/* Enable fault reporting */
1617 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1618 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1619 
1620 	/* Disable TLB broadcasting. */
1621 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1622 
1623 	/* Enable client access, handling unmatched streams as appropriate */
1624 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1625 	if (disable_bypass)
1626 		reg |= ARM_SMMU_sCR0_USFCFG;
1627 	else
1628 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1629 
1630 	/* Disable forced broadcasting */
1631 	reg &= ~ARM_SMMU_sCR0_FB;
1632 
1633 	/* Don't upgrade barriers */
1634 	reg &= ~(ARM_SMMU_sCR0_BSU);
1635 
1636 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1637 		reg |= ARM_SMMU_sCR0_VMID16EN;
1638 
1639 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1640 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1641 
1642 	if (smmu->impl && smmu->impl->reset)
1643 		smmu->impl->reset(smmu);
1644 
1645 	/* Push the button */
1646 	arm_smmu_tlb_sync_global(smmu);
1647 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1648 }
1649 
1650 static int arm_smmu_id_size_to_bits(int size)
1651 {
1652 	switch (size) {
1653 	case 0:
1654 		return 32;
1655 	case 1:
1656 		return 36;
1657 	case 2:
1658 		return 40;
1659 	case 3:
1660 		return 42;
1661 	case 4:
1662 		return 44;
1663 	case 5:
1664 	default:
1665 		return 48;
1666 	}
1667 }
1668 
1669 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1670 {
1671 	unsigned int size;
1672 	u32 id;
1673 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1674 	int i, ret;
1675 
1676 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1677 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1678 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1679 
1680 	/* ID0 */
1681 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1682 
1683 	/* Restrict available stages based on module parameter */
1684 	if (force_stage == 1)
1685 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1686 	else if (force_stage == 2)
1687 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1688 
1689 	if (id & ARM_SMMU_ID0_S1TS) {
1690 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1691 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1692 	}
1693 
1694 	if (id & ARM_SMMU_ID0_S2TS) {
1695 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1696 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1697 	}
1698 
1699 	if (id & ARM_SMMU_ID0_NTS) {
1700 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1701 		dev_notice(smmu->dev, "\tnested translation\n");
1702 	}
1703 
1704 	if (!(smmu->features &
1705 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1706 		dev_err(smmu->dev, "\tno translation support!\n");
1707 		return -ENODEV;
1708 	}
1709 
1710 	if ((id & ARM_SMMU_ID0_S1TS) &&
1711 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1712 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1713 		dev_notice(smmu->dev, "\taddress translation ops\n");
1714 	}
1715 
1716 	/*
1717 	 * In order for DMA API calls to work properly, we must defer to what
1718 	 * the FW says about coherency, regardless of what the hardware claims.
1719 	 * Fortunately, this also opens up a workaround for systems where the
1720 	 * ID register value has ended up configured incorrectly.
1721 	 */
1722 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1723 	if (cttw_fw || cttw_reg)
1724 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1725 			   cttw_fw ? "" : "non-");
1726 	if (cttw_fw != cttw_reg)
1727 		dev_notice(smmu->dev,
1728 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1729 
1730 	/* Max. number of entries we have for stream matching/indexing */
1731 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1732 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1733 		size = 1 << 16;
1734 	} else {
1735 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1736 	}
1737 	smmu->streamid_mask = size - 1;
1738 	if (id & ARM_SMMU_ID0_SMS) {
1739 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1740 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1741 		if (size == 0) {
1742 			dev_err(smmu->dev,
1743 				"stream-matching supported, but no SMRs present!\n");
1744 			return -ENODEV;
1745 		}
1746 
1747 		/* Zero-initialised to mark as invalid */
1748 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1749 					  GFP_KERNEL);
1750 		if (!smmu->smrs)
1751 			return -ENOMEM;
1752 
1753 		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups\n", size);
1755 	}
1756 	/* s2cr->type == 0 means translation, so initialise explicitly */
1757 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1758 					 GFP_KERNEL);
1759 	if (!smmu->s2crs)
1760 		return -ENOMEM;
1761 	for (i = 0; i < size; i++)
1762 		smmu->s2crs[i] = s2cr_init_val;
1763 
1764 	smmu->num_mapping_groups = size;
1765 	mutex_init(&smmu->stream_map_mutex);
1766 	spin_lock_init(&smmu->global_sync_lock);
1767 
1768 	if (smmu->version < ARM_SMMU_V2 ||
1769 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1770 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1771 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1772 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1773 	}
1774 
1775 	/* ID1 */
1776 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1777 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1778 
	/*
	 * Check for a size mismatch between the SMMU address space and the
	 * mapped region: the global address space and the context bank space
	 * each span NUMPAGE pages, hence the factor of two below.
	 */
1780 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1781 	if (smmu->numpage != 2 * size << smmu->pgshift)
1782 		dev_warn(smmu->dev,
1783 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1784 			2 * size << smmu->pgshift, smmu->numpage);
1785 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1786 	smmu->numpage = size;
1787 
1788 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1789 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1790 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1791 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1792 		return -ENODEV;
1793 	}
1794 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1795 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1796 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1797 				 sizeof(*smmu->cbs), GFP_KERNEL);
1798 	if (!smmu->cbs)
1799 		return -ENOMEM;
1800 
1801 	/* ID2 */
1802 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1803 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1804 	smmu->ipa_size = size;
1805 
1806 	/* The output mask is also applied for bypass */
1807 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1808 	smmu->pa_size = size;
1809 
1810 	if (id & ARM_SMMU_ID2_VMID16)
1811 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1812 
1813 	/*
1814 	 * What the page table walker can address actually depends on which
1815 	 * descriptor format is in use, but since a) we don't know that yet,
1816 	 * and b) it can vary per context bank, this will have to do...
1817 	 */
1818 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1819 		dev_warn(smmu->dev,
1820 			 "failed to set DMA mask for table walker\n");
1821 
1822 	if (smmu->version < ARM_SMMU_V2) {
1823 		smmu->va_size = smmu->ipa_size;
1824 		if (smmu->version == ARM_SMMU_V1_64K)
1825 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1826 	} else {
1827 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1828 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1829 		if (id & ARM_SMMU_ID2_PTFS_4K)
1830 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1831 		if (id & ARM_SMMU_ID2_PTFS_16K)
1832 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1833 		if (id & ARM_SMMU_ID2_PTFS_64K)
1834 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1835 	}
1836 
1837 	if (smmu->impl && smmu->impl->cfg_probe) {
1838 		ret = smmu->impl->cfg_probe(smmu);
1839 		if (ret)
1840 			return ret;
1841 	}
1842 
1843 	/* Now we've corralled the various formats, what'll it do? */
1844 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1845 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1846 	if (smmu->features &
1847 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1848 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1849 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1850 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1851 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1852 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1853 
1854 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1855 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1856 	else
1857 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1858 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

1862 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1863 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1864 			   smmu->va_size, smmu->ipa_size);
1865 
1866 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1867 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1868 			   smmu->ipa_size, smmu->pa_size);
1869 
1870 	return 0;
1871 }
1872 
1873 struct arm_smmu_match_data {
1874 	enum arm_smmu_arch_version version;
1875 	enum arm_smmu_implementation model;
1876 };
1877 
1878 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1879 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1880 
1881 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1882 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1883 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1884 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1885 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1886 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1887 
1888 static const struct of_device_id arm_smmu_of_match[] = {
1889 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1890 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1891 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1892 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1893 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1894 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1895 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1896 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1897 	{ },
1898 };
1899 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1900 
1901 #ifdef CONFIG_ACPI
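/* Map the IORT SMMU node's model field onto the driver's version/model enums */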
1902 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1903 {
1904 	int ret = 0;
1905 
1906 	switch (model) {
1907 	case ACPI_IORT_SMMU_V1:
1908 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1909 		smmu->version = ARM_SMMU_V1;
1910 		smmu->model = GENERIC_SMMU;
1911 		break;
1912 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1913 		smmu->version = ARM_SMMU_V1_64K;
1914 		smmu->model = GENERIC_SMMU;
1915 		break;
1916 	case ACPI_IORT_SMMU_V2:
1917 		smmu->version = ARM_SMMU_V2;
1918 		smmu->model = GENERIC_SMMU;
1919 		break;
1920 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1921 		smmu->version = ARM_SMMU_V2;
1922 		smmu->model = ARM_MMU500;
1923 		break;
1924 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1925 		smmu->version = ARM_SMMU_V2;
1926 		smmu->model = CAVIUM_SMMUV2;
1927 		break;
1928 	default:
1929 		ret = -ENODEV;
1930 	}
1931 
1932 	return ret;
1933 }
1934 
1935 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1936 				      u32 *global_irqs, u32 *pmu_irqs)
1937 {
1938 	struct device *dev = smmu->dev;
1939 	struct acpi_iort_node *node =
1940 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1941 	struct acpi_iort_smmu *iort_smmu;
1942 	int ret;
1943 
1944 	/* Retrieve SMMU1/2 specific data */
1945 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1946 
1947 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1948 	if (ret < 0)
1949 		return ret;
1950 
1951 	/* Ignore the configuration access interrupt */
1952 	*global_irqs = 1;
1953 	*pmu_irqs = 0;
1954 
1955 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1956 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1957 
1958 	return 0;
1959 }
1960 #else
1961 static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1962 					     u32 *global_irqs, u32 *pmu_irqs)
1963 {
1964 	return -ENODEV;
1965 }
1966 #endif
1967 
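/*
 * DT probe: pick up the match data and "#global-interrupts", and refuse to
 * mix the deprecated "mmu-masters" binding with the generic "iommus" binding
 * on the same system.
 */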
1968 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
1969 				    u32 *global_irqs, u32 *pmu_irqs)
1970 {
1971 	const struct arm_smmu_match_data *data;
1972 	struct device *dev = smmu->dev;
1973 	bool legacy_binding;
1974 
1975 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
1976 		return dev_err_probe(dev, -ENODEV,
1977 				     "missing #global-interrupts property\n");
1978 	*pmu_irqs = 0;
1979 
1980 	data = of_device_get_match_data(dev);
1981 	smmu->version = data->version;
1982 	smmu->model = data->model;
1983 
1984 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1985 	if (legacy_binding && !using_generic_binding) {
1986 		if (!using_legacy_binding) {
1987 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
1988 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
1989 		}
1990 		using_legacy_binding = true;
1991 	} else if (!legacy_binding && !using_legacy_binding) {
1992 		using_generic_binding = true;
1993 	} else {
1994 		dev_err(dev, "not probing due to mismatched DT properties\n");
1995 		return -ENODEV;
1996 	}
1997 
1998 	if (of_dma_is_coherent(dev->of_node))
1999 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2000 
2001 	return 0;
2002 }
2003 
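/*
 * Reserve bypass SMR/S2CR entries for any stream IDs described by IORT RMR
 * (reserved memory range) nodes, so that devices with live, firmware-configured
 * DMA keep working across the SMMU reset.
 */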
2004 static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
2005 {
2006 	struct list_head rmr_list;
2007 	struct iommu_resv_region *e;
2008 	int idx, cnt = 0;
2009 	u32 reg;
2010 
2011 	INIT_LIST_HEAD(&rmr_list);
2012 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2013 
	/*
	 * Rather than trying to look at existing mappings that
	 * are set up by the firmware and then invalidate the ones
	 * that do not have matching RMR entries, just disable the
	 * SMMU until it gets enabled again in the reset routine.
	 */
2020 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
2021 	reg |= ARM_SMMU_sCR0_CLIENTPD;
2022 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
2023 
2024 	list_for_each_entry(e, &rmr_list, list) {
2025 		struct iommu_iort_rmr_data *rmr;
2026 		int i;
2027 
2028 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
2029 		for (i = 0; i < rmr->num_sids; i++) {
2030 			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
2031 			if (idx < 0)
2032 				continue;
2033 
2034 			if (smmu->s2crs[idx].count == 0) {
2035 				smmu->smrs[idx].id = rmr->sids[i];
2036 				smmu->smrs[idx].mask = 0;
2037 				smmu->smrs[idx].valid = true;
2038 			}
2039 			smmu->s2crs[idx].count++;
2040 			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
2041 			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
2042 
2043 			cnt++;
2044 		}
2045 	}
2046 
2047 	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
2048 		   cnt == 1 ? "" : "s");
2049 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2050 }
2051 
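/*
 * Main probe path: gather firmware configuration (DT or ACPI/IORT), map the
 * register space, probe the hardware configuration, wire up interrupts and
 * clocks, then register with the IOMMU core before resetting the hardware
 * into a known state.
 */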
2052 static int arm_smmu_device_probe(struct platform_device *pdev)
2053 {
2054 	struct resource *res;
2055 	struct arm_smmu_device *smmu;
2056 	struct device *dev = &pdev->dev;
2057 	int num_irqs, i, err;
2058 	u32 global_irqs, pmu_irqs;
2059 	irqreturn_t (*global_fault)(int irq, void *dev);
2060 
2061 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2062 	if (!smmu) {
2063 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2064 		return -ENOMEM;
2065 	}
2066 	smmu->dev = dev;
2067 
2068 	if (dev->of_node)
2069 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2070 	else
2071 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2072 	if (err)
2073 		return err;
2074 
2075 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2076 	if (IS_ERR(smmu->base))
2077 		return PTR_ERR(smmu->base);
2078 	smmu->ioaddr = res->start;
2079 
2080 	/*
2081 	 * The resource size should effectively match the value of SMMU_TOP;
2082 	 * stash that temporarily until we know PAGESIZE to validate it with.
2083 	 */
2084 	smmu->numpage = resource_size(res);
2085 
2086 	smmu = arm_smmu_impl_init(smmu);
2087 	if (IS_ERR(smmu))
2088 		return PTR_ERR(smmu);
2089 
2090 	num_irqs = platform_irq_count(pdev);
2091 
2092 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2093 	if (smmu->num_context_irqs <= 0)
2094 		return dev_err_probe(dev, -ENODEV,
2095 				"found %d interrupts but expected at least %d\n",
2096 				num_irqs, global_irqs + pmu_irqs + 1);
2097 
2098 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2099 				  sizeof(*smmu->irqs), GFP_KERNEL);
2100 	if (!smmu->irqs)
2101 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2102 				     smmu->num_context_irqs);
2103 
2104 	for (i = 0; i < smmu->num_context_irqs; i++) {
2105 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2106 
2107 		if (irq < 0)
2108 			return irq;
2109 		smmu->irqs[i] = irq;
2110 	}
2111 
2112 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2113 	if (err < 0) {
2114 		dev_err(dev, "failed to get clocks %d\n", err);
2115 		return err;
2116 	}
2117 	smmu->num_clks = err;
2118 
2119 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2120 	if (err)
2121 		return err;
2122 
2123 	err = arm_smmu_device_cfg_probe(smmu);
2124 	if (err)
2125 		return err;
2126 
2127 	if (smmu->version == ARM_SMMU_V2) {
2128 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2129 			dev_err(dev,
2130 			      "found only %d context irq(s) but %d required\n",
2131 			      smmu->num_context_irqs, smmu->num_context_banks);
2132 			return -ENODEV;
2133 		}
2134 
2135 		/* Ignore superfluous interrupts */
2136 		smmu->num_context_irqs = smmu->num_context_banks;
2137 	}
2138 
2139 	if (smmu->impl && smmu->impl->global_fault)
2140 		global_fault = smmu->impl->global_fault;
2141 	else
2142 		global_fault = arm_smmu_global_fault;
2143 
2144 	for (i = 0; i < global_irqs; i++) {
2145 		int irq = platform_get_irq(pdev, i);
2146 
2147 		if (irq < 0)
2148 			return irq;
2149 
2150 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2151 				       "arm-smmu global fault", smmu);
2152 		if (err)
2153 			return dev_err_probe(dev, err,
2154 					"failed to request global IRQ %d (%u)\n",
2155 					i, irq);
2156 	}
2157 
2158 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2159 				     "smmu.%pa", &smmu->ioaddr);
2160 	if (err) {
2161 		dev_err(dev, "Failed to register iommu in sysfs\n");
2162 		return err;
2163 	}
2164 
2165 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2166 	if (err) {
2167 		dev_err(dev, "Failed to register iommu\n");
2168 		iommu_device_sysfs_remove(&smmu->iommu);
2169 		return err;
2170 	}
2171 
2172 	platform_set_drvdata(pdev, smmu);
2173 
2174 	/* Check for RMRs and install bypass SMRs if any */
2175 	arm_smmu_rmr_install_bypass_smr(smmu);
2176 
2177 	arm_smmu_device_reset(smmu);
2178 	arm_smmu_test_smr_masks(smmu);
2179 
2180 	/*
2181 	 * We want to avoid touching dev->power.lock in fastpaths unless
2182 	 * it's really going to do something useful - pm_runtime_enabled()
2183 	 * can serve as an ideal proxy for that decision. So, conditionally
2184 	 * enable pm_runtime.
2185 	 */
2186 	if (dev->pm_domain) {
2187 		pm_runtime_set_active(dev);
2188 		pm_runtime_enable(dev);
2189 	}
2190 
2191 	return 0;
2192 }
2193 
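/*
 * On shutdown (and remove), leave the SMMU with translation disabled
 * (sCR0.CLIENTPD set) and release its clocks.
 */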
2194 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2195 {
2196 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2197 
2198 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2199 		dev_notice(&pdev->dev, "disabling translation\n");
2200 
2201 	arm_smmu_rpm_get(smmu);
2202 	/* Turn the thing off */
2203 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2204 	arm_smmu_rpm_put(smmu);
2205 
2206 	if (pm_runtime_enabled(smmu->dev))
2207 		pm_runtime_force_suspend(smmu->dev);
2208 	else
2209 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2210 
2211 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2212 }
2213 
2214 static void arm_smmu_device_remove(struct platform_device *pdev)
2215 {
2216 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2217 
2218 	iommu_device_unregister(&smmu->iommu);
2219 	iommu_device_sysfs_remove(&smmu->iommu);
2220 
2221 	arm_smmu_device_shutdown(pdev);
2222 }
2223 
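/*
 * Runtime resume re-enables the clocks and replays the cached SMR/S2CR and
 * context bank state via arm_smmu_device_reset(), since register contents
 * may have been lost while the SMMU was powered down.
 */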
2224 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2225 {
2226 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2227 	int ret;
2228 
2229 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2230 	if (ret)
2231 		return ret;
2232 
2233 	arm_smmu_device_reset(smmu);
2234 
2235 	return 0;
2236 }
2237 
2238 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2239 {
2240 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2241 
2242 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2243 
2244 	return 0;
2245 }
2246 
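/*
 * System sleep handlers: the clocks are unprepared across suspend, and resume
 * only touches the hardware if the device wasn't already runtime-suspended.
 */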
2247 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2248 {
2249 	int ret;
2250 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2251 
2252 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2253 	if (ret)
2254 		return ret;
2255 
2256 	if (pm_runtime_suspended(dev))
2257 		return 0;
2258 
2259 	ret = arm_smmu_runtime_resume(dev);
2260 	if (ret)
2261 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2262 
2263 	return ret;
2264 }
2265 
2266 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2267 {
2268 	int ret = 0;
2269 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2270 
2271 	if (pm_runtime_suspended(dev))
2272 		goto clk_unprepare;
2273 
2274 	ret = arm_smmu_runtime_suspend(dev);
2275 	if (ret)
2276 		return ret;
2277 
2278 clk_unprepare:
2279 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2280 	return ret;
2281 }
2282 
2283 static const struct dev_pm_ops arm_smmu_pm_ops = {
2284 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2285 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2286 			   arm_smmu_runtime_resume, NULL)
2287 };
2288 
2289 static struct platform_driver arm_smmu_driver = {
2290 	.driver	= {
2291 		.name			= "arm-smmu",
2292 		.of_match_table		= arm_smmu_of_match,
2293 		.pm			= &arm_smmu_pm_ops,
2294 		.suppress_bind_attrs    = true,
2295 	},
2296 	.probe	= arm_smmu_device_probe,
2297 	.remove_new = arm_smmu_device_remove,
2298 	.shutdown = arm_smmu_device_shutdown,
2299 };
2300 module_platform_driver(arm_smmu_driver);
2301 
2302 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2303 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2304 MODULE_ALIAS("platform:arm-smmu");
2305 MODULE_LICENSE("GPL v2");
2306