xref: /openbmc/linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision 90741096769bd75152a5fe397343e5893c7d905a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-mapping.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/io.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/of.h>
31 #include <linux/of_address.h>
32 #include <linux/of_device.h>
33 #include <linux/pci.h>
34 #include <linux/platform_device.h>
35 #include <linux/pm_runtime.h>
36 #include <linux/ratelimit.h>
37 #include <linux/slab.h>
38 
39 #include <linux/fsl/mc.h>
40 
41 #include "arm-smmu.h"
42 #include "../../dma-iommu.h"
43 
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 *
 * This value is what gets written to registers whose written value does not
 * matter (e.g. the TLB sync trigger). The expansion is parenthesised so the
 * negative constant stays a single operand wherever the macro is used.
 */
#define QCOM_DUMMY_VAL (-1)

/*
 * Base and length of an IOVA window; going by the names this is the region
 * set aside for MSIs -- the users are outside this part of the file.
 */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
55 
56 static int force_stage;
57 module_param(force_stage, int, S_IRUGO);
58 MODULE_PARM_DESC(force_stage,
59 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
60 static bool disable_bypass =
61 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
62 module_param(disable_bypass, bool, S_IRUGO);
63 MODULE_PARM_DESC(disable_bypass,
64 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
65 
66 #define s2cr_init_val (struct arm_smmu_s2cr){				\
67 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
68 }
69 
70 static bool using_legacy_binding, using_generic_binding;
71 
72 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
73 {
74 	if (pm_runtime_enabled(smmu->dev))
75 		return pm_runtime_resume_and_get(smmu->dev);
76 
77 	return 0;
78 }
79 
80 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
81 {
82 	if (pm_runtime_enabled(smmu->dev))
83 		pm_runtime_put_autosuspend(smmu->dev);
84 }
85 
86 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
87 {
88 	return container_of(dom, struct arm_smmu_domain, domain);
89 }
90 
91 static struct platform_driver arm_smmu_driver;
92 static struct iommu_ops arm_smmu_ops;
93 
94 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
95 static struct device_node *dev_get_dev_node(struct device *dev)
96 {
97 	if (dev_is_pci(dev)) {
98 		struct pci_bus *bus = to_pci_dev(dev)->bus;
99 
100 		while (!pci_is_root_bus(bus))
101 			bus = bus->parent;
102 		return of_node_get(bus->bridge->parent->of_node);
103 	}
104 
105 	return of_node_get(dev->of_node);
106 }
107 
108 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
109 {
110 	*((__be32 *)data) = cpu_to_be32(alias);
111 	return 0; /* Continue walking */
112 }
113 
114 static int __find_legacy_master_phandle(struct device *dev, void *data)
115 {
116 	struct of_phandle_iterator *it = *(void **)data;
117 	struct device_node *np = it->node;
118 	int err;
119 
120 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
121 			    "#stream-id-cells", -1)
122 		if (it->node == np) {
123 			*(void **)data = dev;
124 			return 1;
125 		}
126 	it->node = np;
127 	return err == -ENOENT ? 0 : err;
128 }
129 
130 static int arm_smmu_register_legacy_master(struct device *dev,
131 					   struct arm_smmu_device **smmu)
132 {
133 	struct device *smmu_dev;
134 	struct device_node *np;
135 	struct of_phandle_iterator it;
136 	void *data = &it;
137 	u32 *sids;
138 	__be32 pci_sid;
139 	int err;
140 
141 	np = dev_get_dev_node(dev);
142 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
143 		of_node_put(np);
144 		return -ENODEV;
145 	}
146 
147 	it.node = np;
148 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
149 				     __find_legacy_master_phandle);
150 	smmu_dev = data;
151 	of_node_put(np);
152 	if (err == 0)
153 		return -ENODEV;
154 	if (err < 0)
155 		return err;
156 
157 	if (dev_is_pci(dev)) {
158 		/* "mmu-masters" assumes Stream ID == Requester ID */
159 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
160 				       &pci_sid);
161 		it.cur = &pci_sid;
162 		it.cur_count = 1;
163 	}
164 
165 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
166 				&arm_smmu_ops);
167 	if (err)
168 		return err;
169 
170 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
171 	if (!sids)
172 		return -ENOMEM;
173 
174 	*smmu = dev_get_drvdata(smmu_dev);
175 	of_phandle_iterator_args(&it, sids, it.cur_count);
176 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
177 	kfree(sids);
178 	return err;
179 }
180 #else
181 static int arm_smmu_register_legacy_master(struct device *dev,
182 					   struct arm_smmu_device **smmu)
183 {
184 	return -ENODEV;
185 }
186 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
187 
/* Release index @idx in the allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	/* clear_bit() takes (bit number, bitmap) */
	clear_bit(idx, map);
}
192 
193 /* Wait for any pending TLB invalidations to complete */
194 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
195 				int sync, int status)
196 {
197 	unsigned int spin_cnt, delay;
198 	u32 reg;
199 
200 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
201 		return smmu->impl->tlb_sync(smmu, page, sync, status);
202 
203 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
204 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
205 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
206 			reg = arm_smmu_readl(smmu, page, status);
207 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
208 				return;
209 			cpu_relax();
210 		}
211 		udelay(delay);
212 	}
213 	dev_err_ratelimited(smmu->dev,
214 			    "TLB sync timed out -- SMMU may be deadlocked\n");
215 }
216 
217 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
218 {
219 	unsigned long flags;
220 
221 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
222 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
223 			    ARM_SMMU_GR0_sTLBGSTATUS);
224 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
225 }
226 
227 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
228 {
229 	struct arm_smmu_device *smmu = smmu_domain->smmu;
230 	unsigned long flags;
231 
232 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
233 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
234 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
235 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
236 }
237 
238 static void arm_smmu_tlb_inv_context_s1(void *cookie)
239 {
240 	struct arm_smmu_domain *smmu_domain = cookie;
241 	/*
242 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
243 	 * current CPU are visible beforehand.
244 	 */
245 	wmb();
246 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
247 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
248 	arm_smmu_tlb_sync_context(smmu_domain);
249 }
250 
251 static void arm_smmu_tlb_inv_context_s2(void *cookie)
252 {
253 	struct arm_smmu_domain *smmu_domain = cookie;
254 	struct arm_smmu_device *smmu = smmu_domain->smmu;
255 
256 	/* See above */
257 	wmb();
258 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
259 	arm_smmu_tlb_sync_global(smmu);
260 }
261 
262 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
263 				      size_t granule, void *cookie, int reg)
264 {
265 	struct arm_smmu_domain *smmu_domain = cookie;
266 	struct arm_smmu_device *smmu = smmu_domain->smmu;
267 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
268 	int idx = cfg->cbndx;
269 
270 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
271 		wmb();
272 
273 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
274 		iova = (iova >> 12) << 12;
275 		iova |= cfg->asid;
276 		do {
277 			arm_smmu_cb_write(smmu, idx, reg, iova);
278 			iova += granule;
279 		} while (size -= granule);
280 	} else {
281 		iova >>= 12;
282 		iova |= (u64)cfg->asid << 48;
283 		do {
284 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
285 			iova += granule >> 12;
286 		} while (size -= granule);
287 	}
288 }
289 
290 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
291 				      size_t granule, void *cookie, int reg)
292 {
293 	struct arm_smmu_domain *smmu_domain = cookie;
294 	struct arm_smmu_device *smmu = smmu_domain->smmu;
295 	int idx = smmu_domain->cfg.cbndx;
296 
297 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
298 		wmb();
299 
300 	iova >>= 12;
301 	do {
302 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
303 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
304 		else
305 			arm_smmu_cb_write(smmu, idx, reg, iova);
306 		iova += granule >> 12;
307 	} while (size -= granule);
308 }
309 
310 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
311 				     size_t granule, void *cookie)
312 {
313 	struct arm_smmu_domain *smmu_domain = cookie;
314 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
315 
316 	if (cfg->flush_walk_prefer_tlbiasid) {
317 		arm_smmu_tlb_inv_context_s1(cookie);
318 	} else {
319 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
320 					  ARM_SMMU_CB_S1_TLBIVA);
321 		arm_smmu_tlb_sync_context(cookie);
322 	}
323 }
324 
325 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
326 				     unsigned long iova, size_t granule,
327 				     void *cookie)
328 {
329 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
330 				  ARM_SMMU_CB_S1_TLBIVAL);
331 }
332 
333 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
334 				     size_t granule, void *cookie)
335 {
336 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
337 				  ARM_SMMU_CB_S2_TLBIIPAS2);
338 	arm_smmu_tlb_sync_context(cookie);
339 }
340 
341 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
342 				     unsigned long iova, size_t granule,
343 				     void *cookie)
344 {
345 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
346 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
347 }
348 
349 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
350 					size_t granule, void *cookie)
351 {
352 	arm_smmu_tlb_inv_context_s2(cookie);
353 }
354 /*
355  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
356  * almost negligible, but the benefit of getting the first one in as far ahead
357  * of the sync as possible is significant, hence we don't just make this a
358  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
359  * think.
360  */
361 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
362 					unsigned long iova, size_t granule,
363 					void *cookie)
364 {
365 	struct arm_smmu_domain *smmu_domain = cookie;
366 	struct arm_smmu_device *smmu = smmu_domain->smmu;
367 
368 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
369 		wmb();
370 
371 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
372 }
373 
374 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
375 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
376 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
377 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
378 };
379 
380 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
381 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
382 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
383 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
384 };
385 
386 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
387 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
388 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
389 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
390 };
391 
392 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
393 {
394 	u32 fsr, fsynr, cbfrsynra;
395 	unsigned long iova;
396 	struct iommu_domain *domain = dev;
397 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
398 	struct arm_smmu_device *smmu = smmu_domain->smmu;
399 	int idx = smmu_domain->cfg.cbndx;
400 	int ret;
401 
402 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
403 	if (!(fsr & ARM_SMMU_FSR_FAULT))
404 		return IRQ_NONE;
405 
406 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
407 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
408 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
409 
410 	ret = report_iommu_fault(domain, NULL, iova,
411 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
412 
413 	if (ret == -ENOSYS)
414 		dev_err_ratelimited(smmu->dev,
415 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
416 			    fsr, iova, fsynr, cbfrsynra, idx);
417 
418 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
419 	return IRQ_HANDLED;
420 }
421 
422 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
423 {
424 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
425 	struct arm_smmu_device *smmu = dev;
426 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
427 				      DEFAULT_RATELIMIT_BURST);
428 
429 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
430 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
431 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
432 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
433 
434 	if (!gfsr)
435 		return IRQ_NONE;
436 
437 	if (__ratelimit(&rs)) {
438 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
439 		    (gfsr & ARM_SMMU_sGFSR_USF))
440 			dev_err(smmu->dev,
441 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
442 				(u16)gfsynr1);
443 		else
444 			dev_err(smmu->dev,
445 				"Unexpected global fault, this could be serious\n");
446 		dev_err(smmu->dev,
447 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
448 			gfsr, gfsynr0, gfsynr1, gfsynr2);
449 	}
450 
451 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
452 	return IRQ_HANDLED;
453 }
454 
455 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
456 				       struct io_pgtable_cfg *pgtbl_cfg)
457 {
458 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
459 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
460 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
461 
462 	cb->cfg = cfg;
463 
464 	/* TCR */
465 	if (stage1) {
466 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
467 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
468 		} else {
469 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
470 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
471 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
472 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
473 			else
474 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
475 		}
476 	} else {
477 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
478 	}
479 
480 	/* TTBRs */
481 	if (stage1) {
482 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
483 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
484 			cb->ttbr[1] = 0;
485 		} else {
486 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
487 						 cfg->asid);
488 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
489 						 cfg->asid);
490 
491 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
492 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
493 			else
494 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
495 		}
496 	} else {
497 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
498 	}
499 
500 	/* MAIRs (stage-1 only) */
501 	if (stage1) {
502 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
503 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
504 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
505 		} else {
506 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
507 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
508 		}
509 	}
510 }
511 
512 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
513 {
514 	u32 reg;
515 	bool stage1;
516 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
517 	struct arm_smmu_cfg *cfg = cb->cfg;
518 
519 	/* Unassigned context banks only need disabling */
520 	if (!cfg) {
521 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
522 		return;
523 	}
524 
525 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
526 
527 	/* CBA2R */
528 	if (smmu->version > ARM_SMMU_V1) {
529 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
530 			reg = ARM_SMMU_CBA2R_VA64;
531 		else
532 			reg = 0;
533 		/* 16-bit VMIDs live in CBA2R */
534 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
535 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
536 
537 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
538 	}
539 
540 	/* CBAR */
541 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
542 	if (smmu->version < ARM_SMMU_V2)
543 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
544 
545 	/*
546 	 * Use the weakest shareability/memory types, so they are
547 	 * overridden by the ttbcr/pte.
548 	 */
549 	if (stage1) {
550 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
551 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
552 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
553 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
554 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
555 		/* 8-bit VMIDs live in CBAR */
556 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
557 	}
558 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
559 
560 	/*
561 	 * TCR
562 	 * We must write this before the TTBRs, since it determines the
563 	 * access behaviour of some fields (in particular, ASID[15:8]).
564 	 */
565 	if (stage1 && smmu->version > ARM_SMMU_V1)
566 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
567 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
568 
569 	/* TTBRs */
570 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
571 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
572 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
573 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
574 	} else {
575 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
576 		if (stage1)
577 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
578 					   cb->ttbr[1]);
579 	}
580 
581 	/* MAIRs (stage-1 only) */
582 	if (stage1) {
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
584 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
585 	}
586 
587 	/* SCTLR */
588 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
589 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
590 	if (stage1)
591 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
592 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
593 		reg |= ARM_SMMU_SCTLR_E;
594 
595 	if (smmu->impl && smmu->impl->write_sctlr)
596 		smmu->impl->write_sctlr(smmu, idx, reg);
597 	else
598 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
599 }
600 
601 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
602 				       struct arm_smmu_device *smmu,
603 				       struct device *dev, unsigned int start)
604 {
605 	if (smmu->impl && smmu->impl->alloc_context_bank)
606 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
607 
608 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
609 }
610 
611 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
612 					struct arm_smmu_device *smmu,
613 					struct device *dev)
614 {
615 	int irq, start, ret = 0;
616 	unsigned long ias, oas;
617 	struct io_pgtable_ops *pgtbl_ops;
618 	struct io_pgtable_cfg pgtbl_cfg;
619 	enum io_pgtable_fmt fmt;
620 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
621 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
622 	irqreturn_t (*context_fault)(int irq, void *dev);
623 
624 	mutex_lock(&smmu_domain->init_mutex);
625 	if (smmu_domain->smmu)
626 		goto out_unlock;
627 
628 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
629 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
630 		smmu_domain->smmu = smmu;
631 		goto out_unlock;
632 	}
633 
634 	/*
635 	 * Mapping the requested stage onto what we support is surprisingly
636 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
637 	 * support for nested translation. That means we end up with the
638 	 * following table:
639 	 *
640 	 * Requested        Supported        Actual
641 	 *     S1               N              S1
642 	 *     S1             S1+S2            S1
643 	 *     S1               S2             S2
644 	 *     S1               S1             S1
645 	 *     N                N              N
646 	 *     N              S1+S2            S2
647 	 *     N                S2             S2
648 	 *     N                S1             S1
649 	 *
650 	 * Note that you can't actually request stage-2 mappings.
651 	 */
652 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
653 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
654 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
655 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
656 
657 	/*
658 	 * Choosing a suitable context format is even more fiddly. Until we
659 	 * grow some way for the caller to express a preference, and/or move
660 	 * the decision into the io-pgtable code where it arguably belongs,
661 	 * just aim for the closest thing to the rest of the system, and hope
662 	 * that the hardware isn't esoteric enough that we can't assume AArch64
663 	 * support to be a superset of AArch32 support...
664 	 */
665 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
666 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
667 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
668 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
669 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
670 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
671 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
672 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
673 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
674 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
675 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
676 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
677 
678 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
679 		ret = -EINVAL;
680 		goto out_unlock;
681 	}
682 
683 	switch (smmu_domain->stage) {
684 	case ARM_SMMU_DOMAIN_S1:
685 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
686 		start = smmu->num_s2_context_banks;
687 		ias = smmu->va_size;
688 		oas = smmu->ipa_size;
689 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
690 			fmt = ARM_64_LPAE_S1;
691 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
692 			fmt = ARM_32_LPAE_S1;
693 			ias = min(ias, 32UL);
694 			oas = min(oas, 40UL);
695 		} else {
696 			fmt = ARM_V7S;
697 			ias = min(ias, 32UL);
698 			oas = min(oas, 32UL);
699 		}
700 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
701 		break;
702 	case ARM_SMMU_DOMAIN_NESTED:
703 		/*
704 		 * We will likely want to change this if/when KVM gets
705 		 * involved.
706 		 */
707 	case ARM_SMMU_DOMAIN_S2:
708 		cfg->cbar = CBAR_TYPE_S2_TRANS;
709 		start = 0;
710 		ias = smmu->ipa_size;
711 		oas = smmu->pa_size;
712 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
713 			fmt = ARM_64_LPAE_S2;
714 		} else {
715 			fmt = ARM_32_LPAE_S2;
716 			ias = min(ias, 40UL);
717 			oas = min(oas, 40UL);
718 		}
719 		if (smmu->version == ARM_SMMU_V2)
720 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
721 		else
722 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
723 		break;
724 	default:
725 		ret = -EINVAL;
726 		goto out_unlock;
727 	}
728 
729 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
730 	if (ret < 0) {
731 		goto out_unlock;
732 	}
733 
734 	smmu_domain->smmu = smmu;
735 
736 	cfg->cbndx = ret;
737 	if (smmu->version < ARM_SMMU_V2) {
738 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
739 		cfg->irptndx %= smmu->num_context_irqs;
740 	} else {
741 		cfg->irptndx = cfg->cbndx;
742 	}
743 
744 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
745 		cfg->vmid = cfg->cbndx + 1;
746 	else
747 		cfg->asid = cfg->cbndx;
748 
749 	pgtbl_cfg = (struct io_pgtable_cfg) {
750 		.pgsize_bitmap	= smmu->pgsize_bitmap,
751 		.ias		= ias,
752 		.oas		= oas,
753 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
754 		.tlb		= smmu_domain->flush_ops,
755 		.iommu_dev	= smmu->dev,
756 	};
757 
758 	if (smmu->impl && smmu->impl->init_context) {
759 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
760 		if (ret)
761 			goto out_clear_smmu;
762 	}
763 
764 	if (smmu_domain->pgtbl_quirks)
765 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
766 
767 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
768 	if (!pgtbl_ops) {
769 		ret = -ENOMEM;
770 		goto out_clear_smmu;
771 	}
772 
773 	/* Update the domain's page sizes to reflect the page table format */
774 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
775 
776 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
777 		domain->geometry.aperture_start = ~0UL << ias;
778 		domain->geometry.aperture_end = ~0UL;
779 	} else {
780 		domain->geometry.aperture_end = (1UL << ias) - 1;
781 	}
782 
783 	domain->geometry.force_aperture = true;
784 
785 	/* Initialise the context bank with our page table cfg */
786 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
787 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
788 
789 	/*
790 	 * Request context fault interrupt. Do this last to avoid the
791 	 * handler seeing a half-initialised domain state.
792 	 */
793 	irq = smmu->irqs[cfg->irptndx];
794 
795 	if (smmu->impl && smmu->impl->context_fault)
796 		context_fault = smmu->impl->context_fault;
797 	else
798 		context_fault = arm_smmu_context_fault;
799 
800 	ret = devm_request_irq(smmu->dev, irq, context_fault,
801 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
802 	if (ret < 0) {
803 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
804 			cfg->irptndx, irq);
805 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
806 	}
807 
808 	mutex_unlock(&smmu_domain->init_mutex);
809 
810 	/* Publish page table ops for map/unmap */
811 	smmu_domain->pgtbl_ops = pgtbl_ops;
812 	return 0;
813 
814 out_clear_smmu:
815 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
816 	smmu_domain->smmu = NULL;
817 out_unlock:
818 	mutex_unlock(&smmu_domain->init_mutex);
819 	return ret;
820 }
821 
822 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
823 {
824 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
825 	struct arm_smmu_device *smmu = smmu_domain->smmu;
826 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
827 	int ret, irq;
828 
829 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
830 		return;
831 
832 	ret = arm_smmu_rpm_get(smmu);
833 	if (ret < 0)
834 		return;
835 
836 	/*
837 	 * Disable the context bank and free the page tables before freeing
838 	 * it.
839 	 */
840 	smmu->cbs[cfg->cbndx].cfg = NULL;
841 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
842 
843 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
844 		irq = smmu->irqs[cfg->irptndx];
845 		devm_free_irq(smmu->dev, irq, domain);
846 	}
847 
848 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
849 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
850 
851 	arm_smmu_rpm_put(smmu);
852 }
853 
854 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
855 {
856 	struct arm_smmu_domain *smmu_domain;
857 
858 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
859 		if (using_legacy_binding ||
860 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
861 			return NULL;
862 	}
863 	/*
864 	 * Allocate the domain and initialise some of its data structures.
865 	 * We can't really do anything meaningful until we've added a
866 	 * master.
867 	 */
868 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
869 	if (!smmu_domain)
870 		return NULL;
871 
872 	mutex_init(&smmu_domain->init_mutex);
873 	spin_lock_init(&smmu_domain->cb_lock);
874 
875 	return &smmu_domain->domain;
876 }
877 
/*
 * Tear down the domain's context (if any) and free the domain itself.
 * We assume that all devices have already been detached.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	arm_smmu_destroy_domain_context(domain);

	/* The iommu_domain is embedded, so free the containing structure */
	kfree(to_smmu_domain(domain));
}
889 
890 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
891 {
892 	struct arm_smmu_smr *smr = smmu->smrs + idx;
893 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
894 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
895 
896 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
897 		reg |= ARM_SMMU_SMR_VALID;
898 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
899 }
900 
901 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
902 {
903 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
904 	u32 reg;
905 
906 	if (smmu->impl && smmu->impl->write_s2cr) {
907 		smmu->impl->write_s2cr(smmu, idx);
908 		return;
909 	}
910 
911 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
912 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
913 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
914 
915 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
916 	    smmu->smrs[idx].valid)
917 		reg |= ARM_SMMU_S2CR_EXIDVALID;
918 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
919 }
920 
921 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
922 {
923 	arm_smmu_write_s2cr(smmu, idx);
924 	if (smmu->smrs)
925 		arm_smmu_write_smr(smmu, idx);
926 }
927 
928 /*
929  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
930  * should be called after sCR0 is written.
931  */
932 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
933 {
934 	u32 smr;
935 	int i;
936 
937 	if (!smmu->smrs)
938 		return;
939 	/*
940 	 * If we've had to accommodate firmware memory regions, we may
941 	 * have live SMRs by now; tread carefully...
942 	 *
943 	 * Somewhat perversely, not having a free SMR for this test implies we
944 	 * can get away without it anyway, as we'll only be able to 'allocate'
945 	 * these SMRs for the ID/mask values we're already trusting to be OK.
946 	 */
947 	for (i = 0; i < smmu->num_mapping_groups; i++)
948 		if (!smmu->smrs[i].valid)
949 			goto smr_ok;
950 	return;
951 smr_ok:
952 	/*
953 	 * SMR.ID bits may not be preserved if the corresponding MASK
954 	 * bits are set, so check each one separately. We can reject
955 	 * masters later if they try to claim IDs outside these masks.
956 	 */
957 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
958 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
959 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
960 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
961 
962 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
963 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
964 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
965 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
966 }
967 
968 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
969 {
970 	struct arm_smmu_smr *smrs = smmu->smrs;
971 	int i, free_idx = -ENOSPC;
972 
973 	/* Stream indexing is blissfully easy */
974 	if (!smrs)
975 		return id;
976 
977 	/* Validating SMRs is... less so */
978 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
979 		if (!smrs[i].valid) {
980 			/*
981 			 * Note the first free entry we come across, which
982 			 * we'll claim in the end if nothing else matches.
983 			 */
984 			if (free_idx < 0)
985 				free_idx = i;
986 			continue;
987 		}
988 		/*
989 		 * If the new entry is _entirely_ matched by an existing entry,
990 		 * then reuse that, with the guarantee that there also cannot
991 		 * be any subsequent conflicting entries. In normal use we'd
992 		 * expect simply identical entries for this case, but there's
993 		 * no harm in accommodating the generalisation.
994 		 */
995 		if ((mask & smrs[i].mask) == mask &&
996 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
997 			return i;
998 		/*
999 		 * If the new entry has any other overlap with an existing one,
1000 		 * though, then there always exists at least one stream ID
1001 		 * which would cause a conflict, and we can't allow that risk.
1002 		 */
1003 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1004 			return -EINVAL;
1005 	}
1006 
1007 	return free_idx;
1008 }
1009 
1010 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1011 {
1012 	if (--smmu->s2crs[idx].count)
1013 		return false;
1014 
1015 	smmu->s2crs[idx] = s2cr_init_val;
1016 	if (smmu->smrs)
1017 		smmu->smrs[idx].valid = false;
1018 
1019 	return true;
1020 }
1021 
1022 static int arm_smmu_master_alloc_smes(struct device *dev)
1023 {
1024 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1025 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1026 	struct arm_smmu_device *smmu = cfg->smmu;
1027 	struct arm_smmu_smr *smrs = smmu->smrs;
1028 	int i, idx, ret;
1029 
1030 	mutex_lock(&smmu->stream_map_mutex);
1031 	/* Figure out a viable stream map entry allocation */
1032 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1033 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1034 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1035 
1036 		if (idx != INVALID_SMENDX) {
1037 			ret = -EEXIST;
1038 			goto out_err;
1039 		}
1040 
1041 		ret = arm_smmu_find_sme(smmu, sid, mask);
1042 		if (ret < 0)
1043 			goto out_err;
1044 
1045 		idx = ret;
1046 		if (smrs && smmu->s2crs[idx].count == 0) {
1047 			smrs[idx].id = sid;
1048 			smrs[idx].mask = mask;
1049 			smrs[idx].valid = true;
1050 		}
1051 		smmu->s2crs[idx].count++;
1052 		cfg->smendx[i] = (s16)idx;
1053 	}
1054 
1055 	/* It worked! Now, poke the actual hardware */
1056 	for_each_cfg_sme(cfg, fwspec, i, idx)
1057 		arm_smmu_write_sme(smmu, idx);
1058 
1059 	mutex_unlock(&smmu->stream_map_mutex);
1060 	return 0;
1061 
1062 out_err:
1063 	while (i--) {
1064 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1065 		cfg->smendx[i] = INVALID_SMENDX;
1066 	}
1067 	mutex_unlock(&smmu->stream_map_mutex);
1068 	return ret;
1069 }
1070 
1071 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1072 				      struct iommu_fwspec *fwspec)
1073 {
1074 	struct arm_smmu_device *smmu = cfg->smmu;
1075 	int i, idx;
1076 
1077 	mutex_lock(&smmu->stream_map_mutex);
1078 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1079 		if (arm_smmu_free_sme(smmu, idx))
1080 			arm_smmu_write_sme(smmu, idx);
1081 		cfg->smendx[i] = INVALID_SMENDX;
1082 	}
1083 	mutex_unlock(&smmu->stream_map_mutex);
1084 }
1085 
1086 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1087 				      struct arm_smmu_master_cfg *cfg,
1088 				      struct iommu_fwspec *fwspec)
1089 {
1090 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1091 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1092 	u8 cbndx = smmu_domain->cfg.cbndx;
1093 	enum arm_smmu_s2cr_type type;
1094 	int i, idx;
1095 
1096 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1097 		type = S2CR_TYPE_BYPASS;
1098 	else
1099 		type = S2CR_TYPE_TRANS;
1100 
1101 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1102 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1103 			continue;
1104 
1105 		s2cr[idx].type = type;
1106 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1107 		s2cr[idx].cbndx = cbndx;
1108 		arm_smmu_write_s2cr(smmu, idx);
1109 	}
1110 	return 0;
1111 }
1112 
1113 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1114 {
1115 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1116 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1117 	struct arm_smmu_master_cfg *cfg;
1118 	struct arm_smmu_device *smmu;
1119 	int ret;
1120 
1121 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1122 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1123 		return -ENXIO;
1124 	}
1125 
1126 	/*
1127 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1128 	 * domains between of_xlate() and probe_device() - we have no way to cope
1129 	 * with that, so until ARM gets converted to rely on groups and default
1130 	 * domains, just say no (but more politely than by dereferencing NULL).
1131 	 * This should be at least a WARN_ON once that's sorted.
1132 	 */
1133 	cfg = dev_iommu_priv_get(dev);
1134 	if (!cfg)
1135 		return -ENODEV;
1136 
1137 	smmu = cfg->smmu;
1138 
1139 	ret = arm_smmu_rpm_get(smmu);
1140 	if (ret < 0)
1141 		return ret;
1142 
1143 	/* Ensure that the domain is finalised */
1144 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1145 	if (ret < 0)
1146 		goto rpm_put;
1147 
1148 	/*
1149 	 * Sanity check the domain. We don't support domains across
1150 	 * different SMMUs.
1151 	 */
1152 	if (smmu_domain->smmu != smmu) {
1153 		ret = -EINVAL;
1154 		goto rpm_put;
1155 	}
1156 
1157 	/* Looks ok, so add the device to the domain */
1158 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1159 
1160 	/*
1161 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1162 	 * Otherwise, if a driver for a suspended consumer device
1163 	 * unmaps buffers, it will runpm resume/suspend for each one.
1164 	 *
1165 	 * For example, when used by a GPU device, when an application
1166 	 * or game exits, it can trigger unmapping 100s or 1000s of
1167 	 * buffers.  With a runpm cycle for each buffer, that adds up
1168 	 * to 5-10sec worth of reprogramming the context bank, while
1169 	 * the system appears to be locked up to the user.
1170 	 */
1171 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1172 	pm_runtime_use_autosuspend(smmu->dev);
1173 
1174 rpm_put:
1175 	arm_smmu_rpm_put(smmu);
1176 	return ret;
1177 }
1178 
1179 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1180 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1181 			      int prot, gfp_t gfp, size_t *mapped)
1182 {
1183 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1184 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1185 	int ret;
1186 
1187 	if (!ops)
1188 		return -ENODEV;
1189 
1190 	arm_smmu_rpm_get(smmu);
1191 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1192 	arm_smmu_rpm_put(smmu);
1193 
1194 	return ret;
1195 }
1196 
1197 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1198 				   size_t pgsize, size_t pgcount,
1199 				   struct iommu_iotlb_gather *iotlb_gather)
1200 {
1201 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1202 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1203 	size_t ret;
1204 
1205 	if (!ops)
1206 		return 0;
1207 
1208 	arm_smmu_rpm_get(smmu);
1209 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1210 	arm_smmu_rpm_put(smmu);
1211 
1212 	return ret;
1213 }
1214 
1215 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1216 {
1217 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1218 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1219 
1220 	if (smmu_domain->flush_ops) {
1221 		arm_smmu_rpm_get(smmu);
1222 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1223 		arm_smmu_rpm_put(smmu);
1224 	}
1225 }
1226 
1227 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1228 				struct iommu_iotlb_gather *gather)
1229 {
1230 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1231 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1232 
1233 	if (!smmu)
1234 		return;
1235 
1236 	arm_smmu_rpm_get(smmu);
1237 	if (smmu->version == ARM_SMMU_V2 ||
1238 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1239 		arm_smmu_tlb_sync_context(smmu_domain);
1240 	else
1241 		arm_smmu_tlb_sync_global(smmu);
1242 	arm_smmu_rpm_put(smmu);
1243 }
1244 
1245 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1246 					      dma_addr_t iova)
1247 {
1248 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1249 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1250 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1251 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1252 	struct device *dev = smmu->dev;
1253 	void __iomem *reg;
1254 	u32 tmp;
1255 	u64 phys;
1256 	unsigned long va, flags;
1257 	int ret, idx = cfg->cbndx;
1258 	phys_addr_t addr = 0;
1259 
1260 	ret = arm_smmu_rpm_get(smmu);
1261 	if (ret < 0)
1262 		return 0;
1263 
1264 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1265 	va = iova & ~0xfffUL;
1266 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1267 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1268 	else
1269 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1270 
1271 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1272 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1273 				      5, 50)) {
1274 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1275 		dev_err(dev,
1276 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1277 			&iova);
1278 		arm_smmu_rpm_put(smmu);
1279 		return ops->iova_to_phys(ops, iova);
1280 	}
1281 
1282 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1283 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1284 	if (phys & ARM_SMMU_CB_PAR_F) {
1285 		dev_err(dev, "translation fault!\n");
1286 		dev_err(dev, "PAR = 0x%llx\n", phys);
1287 		goto out;
1288 	}
1289 
1290 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1291 out:
1292 	arm_smmu_rpm_put(smmu);
1293 
1294 	return addr;
1295 }
1296 
1297 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1298 					dma_addr_t iova)
1299 {
1300 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1301 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1302 
1303 	if (!ops)
1304 		return 0;
1305 
1306 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1307 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1308 		return arm_smmu_iova_to_phys_hard(domain, iova);
1309 
1310 	return ops->iova_to_phys(ops, iova);
1311 }
1312 
1313 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1314 {
1315 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1316 
1317 	switch (cap) {
1318 	case IOMMU_CAP_CACHE_COHERENCY:
1319 		/* Assume that a coherent TCU implies coherent TBUs */
1320 		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1321 	case IOMMU_CAP_NOEXEC:
1322 		return true;
1323 	default:
1324 		return false;
1325 	}
1326 }
1327 
1328 static
1329 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1330 {
1331 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1332 							  fwnode);
1333 	put_device(dev);
1334 	return dev ? dev_get_drvdata(dev) : NULL;
1335 }
1336 
1337 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1338 {
1339 	struct arm_smmu_device *smmu = NULL;
1340 	struct arm_smmu_master_cfg *cfg;
1341 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1342 	int i, ret;
1343 
1344 	if (using_legacy_binding) {
1345 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1346 
1347 		/*
1348 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1349 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1350 		 * later use.
1351 		 */
1352 		fwspec = dev_iommu_fwspec_get(dev);
1353 		if (ret)
1354 			goto out_free;
1355 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1356 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1357 	} else {
1358 		return ERR_PTR(-ENODEV);
1359 	}
1360 
1361 	ret = -EINVAL;
1362 	for (i = 0; i < fwspec->num_ids; i++) {
1363 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1364 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1365 
1366 		if (sid & ~smmu->streamid_mask) {
1367 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1368 				sid, smmu->streamid_mask);
1369 			goto out_free;
1370 		}
1371 		if (mask & ~smmu->smr_mask_mask) {
1372 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1373 				mask, smmu->smr_mask_mask);
1374 			goto out_free;
1375 		}
1376 	}
1377 
1378 	ret = -ENOMEM;
1379 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1380 		      GFP_KERNEL);
1381 	if (!cfg)
1382 		goto out_free;
1383 
1384 	cfg->smmu = smmu;
1385 	dev_iommu_priv_set(dev, cfg);
1386 	while (i--)
1387 		cfg->smendx[i] = INVALID_SMENDX;
1388 
1389 	ret = arm_smmu_rpm_get(smmu);
1390 	if (ret < 0)
1391 		goto out_cfg_free;
1392 
1393 	ret = arm_smmu_master_alloc_smes(dev);
1394 	arm_smmu_rpm_put(smmu);
1395 
1396 	if (ret)
1397 		goto out_cfg_free;
1398 
1399 	device_link_add(dev, smmu->dev,
1400 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1401 
1402 	return &smmu->iommu;
1403 
1404 out_cfg_free:
1405 	kfree(cfg);
1406 out_free:
1407 	iommu_fwspec_free(dev);
1408 	return ERR_PTR(ret);
1409 }
1410 
1411 static void arm_smmu_release_device(struct device *dev)
1412 {
1413 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1414 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1415 	int ret;
1416 
1417 	ret = arm_smmu_rpm_get(cfg->smmu);
1418 	if (ret < 0)
1419 		return;
1420 
1421 	arm_smmu_master_free_smes(cfg, fwspec);
1422 
1423 	arm_smmu_rpm_put(cfg->smmu);
1424 
1425 	dev_iommu_priv_set(dev, NULL);
1426 	kfree(cfg);
1427 }
1428 
1429 static void arm_smmu_probe_finalize(struct device *dev)
1430 {
1431 	struct arm_smmu_master_cfg *cfg;
1432 	struct arm_smmu_device *smmu;
1433 
1434 	cfg = dev_iommu_priv_get(dev);
1435 	smmu = cfg->smmu;
1436 
1437 	if (smmu->impl && smmu->impl->probe_finalize)
1438 		smmu->impl->probe_finalize(smmu, dev);
1439 }
1440 
1441 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1442 {
1443 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1444 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1445 	struct arm_smmu_device *smmu = cfg->smmu;
1446 	struct iommu_group *group = NULL;
1447 	int i, idx;
1448 
1449 	mutex_lock(&smmu->stream_map_mutex);
1450 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1451 		if (group && smmu->s2crs[idx].group &&
1452 		    group != smmu->s2crs[idx].group) {
1453 			mutex_unlock(&smmu->stream_map_mutex);
1454 			return ERR_PTR(-EINVAL);
1455 		}
1456 
1457 		group = smmu->s2crs[idx].group;
1458 	}
1459 
1460 	if (group) {
1461 		mutex_unlock(&smmu->stream_map_mutex);
1462 		return iommu_group_ref_get(group);
1463 	}
1464 
1465 	if (dev_is_pci(dev))
1466 		group = pci_device_group(dev);
1467 	else if (dev_is_fsl_mc(dev))
1468 		group = fsl_mc_device_group(dev);
1469 	else
1470 		group = generic_device_group(dev);
1471 
1472 	/* Remember group for faster lookups */
1473 	if (!IS_ERR(group))
1474 		for_each_cfg_sme(cfg, fwspec, i, idx)
1475 			smmu->s2crs[idx].group = group;
1476 
1477 	mutex_unlock(&smmu->stream_map_mutex);
1478 	return group;
1479 }
1480 
1481 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1482 {
1483 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1484 	int ret = 0;
1485 
1486 	mutex_lock(&smmu_domain->init_mutex);
1487 	if (smmu_domain->smmu)
1488 		ret = -EPERM;
1489 	else
1490 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1491 	mutex_unlock(&smmu_domain->init_mutex);
1492 
1493 	return ret;
1494 }
1495 
1496 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1497 		unsigned long quirks)
1498 {
1499 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1500 	int ret = 0;
1501 
1502 	mutex_lock(&smmu_domain->init_mutex);
1503 	if (smmu_domain->smmu)
1504 		ret = -EPERM;
1505 	else
1506 		smmu_domain->pgtbl_quirks = quirks;
1507 	mutex_unlock(&smmu_domain->init_mutex);
1508 
1509 	return ret;
1510 }
1511 
1512 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1513 {
1514 	u32 mask, fwid = 0;
1515 
1516 	if (args->args_count > 0)
1517 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1518 
1519 	if (args->args_count > 1)
1520 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1521 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1522 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1523 
1524 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1525 }
1526 
1527 static void arm_smmu_get_resv_regions(struct device *dev,
1528 				      struct list_head *head)
1529 {
1530 	struct iommu_resv_region *region;
1531 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1532 
1533 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1534 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
1535 	if (!region)
1536 		return;
1537 
1538 	list_add_tail(&region->list, head);
1539 
1540 	iommu_dma_get_resv_regions(dev, head);
1541 }
1542 
1543 static int arm_smmu_def_domain_type(struct device *dev)
1544 {
1545 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1546 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1547 
1548 	if (using_legacy_binding)
1549 		return IOMMU_DOMAIN_IDENTITY;
1550 
1551 	if (impl && impl->def_domain_type)
1552 		return impl->def_domain_type(dev);
1553 
1554 	return 0;
1555 }
1556 
1557 static struct iommu_ops arm_smmu_ops = {
1558 	.capable		= arm_smmu_capable,
1559 	.domain_alloc		= arm_smmu_domain_alloc,
1560 	.probe_device		= arm_smmu_probe_device,
1561 	.release_device		= arm_smmu_release_device,
1562 	.probe_finalize		= arm_smmu_probe_finalize,
1563 	.device_group		= arm_smmu_device_group,
1564 	.of_xlate		= arm_smmu_of_xlate,
1565 	.get_resv_regions	= arm_smmu_get_resv_regions,
1566 	.def_domain_type	= arm_smmu_def_domain_type,
1567 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1568 	.owner			= THIS_MODULE,
1569 	.default_domain_ops = &(const struct iommu_domain_ops) {
1570 		.attach_dev		= arm_smmu_attach_dev,
1571 		.map_pages		= arm_smmu_map_pages,
1572 		.unmap_pages		= arm_smmu_unmap_pages,
1573 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1574 		.iotlb_sync		= arm_smmu_iotlb_sync,
1575 		.iova_to_phys		= arm_smmu_iova_to_phys,
1576 		.enable_nesting		= arm_smmu_enable_nesting,
1577 		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1578 		.free			= arm_smmu_domain_free,
1579 	}
1580 };
1581 
1582 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1583 {
1584 	int i;
1585 	u32 reg;
1586 
1587 	/* clear global FSR */
1588 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1589 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1590 
1591 	/*
1592 	 * Reset stream mapping groups: Initial values mark all SMRn as
1593 	 * invalid and all S2CRn as bypass unless overridden.
1594 	 */
1595 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1596 		arm_smmu_write_sme(smmu, i);
1597 
1598 	/* Make sure all context banks are disabled and clear CB_FSR  */
1599 	for (i = 0; i < smmu->num_context_banks; ++i) {
1600 		arm_smmu_write_context_bank(smmu, i);
1601 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1602 	}
1603 
1604 	/* Invalidate the TLB, just in case */
1605 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1606 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1607 
1608 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1609 
1610 	/* Enable fault reporting */
1611 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1612 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1613 
1614 	/* Disable TLB broadcasting. */
1615 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1616 
1617 	/* Enable client access, handling unmatched streams as appropriate */
1618 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1619 	if (disable_bypass)
1620 		reg |= ARM_SMMU_sCR0_USFCFG;
1621 	else
1622 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1623 
1624 	/* Disable forced broadcasting */
1625 	reg &= ~ARM_SMMU_sCR0_FB;
1626 
1627 	/* Don't upgrade barriers */
1628 	reg &= ~(ARM_SMMU_sCR0_BSU);
1629 
1630 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1631 		reg |= ARM_SMMU_sCR0_VMID16EN;
1632 
1633 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1634 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1635 
1636 	if (smmu->impl && smmu->impl->reset)
1637 		smmu->impl->reset(smmu);
1638 
1639 	/* Push the button */
1640 	arm_smmu_tlb_sync_global(smmu);
1641 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1642 }
1643 
/*
 * Convert an address-size encoding from the ID registers into a width in
 * bits. Encodings 0-4 map to 32, 36, 40, 42 and 44 bits; every other value
 * (including 5) maps to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44 };

	if (size >= 0 && size < (int)(sizeof(bits) / sizeof(bits[0])))
		return bits[size];

	return 48;
}
1662 
1663 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1664 {
1665 	unsigned int size;
1666 	u32 id;
1667 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1668 	int i, ret;
1669 
1670 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1671 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1672 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1673 
1674 	/* ID0 */
1675 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1676 
1677 	/* Restrict available stages based on module parameter */
1678 	if (force_stage == 1)
1679 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1680 	else if (force_stage == 2)
1681 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1682 
1683 	if (id & ARM_SMMU_ID0_S1TS) {
1684 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1685 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1686 	}
1687 
1688 	if (id & ARM_SMMU_ID0_S2TS) {
1689 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1690 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1691 	}
1692 
1693 	if (id & ARM_SMMU_ID0_NTS) {
1694 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1695 		dev_notice(smmu->dev, "\tnested translation\n");
1696 	}
1697 
1698 	if (!(smmu->features &
1699 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1700 		dev_err(smmu->dev, "\tno translation support!\n");
1701 		return -ENODEV;
1702 	}
1703 
1704 	if ((id & ARM_SMMU_ID0_S1TS) &&
1705 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1706 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1707 		dev_notice(smmu->dev, "\taddress translation ops\n");
1708 	}
1709 
1710 	/*
1711 	 * In order for DMA API calls to work properly, we must defer to what
1712 	 * the FW says about coherency, regardless of what the hardware claims.
1713 	 * Fortunately, this also opens up a workaround for systems where the
1714 	 * ID register value has ended up configured incorrectly.
1715 	 */
1716 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1717 	if (cttw_fw || cttw_reg)
1718 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1719 			   cttw_fw ? "" : "non-");
1720 	if (cttw_fw != cttw_reg)
1721 		dev_notice(smmu->dev,
1722 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1723 
1724 	/* Max. number of entries we have for stream matching/indexing */
1725 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1726 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1727 		size = 1 << 16;
1728 	} else {
1729 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1730 	}
1731 	smmu->streamid_mask = size - 1;
1732 	if (id & ARM_SMMU_ID0_SMS) {
1733 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1734 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1735 		if (size == 0) {
1736 			dev_err(smmu->dev,
1737 				"stream-matching supported, but no SMRs present!\n");
1738 			return -ENODEV;
1739 		}
1740 
1741 		/* Zero-initialised to mark as invalid */
1742 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1743 					  GFP_KERNEL);
1744 		if (!smmu->smrs)
1745 			return -ENOMEM;
1746 
1747 		dev_notice(smmu->dev,
1748 			   "\tstream matching with %u register groups", size);
1749 	}
1750 	/* s2cr->type == 0 means translation, so initialise explicitly */
1751 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1752 					 GFP_KERNEL);
1753 	if (!smmu->s2crs)
1754 		return -ENOMEM;
1755 	for (i = 0; i < size; i++)
1756 		smmu->s2crs[i] = s2cr_init_val;
1757 
1758 	smmu->num_mapping_groups = size;
1759 	mutex_init(&smmu->stream_map_mutex);
1760 	spin_lock_init(&smmu->global_sync_lock);
1761 
1762 	if (smmu->version < ARM_SMMU_V2 ||
1763 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1764 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1765 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1766 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1767 	}
1768 
1769 	/* ID1 */
1770 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1771 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1772 
1773 	/* Check for size mismatch of SMMU address space from mapped region */
1774 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1775 	if (smmu->numpage != 2 * size << smmu->pgshift)
1776 		dev_warn(smmu->dev,
1777 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1778 			2 * size << smmu->pgshift, smmu->numpage);
1779 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1780 	smmu->numpage = size;
1781 
1782 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1783 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1784 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1785 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1786 		return -ENODEV;
1787 	}
1788 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1789 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1790 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1791 				 sizeof(*smmu->cbs), GFP_KERNEL);
1792 	if (!smmu->cbs)
1793 		return -ENOMEM;
1794 
1795 	/* ID2 */
1796 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1797 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1798 	smmu->ipa_size = size;
1799 
1800 	/* The output mask is also applied for bypass */
1801 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1802 	smmu->pa_size = size;
1803 
1804 	if (id & ARM_SMMU_ID2_VMID16)
1805 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1806 
1807 	/*
1808 	 * What the page table walker can address actually depends on which
1809 	 * descriptor format is in use, but since a) we don't know that yet,
1810 	 * and b) it can vary per context bank, this will have to do...
1811 	 */
1812 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1813 		dev_warn(smmu->dev,
1814 			 "failed to set DMA mask for table walker\n");
1815 
1816 	if (smmu->version < ARM_SMMU_V2) {
1817 		smmu->va_size = smmu->ipa_size;
1818 		if (smmu->version == ARM_SMMU_V1_64K)
1819 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1820 	} else {
1821 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1822 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1823 		if (id & ARM_SMMU_ID2_PTFS_4K)
1824 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1825 		if (id & ARM_SMMU_ID2_PTFS_16K)
1826 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1827 		if (id & ARM_SMMU_ID2_PTFS_64K)
1828 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1829 	}
1830 
1831 	if (smmu->impl && smmu->impl->cfg_probe) {
1832 		ret = smmu->impl->cfg_probe(smmu);
1833 		if (ret)
1834 			return ret;
1835 	}
1836 
1837 	/* Now we've corralled the various formats, what'll it do? */
1838 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1839 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1840 	if (smmu->features &
1841 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1842 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1843 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1844 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1845 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1846 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1847 
1848 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1849 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1850 	else
1851 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1852 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1853 		   smmu->pgsize_bitmap);
1854 
1855 
1856 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1857 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1858 			   smmu->va_size, smmu->ipa_size);
1859 
1860 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1861 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1862 			   smmu->ipa_size, smmu->pa_size);
1863 
1864 	return 0;
1865 }
1866 
1867 struct arm_smmu_match_data {
1868 	enum arm_smmu_arch_version version;
1869 	enum arm_smmu_implementation model;
1870 };
1871 
1872 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1873 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1874 
1875 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1876 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1877 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1878 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1879 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1880 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1881 
1882 static const struct of_device_id arm_smmu_of_match[] = {
1883 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1884 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1885 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1886 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1887 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1888 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1889 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1890 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1891 	{ },
1892 };
1893 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1894 
1895 #ifdef CONFIG_ACPI
1896 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1897 {
1898 	int ret = 0;
1899 
1900 	switch (model) {
1901 	case ACPI_IORT_SMMU_V1:
1902 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1903 		smmu->version = ARM_SMMU_V1;
1904 		smmu->model = GENERIC_SMMU;
1905 		break;
1906 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1907 		smmu->version = ARM_SMMU_V1_64K;
1908 		smmu->model = GENERIC_SMMU;
1909 		break;
1910 	case ACPI_IORT_SMMU_V2:
1911 		smmu->version = ARM_SMMU_V2;
1912 		smmu->model = GENERIC_SMMU;
1913 		break;
1914 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1915 		smmu->version = ARM_SMMU_V2;
1916 		smmu->model = ARM_MMU500;
1917 		break;
1918 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1919 		smmu->version = ARM_SMMU_V2;
1920 		smmu->model = CAVIUM_SMMUV2;
1921 		break;
1922 	default:
1923 		ret = -ENODEV;
1924 	}
1925 
1926 	return ret;
1927 }
1928 
1929 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1930 				      u32 *global_irqs, u32 *pmu_irqs)
1931 {
1932 	struct device *dev = smmu->dev;
1933 	struct acpi_iort_node *node =
1934 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1935 	struct acpi_iort_smmu *iort_smmu;
1936 	int ret;
1937 
1938 	/* Retrieve SMMU1/2 specific data */
1939 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1940 
1941 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1942 	if (ret < 0)
1943 		return ret;
1944 
1945 	/* Ignore the configuration access interrupt */
1946 	*global_irqs = 1;
1947 	*pmu_irqs = 0;
1948 
1949 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1950 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1951 
1952 	return 0;
1953 }
1954 #else
1955 static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1956 					     u32 *global_irqs, u32 *pmu_irqs)
1957 {
1958 	return -ENODEV;
1959 }
1960 #endif
1961 
1962 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
1963 				    u32 *global_irqs, u32 *pmu_irqs)
1964 {
1965 	const struct arm_smmu_match_data *data;
1966 	struct device *dev = smmu->dev;
1967 	bool legacy_binding;
1968 
1969 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
1970 		return dev_err_probe(dev, -ENODEV,
1971 				     "missing #global-interrupts property\n");
1972 	*pmu_irqs = 0;
1973 
1974 	data = of_device_get_match_data(dev);
1975 	smmu->version = data->version;
1976 	smmu->model = data->model;
1977 
1978 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1979 	if (legacy_binding && !using_generic_binding) {
1980 		if (!using_legacy_binding) {
1981 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
1982 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
1983 		}
1984 		using_legacy_binding = true;
1985 	} else if (!legacy_binding && !using_legacy_binding) {
1986 		using_generic_binding = true;
1987 	} else {
1988 		dev_err(dev, "not probing due to mismatched DT properties\n");
1989 		return -ENODEV;
1990 	}
1991 
1992 	if (of_dma_is_coherent(dev->of_node))
1993 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1994 
1995 	return 0;
1996 }
1997 
1998 static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
1999 {
2000 	struct list_head rmr_list;
2001 	struct iommu_resv_region *e;
2002 	int idx, cnt = 0;
2003 	u32 reg;
2004 
2005 	INIT_LIST_HEAD(&rmr_list);
2006 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2007 
2008 	/*
2009 	 * Rather than trying to look at existing mappings that
2010 	 * are setup by the firmware and then invalidate the ones
2011 	 * that do no have matching RMR entries, just disable the
2012 	 * SMMU until it gets enabled again in the reset routine.
2013 	 */
2014 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
2015 	reg |= ARM_SMMU_sCR0_CLIENTPD;
2016 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
2017 
2018 	list_for_each_entry(e, &rmr_list, list) {
2019 		struct iommu_iort_rmr_data *rmr;
2020 		int i;
2021 
2022 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
2023 		for (i = 0; i < rmr->num_sids; i++) {
2024 			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
2025 			if (idx < 0)
2026 				continue;
2027 
2028 			if (smmu->s2crs[idx].count == 0) {
2029 				smmu->smrs[idx].id = rmr->sids[i];
2030 				smmu->smrs[idx].mask = 0;
2031 				smmu->smrs[idx].valid = true;
2032 			}
2033 			smmu->s2crs[idx].count++;
2034 			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
2035 			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
2036 
2037 			cnt++;
2038 		}
2039 	}
2040 
2041 	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
2042 		   cnt == 1 ? "" : "s");
2043 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
2044 }
2045 
2046 static int arm_smmu_device_probe(struct platform_device *pdev)
2047 {
2048 	struct resource *res;
2049 	struct arm_smmu_device *smmu;
2050 	struct device *dev = &pdev->dev;
2051 	int num_irqs, i, err;
2052 	u32 global_irqs, pmu_irqs;
2053 	irqreturn_t (*global_fault)(int irq, void *dev);
2054 
2055 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2056 	if (!smmu) {
2057 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2058 		return -ENOMEM;
2059 	}
2060 	smmu->dev = dev;
2061 
2062 	if (dev->of_node)
2063 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2064 	else
2065 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2066 	if (err)
2067 		return err;
2068 
2069 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2070 	if (IS_ERR(smmu->base))
2071 		return PTR_ERR(smmu->base);
2072 	smmu->ioaddr = res->start;
2073 
2074 	/*
2075 	 * The resource size should effectively match the value of SMMU_TOP;
2076 	 * stash that temporarily until we know PAGESIZE to validate it with.
2077 	 */
2078 	smmu->numpage = resource_size(res);
2079 
2080 	smmu = arm_smmu_impl_init(smmu);
2081 	if (IS_ERR(smmu))
2082 		return PTR_ERR(smmu);
2083 
2084 	num_irqs = platform_irq_count(pdev);
2085 
2086 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2087 	if (smmu->num_context_irqs <= 0)
2088 		return dev_err_probe(dev, -ENODEV,
2089 				"found %d interrupts but expected at least %d\n",
2090 				num_irqs, global_irqs + pmu_irqs + 1);
2091 
2092 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2093 				  sizeof(*smmu->irqs), GFP_KERNEL);
2094 	if (!smmu->irqs)
2095 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2096 				     smmu->num_context_irqs);
2097 
2098 	for (i = 0; i < smmu->num_context_irqs; i++) {
2099 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2100 
2101 		if (irq < 0)
2102 			return irq;
2103 		smmu->irqs[i] = irq;
2104 	}
2105 
2106 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2107 	if (err < 0) {
2108 		dev_err(dev, "failed to get clocks %d\n", err);
2109 		return err;
2110 	}
2111 	smmu->num_clks = err;
2112 
2113 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2114 	if (err)
2115 		return err;
2116 
2117 	err = arm_smmu_device_cfg_probe(smmu);
2118 	if (err)
2119 		return err;
2120 
2121 	if (smmu->version == ARM_SMMU_V2) {
2122 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2123 			dev_err(dev,
2124 			      "found only %d context irq(s) but %d required\n",
2125 			      smmu->num_context_irqs, smmu->num_context_banks);
2126 			return -ENODEV;
2127 		}
2128 
2129 		/* Ignore superfluous interrupts */
2130 		smmu->num_context_irqs = smmu->num_context_banks;
2131 	}
2132 
2133 	if (smmu->impl && smmu->impl->global_fault)
2134 		global_fault = smmu->impl->global_fault;
2135 	else
2136 		global_fault = arm_smmu_global_fault;
2137 
2138 	for (i = 0; i < global_irqs; i++) {
2139 		int irq = platform_get_irq(pdev, i);
2140 
2141 		if (irq < 0)
2142 			return irq;
2143 
2144 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2145 				       "arm-smmu global fault", smmu);
2146 		if (err)
2147 			return dev_err_probe(dev, err,
2148 					"failed to request global IRQ %d (%u)\n",
2149 					i, irq);
2150 	}
2151 
2152 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2153 				     "smmu.%pa", &smmu->ioaddr);
2154 	if (err) {
2155 		dev_err(dev, "Failed to register iommu in sysfs\n");
2156 		return err;
2157 	}
2158 
2159 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2160 	if (err) {
2161 		dev_err(dev, "Failed to register iommu\n");
2162 		iommu_device_sysfs_remove(&smmu->iommu);
2163 		return err;
2164 	}
2165 
2166 	platform_set_drvdata(pdev, smmu);
2167 
2168 	/* Check for RMRs and install bypass SMRs if any */
2169 	arm_smmu_rmr_install_bypass_smr(smmu);
2170 
2171 	arm_smmu_device_reset(smmu);
2172 	arm_smmu_test_smr_masks(smmu);
2173 
2174 	/*
2175 	 * We want to avoid touching dev->power.lock in fastpaths unless
2176 	 * it's really going to do something useful - pm_runtime_enabled()
2177 	 * can serve as an ideal proxy for that decision. So, conditionally
2178 	 * enable pm_runtime.
2179 	 */
2180 	if (dev->pm_domain) {
2181 		pm_runtime_set_active(dev);
2182 		pm_runtime_enable(dev);
2183 	}
2184 
2185 	return 0;
2186 }
2187 
2188 static int arm_smmu_device_remove(struct platform_device *pdev)
2189 {
2190 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2191 
2192 	if (!smmu)
2193 		return -ENODEV;
2194 
2195 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2196 		dev_notice(&pdev->dev, "disabling translation\n");
2197 
2198 	iommu_device_unregister(&smmu->iommu);
2199 	iommu_device_sysfs_remove(&smmu->iommu);
2200 
2201 	arm_smmu_rpm_get(smmu);
2202 	/* Turn the thing off */
2203 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2204 	arm_smmu_rpm_put(smmu);
2205 
2206 	if (pm_runtime_enabled(smmu->dev))
2207 		pm_runtime_force_suspend(smmu->dev);
2208 	else
2209 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2210 
2211 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2212 	return 0;
2213 }
2214 
2215 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2216 {
2217 	arm_smmu_device_remove(pdev);
2218 }
2219 
2220 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2221 {
2222 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2223 	int ret;
2224 
2225 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2226 	if (ret)
2227 		return ret;
2228 
2229 	arm_smmu_device_reset(smmu);
2230 
2231 	return 0;
2232 }
2233 
2234 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2235 {
2236 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2237 
2238 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2239 
2240 	return 0;
2241 }
2242 
2243 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2244 {
2245 	int ret;
2246 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2247 
2248 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2249 	if (ret)
2250 		return ret;
2251 
2252 	if (pm_runtime_suspended(dev))
2253 		return 0;
2254 
2255 	ret = arm_smmu_runtime_resume(dev);
2256 	if (ret)
2257 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2258 
2259 	return ret;
2260 }
2261 
2262 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2263 {
2264 	int ret = 0;
2265 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2266 
2267 	if (pm_runtime_suspended(dev))
2268 		goto clk_unprepare;
2269 
2270 	ret = arm_smmu_runtime_suspend(dev);
2271 	if (ret)
2272 		return ret;
2273 
2274 clk_unprepare:
2275 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2276 	return ret;
2277 }
2278 
2279 static const struct dev_pm_ops arm_smmu_pm_ops = {
2280 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2281 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2282 			   arm_smmu_runtime_resume, NULL)
2283 };
2284 
2285 static struct platform_driver arm_smmu_driver = {
2286 	.driver	= {
2287 		.name			= "arm-smmu",
2288 		.of_match_table		= arm_smmu_of_match,
2289 		.pm			= &arm_smmu_pm_ops,
2290 		.suppress_bind_attrs    = true,
2291 	},
2292 	.probe	= arm_smmu_device_probe,
2293 	.remove	= arm_smmu_device_remove,
2294 	.shutdown = arm_smmu_device_shutdown,
2295 };
2296 module_platform_driver(arm_smmu_driver);
2297 
2298 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2299 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2300 MODULE_ALIAS("platform:arm-smmu");
2301 MODULE_LICENSE("GPL v2");
2302