xref: /openbmc/linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision 75b1a8f9d62e50f05d0e4e9f3c8bcde32527ffc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
40 
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
43 
44 #include "arm-smmu.h"
45 
46 /*
47  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48  * global register space are still, in fact, using a hypervisor to mediate it
49  * by trapping and emulating register accesses. Sadly, some deployed versions
50  * of said trapping code have bugs wherein they go horribly wrong for stores
51  * using r31 (i.e. XZR/WZR) as the source register.
52  */
/*
 * Value written to registers where the data itself does not matter (e.g. the
 * TLB sync trigger in __arm_smmu_tlb_sync()). Using -1 rather than 0 keeps the
 * store from being emitted with the zero register as its source.
 */
53 #define QCOM_DUMMY_VAL -1
54 
/*
 * Base and length of a fixed IOVA window.
 * NOTE(review): presumably the software-reserved MSI region; the users of
 * these constants are outside this section -- confirm.
 */
55 #define MSI_IOVA_BASE			0x8000000
56 #define MSI_IOVA_LENGTH			0x100000
57 
/* Module parameter: exported read-only (S_IRUGO); semantics as described below. */
58 static int force_stage;
59 module_param(force_stage, int, S_IRUGO);
60 MODULE_PARM_DESC(force_stage,
61 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
/*
 * Module parameter: exported read-only (S_IRUGO). The default follows the
 * CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT build option.
 */
62 static bool disable_bypass =
63 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
64 module_param(disable_bypass, bool, S_IRUGO);
65 MODULE_PARM_DESC(disable_bypass,
66 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
67 
/*
 * Initial value for a stream-to-context register entry: fault when
 * disable_bypass is set, bypass otherwise. Being a macro over a runtime
 * variable, the choice is evaluated at every use.
 */
68 #define s2cr_init_val (struct arm_smmu_s2cr){				\
69 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
70 }
71 
/*
 * Which device-tree binding style is in use. using_legacy_binding gates
 * arm_smmu_legacy_bus_init() and makes arm_smmu_domain_alloc() refuse DMA
 * domains. NOTE(review): using_generic_binding is not referenced in this
 * section; presumably set by the probe path -- confirm.
 */
72 static bool using_legacy_binding, using_generic_binding;
73 
74 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
75 {
76 	if (pm_runtime_enabled(smmu->dev))
77 		return pm_runtime_get_sync(smmu->dev);
78 
79 	return 0;
80 }
81 
82 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
83 {
84 	if (pm_runtime_enabled(smmu->dev))
85 		pm_runtime_put_autosuspend(smmu->dev);
86 }
87 
88 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
89 {
90 	return container_of(dom, struct arm_smmu_domain, domain);
91 }
92 
/*
 * Forward declarations: the legacy-binding helpers below refer to the driver
 * and ops objects before the point where they are defined.
 */
93 static struct platform_driver arm_smmu_driver;
94 static struct iommu_ops arm_smmu_ops;
95 
96 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
/* Forward declaration; called from arm_smmu_legacy_bus_init() below. */
97 static int arm_smmu_bus_init(struct iommu_ops *ops);
98 
99 static struct device_node *dev_get_dev_node(struct device *dev)
100 {
101 	if (dev_is_pci(dev)) {
102 		struct pci_bus *bus = to_pci_dev(dev)->bus;
103 
104 		while (!pci_is_root_bus(bus))
105 			bus = bus->parent;
106 		return of_node_get(bus->bridge->parent->of_node);
107 	}
108 
109 	return of_node_get(dev->of_node);
110 }
111 
112 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
113 {
114 	*((__be32 *)data) = cpu_to_be32(alias);
115 	return 0; /* Continue walking */
116 }
117 
118 static int __find_legacy_master_phandle(struct device *dev, void *data)
119 {
120 	struct of_phandle_iterator *it = *(void **)data;
121 	struct device_node *np = it->node;
122 	int err;
123 
124 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
125 			    "#stream-id-cells", -1)
126 		if (it->node == np) {
127 			*(void **)data = dev;
128 			return 1;
129 		}
130 	it->node = np;
131 	return err == -ENOENT ? 0 : err;
132 }
133 
134 static int arm_smmu_register_legacy_master(struct device *dev,
135 					   struct arm_smmu_device **smmu)
136 {
137 	struct device *smmu_dev;
138 	struct device_node *np;
139 	struct of_phandle_iterator it;
140 	void *data = &it;
141 	u32 *sids;
142 	__be32 pci_sid;
143 	int err;
144 
145 	np = dev_get_dev_node(dev);
146 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
147 		of_node_put(np);
148 		return -ENODEV;
149 	}
150 
151 	it.node = np;
152 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
153 				     __find_legacy_master_phandle);
154 	smmu_dev = data;
155 	of_node_put(np);
156 	if (err == 0)
157 		return -ENODEV;
158 	if (err < 0)
159 		return err;
160 
161 	if (dev_is_pci(dev)) {
162 		/* "mmu-masters" assumes Stream ID == Requester ID */
163 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
164 				       &pci_sid);
165 		it.cur = &pci_sid;
166 		it.cur_count = 1;
167 	}
168 
169 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
170 				&arm_smmu_ops);
171 	if (err)
172 		return err;
173 
174 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
175 	if (!sids)
176 		return -ENOMEM;
177 
178 	*smmu = dev_get_drvdata(smmu_dev);
179 	of_phandle_iterator_args(&it, sids, it.cur_count);
180 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
181 	kfree(sids);
182 	return err;
183 }
184 
185 /*
186  * With the legacy DT binding in play, we have no guarantees about
187  * probe order, but then we're also not doing default domains, so we can
188  * delay setting bus ops until we're sure every possible SMMU is ready,
189  * and that way ensure that no probe_device() calls get missed.
190  */
191 static int arm_smmu_legacy_bus_init(void)
192 {
193 	if (using_legacy_binding)
194 		return arm_smmu_bus_init(&arm_smmu_ops);
195 	return 0;
196 }
197 device_initcall_sync(arm_smmu_legacy_bus_init);
198 #else
199 static int arm_smmu_register_legacy_master(struct device *dev,
200 					   struct arm_smmu_device **smmu)
201 {
202 	return -ENODEV;
203 }
204 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
205 
/* Release entry @idx of the allocation bitmap @map by clearing its bit. */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
210 
211 /* Wait for any pending TLB invalidations to complete */
212 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
213 				int sync, int status)
214 {
215 	unsigned int spin_cnt, delay;
216 	u32 reg;
217 
218 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
219 		return smmu->impl->tlb_sync(smmu, page, sync, status);
220 
221 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
222 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
223 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
224 			reg = arm_smmu_readl(smmu, page, status);
225 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
226 				return;
227 			cpu_relax();
228 		}
229 		udelay(delay);
230 	}
231 	dev_err_ratelimited(smmu->dev,
232 			    "TLB sync timed out -- SMMU may be deadlocked\n");
233 }
234 
235 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
236 {
237 	unsigned long flags;
238 
239 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
240 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
241 			    ARM_SMMU_GR0_sTLBGSTATUS);
242 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
243 }
244 
245 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
246 {
247 	struct arm_smmu_device *smmu = smmu_domain->smmu;
248 	unsigned long flags;
249 
250 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
251 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
252 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
253 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
254 }
255 
256 static void arm_smmu_tlb_inv_context_s1(void *cookie)
257 {
258 	struct arm_smmu_domain *smmu_domain = cookie;
259 	/*
260 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
261 	 * current CPU are visible beforehand.
262 	 */
263 	wmb();
264 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
265 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
266 	arm_smmu_tlb_sync_context(smmu_domain);
267 }
268 
269 static void arm_smmu_tlb_inv_context_s2(void *cookie)
270 {
271 	struct arm_smmu_domain *smmu_domain = cookie;
272 	struct arm_smmu_device *smmu = smmu_domain->smmu;
273 
274 	/* See above */
275 	wmb();
276 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
277 	arm_smmu_tlb_sync_global(smmu);
278 }
279 
280 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
281 				      size_t granule, void *cookie, int reg)
282 {
283 	struct arm_smmu_domain *smmu_domain = cookie;
284 	struct arm_smmu_device *smmu = smmu_domain->smmu;
285 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
286 	int idx = cfg->cbndx;
287 
288 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
289 		wmb();
290 
291 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
292 		iova = (iova >> 12) << 12;
293 		iova |= cfg->asid;
294 		do {
295 			arm_smmu_cb_write(smmu, idx, reg, iova);
296 			iova += granule;
297 		} while (size -= granule);
298 	} else {
299 		iova >>= 12;
300 		iova |= (u64)cfg->asid << 48;
301 		do {
302 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
303 			iova += granule >> 12;
304 		} while (size -= granule);
305 	}
306 }
307 
308 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
309 				      size_t granule, void *cookie, int reg)
310 {
311 	struct arm_smmu_domain *smmu_domain = cookie;
312 	struct arm_smmu_device *smmu = smmu_domain->smmu;
313 	int idx = smmu_domain->cfg.cbndx;
314 
315 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
316 		wmb();
317 
318 	iova >>= 12;
319 	do {
320 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
321 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
322 		else
323 			arm_smmu_cb_write(smmu, idx, reg, iova);
324 		iova += granule >> 12;
325 	} while (size -= granule);
326 }
327 
328 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
329 				     size_t granule, void *cookie)
330 {
331 	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
332 				  ARM_SMMU_CB_S1_TLBIVA);
333 	arm_smmu_tlb_sync_context(cookie);
334 }
335 
336 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
337 				     unsigned long iova, size_t granule,
338 				     void *cookie)
339 {
340 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
341 				  ARM_SMMU_CB_S1_TLBIVAL);
342 }
343 
344 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
345 				     size_t granule, void *cookie)
346 {
347 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
348 				  ARM_SMMU_CB_S2_TLBIIPAS2);
349 	arm_smmu_tlb_sync_context(cookie);
350 }
351 
352 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
353 				     unsigned long iova, size_t granule,
354 				     void *cookie)
355 {
356 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
357 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
358 }
359 
360 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
361 					size_t granule, void *cookie)
362 {
363 	arm_smmu_tlb_inv_context_s2(cookie);
364 }
365 /*
366  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
367  * almost negligible, but the benefit of getting the first one in as far ahead
368  * of the sync as possible is significant, hence we don't just make this a
369  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
370  * think.
371  */
372 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
373 					unsigned long iova, size_t granule,
374 					void *cookie)
375 {
376 	struct arm_smmu_domain *smmu_domain = cookie;
377 	struct arm_smmu_device *smmu = smmu_domain->smmu;
378 
379 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
380 		wmb();
381 
382 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
383 }
384 
/*
 * TLB maintenance callbacks installed for stage-1 domains
 * (see arm_smmu_init_domain_context()).
 */
385 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
386 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
387 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
388 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
389 };
390 
/*
 * TLB maintenance callbacks installed for stage-2 domains when the SMMU
 * version is ARM_SMMU_V2: ranges are invalidated by address.
 */
391 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
392 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
393 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
394 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
395 };
396 
/*
 * TLB maintenance callbacks installed for stage-2 domains on any other SMMU
 * version: every operation invalidates the whole VMID.
 */
397 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
398 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
399 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
400 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
401 };
402 
403 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
404 {
405 	u32 fsr, fsynr, cbfrsynra;
406 	unsigned long iova;
407 	struct iommu_domain *domain = dev;
408 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
409 	struct arm_smmu_device *smmu = smmu_domain->smmu;
410 	int idx = smmu_domain->cfg.cbndx;
411 
412 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
413 	if (!(fsr & ARM_SMMU_FSR_FAULT))
414 		return IRQ_NONE;
415 
416 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
417 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
418 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
419 
420 	dev_err_ratelimited(smmu->dev,
421 	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
422 			    fsr, iova, fsynr, cbfrsynra, idx);
423 
424 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
425 	return IRQ_HANDLED;
426 }
427 
428 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
429 {
430 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
431 	struct arm_smmu_device *smmu = dev;
432 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
433 				      DEFAULT_RATELIMIT_BURST);
434 
435 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
436 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
437 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
438 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
439 
440 	if (!gfsr)
441 		return IRQ_NONE;
442 
443 	if (__ratelimit(&rs)) {
444 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
445 		    (gfsr & ARM_SMMU_sGFSR_USF))
446 			dev_err(smmu->dev,
447 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
448 				(u16)gfsynr1);
449 		else
450 			dev_err(smmu->dev,
451 				"Unexpected global fault, this could be serious\n");
452 		dev_err(smmu->dev,
453 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
454 			gfsr, gfsynr0, gfsynr1, gfsynr2);
455 	}
456 
457 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
458 	return IRQ_HANDLED;
459 }
460 
461 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
462 				       struct io_pgtable_cfg *pgtbl_cfg)
463 {
464 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
465 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
466 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
467 
468 	cb->cfg = cfg;
469 
470 	/* TCR */
471 	if (stage1) {
472 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
473 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
474 		} else {
475 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
476 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
477 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
478 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
479 			else
480 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
481 		}
482 	} else {
483 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
484 	}
485 
486 	/* TTBRs */
487 	if (stage1) {
488 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
489 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
490 			cb->ttbr[1] = 0;
491 		} else {
492 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
493 						 cfg->asid);
494 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
495 						 cfg->asid);
496 
497 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
498 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
499 			else
500 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
501 		}
502 	} else {
503 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
504 	}
505 
506 	/* MAIRs (stage-1 only) */
507 	if (stage1) {
508 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
509 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
510 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
511 		} else {
512 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
513 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
514 		}
515 	}
516 }
517 
518 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
519 {
520 	u32 reg;
521 	bool stage1;
522 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
523 	struct arm_smmu_cfg *cfg = cb->cfg;
524 
525 	/* Unassigned context banks only need disabling */
526 	if (!cfg) {
527 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
528 		return;
529 	}
530 
531 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
532 
533 	/* CBA2R */
534 	if (smmu->version > ARM_SMMU_V1) {
535 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
536 			reg = ARM_SMMU_CBA2R_VA64;
537 		else
538 			reg = 0;
539 		/* 16-bit VMIDs live in CBA2R */
540 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
541 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
542 
543 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
544 	}
545 
546 	/* CBAR */
547 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
548 	if (smmu->version < ARM_SMMU_V2)
549 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
550 
551 	/*
552 	 * Use the weakest shareability/memory types, so they are
553 	 * overridden by the ttbcr/pte.
554 	 */
555 	if (stage1) {
556 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
557 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
558 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
559 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
560 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
561 		/* 8-bit VMIDs live in CBAR */
562 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
563 	}
564 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
565 
566 	/*
567 	 * TCR
568 	 * We must write this before the TTBRs, since it determines the
569 	 * access behaviour of some fields (in particular, ASID[15:8]).
570 	 */
571 	if (stage1 && smmu->version > ARM_SMMU_V1)
572 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
573 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
574 
575 	/* TTBRs */
576 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
577 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
578 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
579 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
580 	} else {
581 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
582 		if (stage1)
583 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
584 					   cb->ttbr[1]);
585 	}
586 
587 	/* MAIRs (stage-1 only) */
588 	if (stage1) {
589 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
590 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
591 	}
592 
593 	/* SCTLR */
594 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
595 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
596 	if (stage1)
597 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
598 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
599 		reg |= ARM_SMMU_SCTLR_E;
600 
601 	if (smmu->impl && smmu->impl->write_sctlr)
602 		smmu->impl->write_sctlr(smmu, idx, reg);
603 	else
604 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
605 }
606 
607 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
608 				       struct arm_smmu_device *smmu,
609 				       struct device *dev, unsigned int start)
610 {
611 	if (smmu->impl && smmu->impl->alloc_context_bank)
612 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
613 
614 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
615 }
616 
/*
 * Finalise @domain for use with @smmu: choose the translation stage and
 * context format, allocate a context bank, build the page tables, program
 * the bank and request its context fault interrupt.
 *
 * The work is done once per domain under smmu_domain->init_mutex; a domain
 * that already has an SMMU assigned is left untouched and 0 is returned.
 * Identity domains are only marked as bypass and bound to @smmu.
 *
 * @domain: the IOMMU domain to finalise
 * @smmu:   the SMMU instance the domain is being attached to
 * @dev:    the device being attached; only forwarded to the implementation
 *          hooks
 *
 * Return: 0 on success (including when the context IRQ could not be
 * requested, which is only logged); -EINVAL if no usable context format or
 * stage exists; -ENOMEM if page table allocation fails; otherwise the
 * error from context bank allocation or the implementation's init_context
 * hook.
 */
617 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
618 					struct arm_smmu_device *smmu,
619 					struct device *dev)
620 {
621 	int irq, start, ret = 0;
622 	unsigned long ias, oas;
623 	struct io_pgtable_ops *pgtbl_ops;
624 	struct io_pgtable_cfg pgtbl_cfg;
625 	enum io_pgtable_fmt fmt;
626 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
627 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
628 	irqreturn_t (*context_fault)(int irq, void *dev);
629 
630 	mutex_lock(&smmu_domain->init_mutex);
	/* Already finalised: nothing to do, ret is still 0. */
631 	if (smmu_domain->smmu)
632 		goto out_unlock;
633 
	/* Identity domains need no context bank or page tables. */
634 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
635 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
636 		smmu_domain->smmu = smmu;
637 		goto out_unlock;
638 	}
639 
640 	/*
641 	 * Mapping the requested stage onto what we support is surprisingly
642 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
643 	 * support for nested translation. That means we end up with the
644 	 * following table:
645 	 *
646 	 * Requested        Supported        Actual
647 	 *     S1               N              S1
648 	 *     S1             S1+S2            S1
649 	 *     S1               S2             S2
650 	 *     S1               S1             S1
651 	 *     N                N              N
652 	 *     N              S1+S2            S2
653 	 *     N                S2             S2
654 	 *     N                S1             S1
655 	 *
656 	 * Note that you can't actually request stage-2 mappings.
657 	 */
658 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
659 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
660 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
661 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
662 
663 	/*
664 	 * Choosing a suitable context format is even more fiddly. Until we
665 	 * grow some way for the caller to express a preference, and/or move
666 	 * the decision into the io-pgtable code where it arguably belongs,
667 	 * just aim for the closest thing to the rest of the system, and hope
668 	 * that the hardware isn't esoteric enough that we can't assume AArch64
669 	 * support to be a superset of AArch32 support...
670 	 */
	/* Later assignments override earlier ones: L, then S, then AArch64. */
671 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
672 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
673 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
674 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
675 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
676 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
677 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
678 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
679 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
680 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
681 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
682 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
683 
684 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
685 		ret = -EINVAL;
686 		goto out_unlock;
687 	}
688 
	/*
	 * Per stage: pick the CBAR type, the first context bank index to
	 * consider, the input/output address sizes, the io-pgtable format
	 * and the TLB flush callbacks.
	 */
689 	switch (smmu_domain->stage) {
690 	case ARM_SMMU_DOMAIN_S1:
691 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		/* NOTE(review): presumably skips banks that can only do stage 2 -- confirm. */
692 		start = smmu->num_s2_context_banks;
693 		ias = smmu->va_size;
694 		oas = smmu->ipa_size;
695 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
696 			fmt = ARM_64_LPAE_S1;
697 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
698 			fmt = ARM_32_LPAE_S1;
699 			ias = min(ias, 32UL);
700 			oas = min(oas, 40UL);
701 		} else {
702 			fmt = ARM_V7S;
703 			ias = min(ias, 32UL);
704 			oas = min(oas, 32UL);
705 		}
706 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
707 		break;
708 	case ARM_SMMU_DOMAIN_NESTED:
709 		/*
710 		 * We will likely want to change this if/when KVM gets
711 		 * involved.
712 		 */
713 	case ARM_SMMU_DOMAIN_S2:
714 		cfg->cbar = CBAR_TYPE_S2_TRANS;
715 		start = 0;
716 		ias = smmu->ipa_size;
717 		oas = smmu->pa_size;
718 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
719 			fmt = ARM_64_LPAE_S2;
720 		} else {
721 			fmt = ARM_32_LPAE_S2;
722 			ias = min(ias, 40UL);
723 			oas = min(oas, 40UL);
724 		}
725 		if (smmu->version == ARM_SMMU_V2)
726 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
727 		else
728 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
729 		break;
730 	default:
731 		ret = -EINVAL;
732 		goto out_unlock;
733 	}
734 
	/* A non-negative return is the index of the allocated context bank. */
735 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
736 	if (ret < 0) {
737 		goto out_unlock;
738 	}
739 
	/* From here on, failures must unwind through out_clear_smmu. */
740 	smmu_domain->smmu = smmu;
741 
742 	cfg->cbndx = ret;
	/*
	 * Before v2 the context interrupts are handed out round-robin;
	 * from v2 on each bank uses the interrupt matching its index.
	 */
743 	if (smmu->version < ARM_SMMU_V2) {
744 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
745 		cfg->irptndx %= smmu->num_context_irqs;
746 	} else {
747 		cfg->irptndx = cfg->cbndx;
748 	}
749 
	/* NOTE(review): the "+ 1" presumably keeps VMID 0 unused -- confirm. */
750 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
751 		cfg->vmid = cfg->cbndx + 1;
752 	else
753 		cfg->asid = cfg->cbndx;
754 
755 	pgtbl_cfg = (struct io_pgtable_cfg) {
756 		.pgsize_bitmap	= smmu->pgsize_bitmap,
757 		.ias		= ias,
758 		.oas		= oas,
759 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
760 		.tlb		= smmu_domain->flush_ops,
761 		.iommu_dev	= smmu->dev,
762 	};
763 
	/* Give the implementation a chance to adjust the configuration. */
764 	if (smmu->impl && smmu->impl->init_context) {
765 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
766 		if (ret)
767 			goto out_clear_smmu;
768 	}
769 
	/* Merge in any quirks already recorded on the domain. */
770 	if (smmu_domain->pgtbl_cfg.quirks)
771 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_cfg.quirks;
772 
773 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
774 	if (!pgtbl_ops) {
775 		ret = -ENOMEM;
776 		goto out_clear_smmu;
777 	}
778 
779 	/* Update the domain's page sizes to reflect the page table format */
780 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
781 
	/* With the TTBR1 quirk the aperture is the top of the address space. */
782 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
783 		domain->geometry.aperture_start = ~0UL << ias;
784 		domain->geometry.aperture_end = ~0UL;
785 	} else {
786 		domain->geometry.aperture_end = (1UL << ias) - 1;
787 	}
788 
789 	domain->geometry.force_aperture = true;
790 
791 	/* Initialise the context bank with our page table cfg */
792 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
793 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
794 
795 	/*
796 	 * Request context fault interrupt. Do this last to avoid the
797 	 * handler seeing a half-initialised domain state.
798 	 */
799 	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
800 
801 	if (smmu->impl && smmu->impl->context_fault)
802 		context_fault = smmu->impl->context_fault;
803 	else
804 		context_fault = arm_smmu_context_fault;
805 
806 	ret = devm_request_irq(smmu->dev, irq, context_fault,
807 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	/*
	 * Not fatal: the domain stays usable, and the invalid index tells
	 * arm_smmu_destroy_domain_context() there is no IRQ to free.
	 */
808 	if (ret < 0) {
809 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
810 			cfg->irptndx, irq);
811 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
812 	}
813 
814 	mutex_unlock(&smmu_domain->init_mutex);
815 
816 	/* Publish page table ops for map/unmap */
817 	smmu_domain->pgtbl_ops = pgtbl_ops;
818 	return 0;
819 
	/* Unwind: give the context bank back and leave the domain unbound. */
820 out_clear_smmu:
821 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
822 	smmu_domain->smmu = NULL;
823 out_unlock:
824 	mutex_unlock(&smmu_domain->init_mutex);
825 	return ret;
826 }
827 
828 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
829 {
830 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
831 	struct arm_smmu_device *smmu = smmu_domain->smmu;
832 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
833 	int ret, irq;
834 
835 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
836 		return;
837 
838 	ret = arm_smmu_rpm_get(smmu);
839 	if (ret < 0)
840 		return;
841 
842 	/*
843 	 * Disable the context bank and free the page tables before freeing
844 	 * it.
845 	 */
846 	smmu->cbs[cfg->cbndx].cfg = NULL;
847 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
848 
849 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
850 		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
851 		devm_free_irq(smmu->dev, irq, domain);
852 	}
853 
854 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
855 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
856 
857 	arm_smmu_rpm_put(smmu);
858 }
859 
860 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
861 {
862 	struct arm_smmu_domain *smmu_domain;
863 
864 	if (type != IOMMU_DOMAIN_UNMANAGED &&
865 	    type != IOMMU_DOMAIN_DMA &&
866 	    type != IOMMU_DOMAIN_IDENTITY)
867 		return NULL;
868 	/*
869 	 * Allocate the domain and initialise some of its data structures.
870 	 * We can't really do anything meaningful until we've added a
871 	 * master.
872 	 */
873 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
874 	if (!smmu_domain)
875 		return NULL;
876 
877 	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
878 	    iommu_get_dma_cookie(&smmu_domain->domain))) {
879 		kfree(smmu_domain);
880 		return NULL;
881 	}
882 
883 	mutex_init(&smmu_domain->init_mutex);
884 	spin_lock_init(&smmu_domain->cb_lock);
885 
886 	return &smmu_domain->domain;
887 }
888 
/*
 * Release a domain: drop its DMA cookie, tear down its context and free the
 * containing structure. All devices are assumed to have been detached
 * already.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain;

	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);

	smmu_domain = to_smmu_domain(domain);
	kfree(smmu_domain);
}
901 
902 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
903 {
904 	struct arm_smmu_smr *smr = smmu->smrs + idx;
905 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
906 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
907 
908 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
909 		reg |= ARM_SMMU_SMR_VALID;
910 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
911 }
912 
913 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
914 {
915 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
916 	u32 reg;
917 
918 	if (smmu->impl && smmu->impl->write_s2cr) {
919 		smmu->impl->write_s2cr(smmu, idx);
920 		return;
921 	}
922 
923 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
924 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
925 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
926 
927 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
928 	    smmu->smrs[idx].valid)
929 		reg |= ARM_SMMU_S2CR_EXIDVALID;
930 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
931 }
932 
933 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
934 {
935 	arm_smmu_write_s2cr(smmu, idx);
936 	if (smmu->smrs)
937 		arm_smmu_write_smr(smmu, idx);
938 }
939 
940 /*
941  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
942  * should be called after sCR0 is written.
943  */
944 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
945 {
946 	u32 smr;
947 	int i;
948 
949 	if (!smmu->smrs)
950 		return;
951 	/*
952 	 * If we've had to accommodate firmware memory regions, we may
953 	 * have live SMRs by now; tread carefully...
954 	 *
955 	 * Somewhat perversely, not having a free SMR for this test implies we
956 	 * can get away without it anyway, as we'll only be able to 'allocate'
957 	 * these SMRs for the ID/mask values we're already trusting to be OK.
958 	 */
959 	for (i = 0; i < smmu->num_mapping_groups; i++)
960 		if (!smmu->smrs[i].valid)
961 			goto smr_ok;
962 	return;
963 smr_ok:
964 	/*
965 	 * SMR.ID bits may not be preserved if the corresponding MASK
966 	 * bits are set, so check each one separately. We can reject
967 	 * masters later if they try to claim IDs outside these masks.
968 	 */
969 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
970 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
971 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
972 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
973 
974 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
978 }
979 
980 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
981 {
982 	struct arm_smmu_smr *smrs = smmu->smrs;
983 	int i, free_idx = -ENOSPC;
984 
985 	/* Stream indexing is blissfully easy */
986 	if (!smrs)
987 		return id;
988 
989 	/* Validating SMRs is... less so */
990 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
991 		if (!smrs[i].valid) {
992 			/*
993 			 * Note the first free entry we come across, which
994 			 * we'll claim in the end if nothing else matches.
995 			 */
996 			if (free_idx < 0)
997 				free_idx = i;
998 			continue;
999 		}
1000 		/*
1001 		 * If the new entry is _entirely_ matched by an existing entry,
1002 		 * then reuse that, with the guarantee that there also cannot
1003 		 * be any subsequent conflicting entries. In normal use we'd
1004 		 * expect simply identical entries for this case, but there's
1005 		 * no harm in accommodating the generalisation.
1006 		 */
1007 		if ((mask & smrs[i].mask) == mask &&
1008 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1009 			return i;
1010 		/*
1011 		 * If the new entry has any other overlap with an existing one,
1012 		 * though, then there always exists at least one stream ID
1013 		 * which would cause a conflict, and we can't allow that risk.
1014 		 */
1015 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1016 			return -EINVAL;
1017 	}
1018 
1019 	return free_idx;
1020 }
1021 
1022 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1023 {
1024 	if (--smmu->s2crs[idx].count)
1025 		return false;
1026 
1027 	smmu->s2crs[idx] = s2cr_init_val;
1028 	if (smmu->smrs)
1029 		smmu->smrs[idx].valid = false;
1030 
1031 	return true;
1032 }
1033 
1034 static int arm_smmu_master_alloc_smes(struct device *dev)
1035 {
1036 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1037 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1038 	struct arm_smmu_device *smmu = cfg->smmu;
1039 	struct arm_smmu_smr *smrs = smmu->smrs;
1040 	int i, idx, ret;
1041 
1042 	mutex_lock(&smmu->stream_map_mutex);
1043 	/* Figure out a viable stream map entry allocation */
1044 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1045 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1046 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1047 
1048 		if (idx != INVALID_SMENDX) {
1049 			ret = -EEXIST;
1050 			goto out_err;
1051 		}
1052 
1053 		ret = arm_smmu_find_sme(smmu, sid, mask);
1054 		if (ret < 0)
1055 			goto out_err;
1056 
1057 		idx = ret;
1058 		if (smrs && smmu->s2crs[idx].count == 0) {
1059 			smrs[idx].id = sid;
1060 			smrs[idx].mask = mask;
1061 			smrs[idx].valid = true;
1062 		}
1063 		smmu->s2crs[idx].count++;
1064 		cfg->smendx[i] = (s16)idx;
1065 	}
1066 
1067 	/* It worked! Now, poke the actual hardware */
1068 	for_each_cfg_sme(cfg, fwspec, i, idx)
1069 		arm_smmu_write_sme(smmu, idx);
1070 
1071 	mutex_unlock(&smmu->stream_map_mutex);
1072 	return 0;
1073 
1074 out_err:
1075 	while (i--) {
1076 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1077 		cfg->smendx[i] = INVALID_SMENDX;
1078 	}
1079 	mutex_unlock(&smmu->stream_map_mutex);
1080 	return ret;
1081 }
1082 
1083 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1084 				      struct iommu_fwspec *fwspec)
1085 {
1086 	struct arm_smmu_device *smmu = cfg->smmu;
1087 	int i, idx;
1088 
1089 	mutex_lock(&smmu->stream_map_mutex);
1090 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1091 		if (arm_smmu_free_sme(smmu, idx))
1092 			arm_smmu_write_sme(smmu, idx);
1093 		cfg->smendx[i] = INVALID_SMENDX;
1094 	}
1095 	mutex_unlock(&smmu->stream_map_mutex);
1096 }
1097 
1098 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1099 				      struct arm_smmu_master_cfg *cfg,
1100 				      struct iommu_fwspec *fwspec)
1101 {
1102 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1103 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1104 	u8 cbndx = smmu_domain->cfg.cbndx;
1105 	enum arm_smmu_s2cr_type type;
1106 	int i, idx;
1107 
1108 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1109 		type = S2CR_TYPE_BYPASS;
1110 	else
1111 		type = S2CR_TYPE_TRANS;
1112 
1113 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1114 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1115 			continue;
1116 
1117 		s2cr[idx].type = type;
1118 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1119 		s2cr[idx].cbndx = cbndx;
1120 		arm_smmu_write_s2cr(smmu, idx);
1121 	}
1122 	return 0;
1123 }
1124 
1125 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1126 {
1127 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1128 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1129 	struct arm_smmu_master_cfg *cfg;
1130 	struct arm_smmu_device *smmu;
1131 	int ret;
1132 
1133 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1134 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1135 		return -ENXIO;
1136 	}
1137 
1138 	/*
1139 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1140 	 * domains between of_xlate() and probe_device() - we have no way to cope
1141 	 * with that, so until ARM gets converted to rely on groups and default
1142 	 * domains, just say no (but more politely than by dereferencing NULL).
1143 	 * This should be at least a WARN_ON once that's sorted.
1144 	 */
1145 	cfg = dev_iommu_priv_get(dev);
1146 	if (!cfg)
1147 		return -ENODEV;
1148 
1149 	smmu = cfg->smmu;
1150 
1151 	ret = arm_smmu_rpm_get(smmu);
1152 	if (ret < 0)
1153 		return ret;
1154 
1155 	/* Ensure that the domain is finalised */
1156 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1157 	if (ret < 0)
1158 		goto rpm_put;
1159 
1160 	/*
1161 	 * Sanity check the domain. We don't support domains across
1162 	 * different SMMUs.
1163 	 */
1164 	if (smmu_domain->smmu != smmu) {
1165 		dev_err(dev,
1166 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1167 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1168 		ret = -EINVAL;
1169 		goto rpm_put;
1170 	}
1171 
1172 	/* Looks ok, so add the device to the domain */
1173 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1174 
1175 	/*
1176 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1177 	 * Otherwise, if a driver for a suspended consumer device
1178 	 * unmaps buffers, it will runpm resume/suspend for each one.
1179 	 *
1180 	 * For example, when used by a GPU device, when an application
1181 	 * or game exits, it can trigger unmapping 100s or 1000s of
1182 	 * buffers.  With a runpm cycle for each buffer, that adds up
1183 	 * to 5-10sec worth of reprogramming the context bank, while
1184 	 * the system appears to be locked up to the user.
1185 	 */
1186 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1187 	pm_runtime_use_autosuspend(smmu->dev);
1188 
1189 rpm_put:
1190 	arm_smmu_rpm_put(smmu);
1191 	return ret;
1192 }
1193 
1194 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1195 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1196 {
1197 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1198 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1199 	int ret;
1200 
1201 	if (!ops)
1202 		return -ENODEV;
1203 
1204 	arm_smmu_rpm_get(smmu);
1205 	ret = ops->map(ops, iova, paddr, size, prot, gfp);
1206 	arm_smmu_rpm_put(smmu);
1207 
1208 	return ret;
1209 }
1210 
1211 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1212 			     size_t size, struct iommu_iotlb_gather *gather)
1213 {
1214 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1215 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1216 	size_t ret;
1217 
1218 	if (!ops)
1219 		return 0;
1220 
1221 	arm_smmu_rpm_get(smmu);
1222 	ret = ops->unmap(ops, iova, size, gather);
1223 	arm_smmu_rpm_put(smmu);
1224 
1225 	return ret;
1226 }
1227 
1228 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1229 {
1230 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1231 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1232 
1233 	if (smmu_domain->flush_ops) {
1234 		arm_smmu_rpm_get(smmu);
1235 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1236 		arm_smmu_rpm_put(smmu);
1237 	}
1238 }
1239 
1240 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1241 				struct iommu_iotlb_gather *gather)
1242 {
1243 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1244 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1245 
1246 	if (!smmu)
1247 		return;
1248 
1249 	arm_smmu_rpm_get(smmu);
1250 	if (smmu->version == ARM_SMMU_V2 ||
1251 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1252 		arm_smmu_tlb_sync_context(smmu_domain);
1253 	else
1254 		arm_smmu_tlb_sync_global(smmu);
1255 	arm_smmu_rpm_put(smmu);
1256 }
1257 
1258 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1259 					      dma_addr_t iova)
1260 {
1261 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1262 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1263 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1264 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1265 	struct device *dev = smmu->dev;
1266 	void __iomem *reg;
1267 	u32 tmp;
1268 	u64 phys;
1269 	unsigned long va, flags;
1270 	int ret, idx = cfg->cbndx;
1271 
1272 	ret = arm_smmu_rpm_get(smmu);
1273 	if (ret < 0)
1274 		return 0;
1275 
1276 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1277 	va = iova & ~0xfffUL;
1278 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1279 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1280 	else
1281 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1282 
1283 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1284 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1285 				      5, 50)) {
1286 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1287 		dev_err(dev,
1288 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1289 			&iova);
1290 		return ops->iova_to_phys(ops, iova);
1291 	}
1292 
1293 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1294 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1295 	if (phys & ARM_SMMU_CB_PAR_F) {
1296 		dev_err(dev, "translation fault!\n");
1297 		dev_err(dev, "PAR = 0x%llx\n", phys);
1298 		return 0;
1299 	}
1300 
1301 	arm_smmu_rpm_put(smmu);
1302 
1303 	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1304 }
1305 
1306 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1307 					dma_addr_t iova)
1308 {
1309 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1310 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1311 
1312 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
1313 		return iova;
1314 
1315 	if (!ops)
1316 		return 0;
1317 
1318 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1319 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1320 		return arm_smmu_iova_to_phys_hard(domain, iova);
1321 
1322 	return ops->iova_to_phys(ops, iova);
1323 }
1324 
1325 static bool arm_smmu_capable(enum iommu_cap cap)
1326 {
1327 	switch (cap) {
1328 	case IOMMU_CAP_CACHE_COHERENCY:
1329 		/*
1330 		 * Return true here as the SMMU can always send out coherent
1331 		 * requests.
1332 		 */
1333 		return true;
1334 	case IOMMU_CAP_NOEXEC:
1335 		return true;
1336 	default:
1337 		return false;
1338 	}
1339 }
1340 
1341 static
1342 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1343 {
1344 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1345 							  fwnode);
1346 	put_device(dev);
1347 	return dev ? dev_get_drvdata(dev) : NULL;
1348 }
1349 
1350 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1351 {
1352 	struct arm_smmu_device *smmu = NULL;
1353 	struct arm_smmu_master_cfg *cfg;
1354 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1355 	int i, ret;
1356 
1357 	if (using_legacy_binding) {
1358 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1359 
1360 		/*
1361 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1362 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1363 		 * later use.
1364 		 */
1365 		fwspec = dev_iommu_fwspec_get(dev);
1366 		if (ret)
1367 			goto out_free;
1368 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1369 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1370 	} else {
1371 		return ERR_PTR(-ENODEV);
1372 	}
1373 
1374 	ret = -EINVAL;
1375 	for (i = 0; i < fwspec->num_ids; i++) {
1376 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1377 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1378 
1379 		if (sid & ~smmu->streamid_mask) {
1380 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1381 				sid, smmu->streamid_mask);
1382 			goto out_free;
1383 		}
1384 		if (mask & ~smmu->smr_mask_mask) {
1385 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1386 				mask, smmu->smr_mask_mask);
1387 			goto out_free;
1388 		}
1389 	}
1390 
1391 	ret = -ENOMEM;
1392 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1393 		      GFP_KERNEL);
1394 	if (!cfg)
1395 		goto out_free;
1396 
1397 	cfg->smmu = smmu;
1398 	dev_iommu_priv_set(dev, cfg);
1399 	while (i--)
1400 		cfg->smendx[i] = INVALID_SMENDX;
1401 
1402 	ret = arm_smmu_rpm_get(smmu);
1403 	if (ret < 0)
1404 		goto out_cfg_free;
1405 
1406 	ret = arm_smmu_master_alloc_smes(dev);
1407 	arm_smmu_rpm_put(smmu);
1408 
1409 	if (ret)
1410 		goto out_cfg_free;
1411 
1412 	device_link_add(dev, smmu->dev,
1413 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1414 
1415 	return &smmu->iommu;
1416 
1417 out_cfg_free:
1418 	kfree(cfg);
1419 out_free:
1420 	iommu_fwspec_free(dev);
1421 	return ERR_PTR(ret);
1422 }
1423 
1424 static void arm_smmu_release_device(struct device *dev)
1425 {
1426 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1427 	struct arm_smmu_master_cfg *cfg;
1428 	struct arm_smmu_device *smmu;
1429 	int ret;
1430 
1431 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1432 		return;
1433 
1434 	cfg  = dev_iommu_priv_get(dev);
1435 	smmu = cfg->smmu;
1436 
1437 	ret = arm_smmu_rpm_get(smmu);
1438 	if (ret < 0)
1439 		return;
1440 
1441 	arm_smmu_master_free_smes(cfg, fwspec);
1442 
1443 	arm_smmu_rpm_put(smmu);
1444 
1445 	dev_iommu_priv_set(dev, NULL);
1446 	kfree(cfg);
1447 	iommu_fwspec_free(dev);
1448 }
1449 
1450 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1451 {
1452 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1453 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1454 	struct arm_smmu_device *smmu = cfg->smmu;
1455 	struct iommu_group *group = NULL;
1456 	int i, idx;
1457 
1458 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1459 		if (group && smmu->s2crs[idx].group &&
1460 		    group != smmu->s2crs[idx].group)
1461 			return ERR_PTR(-EINVAL);
1462 
1463 		group = smmu->s2crs[idx].group;
1464 	}
1465 
1466 	if (group)
1467 		return iommu_group_ref_get(group);
1468 
1469 	if (dev_is_pci(dev))
1470 		group = pci_device_group(dev);
1471 	else if (dev_is_fsl_mc(dev))
1472 		group = fsl_mc_device_group(dev);
1473 	else
1474 		group = generic_device_group(dev);
1475 
1476 	/* Remember group for faster lookups */
1477 	if (!IS_ERR(group))
1478 		for_each_cfg_sme(cfg, fwspec, i, idx)
1479 			smmu->s2crs[idx].group = group;
1480 
1481 	return group;
1482 }
1483 
1484 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1485 				    enum iommu_attr attr, void *data)
1486 {
1487 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1488 
1489 	switch(domain->type) {
1490 	case IOMMU_DOMAIN_UNMANAGED:
1491 		switch (attr) {
1492 		case DOMAIN_ATTR_NESTING:
1493 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1494 			return 0;
1495 		case DOMAIN_ATTR_IO_PGTABLE_CFG: {
1496 			struct io_pgtable_domain_attr *pgtbl_cfg = data;
1497 			*pgtbl_cfg = smmu_domain->pgtbl_cfg;
1498 
1499 			return 0;
1500 		}
1501 		default:
1502 			return -ENODEV;
1503 		}
1504 		break;
1505 	case IOMMU_DOMAIN_DMA:
1506 		switch (attr) {
1507 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: {
1508 			bool non_strict = smmu_domain->pgtbl_cfg.quirks &
1509 					  IO_PGTABLE_QUIRK_NON_STRICT;
1510 			*(int *)data = non_strict;
1511 			return 0;
1512 		}
1513 		default:
1514 			return -ENODEV;
1515 		}
1516 		break;
1517 	default:
1518 		return -EINVAL;
1519 	}
1520 }
1521 
1522 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1523 				    enum iommu_attr attr, void *data)
1524 {
1525 	int ret = 0;
1526 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1527 
1528 	mutex_lock(&smmu_domain->init_mutex);
1529 
1530 	switch(domain->type) {
1531 	case IOMMU_DOMAIN_UNMANAGED:
1532 		switch (attr) {
1533 		case DOMAIN_ATTR_NESTING:
1534 			if (smmu_domain->smmu) {
1535 				ret = -EPERM;
1536 				goto out_unlock;
1537 			}
1538 
1539 			if (*(int *)data)
1540 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1541 			else
1542 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1543 			break;
1544 		case DOMAIN_ATTR_IO_PGTABLE_CFG: {
1545 			struct io_pgtable_domain_attr *pgtbl_cfg = data;
1546 
1547 			if (smmu_domain->smmu) {
1548 				ret = -EPERM;
1549 				goto out_unlock;
1550 			}
1551 
1552 			smmu_domain->pgtbl_cfg = *pgtbl_cfg;
1553 			break;
1554 		}
1555 		default:
1556 			ret = -ENODEV;
1557 		}
1558 		break;
1559 	case IOMMU_DOMAIN_DMA:
1560 		switch (attr) {
1561 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1562 			if (*(int *)data)
1563 				smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1564 			else
1565 				smmu_domain->pgtbl_cfg.quirks &= ~IO_PGTABLE_QUIRK_NON_STRICT;
1566 			break;
1567 		default:
1568 			ret = -ENODEV;
1569 		}
1570 		break;
1571 	default:
1572 		ret = -EINVAL;
1573 	}
1574 out_unlock:
1575 	mutex_unlock(&smmu_domain->init_mutex);
1576 	return ret;
1577 }
1578 
1579 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1580 {
1581 	u32 mask, fwid = 0;
1582 
1583 	if (args->args_count > 0)
1584 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1585 
1586 	if (args->args_count > 1)
1587 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1588 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1589 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1590 
1591 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1592 }
1593 
1594 static void arm_smmu_get_resv_regions(struct device *dev,
1595 				      struct list_head *head)
1596 {
1597 	struct iommu_resv_region *region;
1598 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1599 
1600 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1601 					 prot, IOMMU_RESV_SW_MSI);
1602 	if (!region)
1603 		return;
1604 
1605 	list_add_tail(&region->list, head);
1606 
1607 	iommu_dma_get_resv_regions(dev, head);
1608 }
1609 
1610 static int arm_smmu_def_domain_type(struct device *dev)
1611 {
1612 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1613 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1614 
1615 	if (impl && impl->def_domain_type)
1616 		return impl->def_domain_type(dev);
1617 
1618 	return 0;
1619 }
1620 
/*
 * IOMMU callback table for this driver.
 *
 * The table is deliberately not const: .pgsize_bitmap starts out as -1UL and
 * is rewritten by arm_smmu_device_cfg_probe() with the page sizes of the
 * SMMU instances that get probed.
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
1642 
1643 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1644 {
1645 	int i;
1646 	u32 reg;
1647 
1648 	/* clear global FSR */
1649 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1650 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1651 
1652 	/*
1653 	 * Reset stream mapping groups: Initial values mark all SMRn as
1654 	 * invalid and all S2CRn as bypass unless overridden.
1655 	 */
1656 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1657 		arm_smmu_write_sme(smmu, i);
1658 
1659 	/* Make sure all context banks are disabled and clear CB_FSR  */
1660 	for (i = 0; i < smmu->num_context_banks; ++i) {
1661 		arm_smmu_write_context_bank(smmu, i);
1662 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1663 	}
1664 
1665 	/* Invalidate the TLB, just in case */
1666 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1667 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1668 
1669 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1670 
1671 	/* Enable fault reporting */
1672 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1673 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1674 
1675 	/* Disable TLB broadcasting. */
1676 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1677 
1678 	/* Enable client access, handling unmatched streams as appropriate */
1679 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1680 	if (disable_bypass)
1681 		reg |= ARM_SMMU_sCR0_USFCFG;
1682 	else
1683 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1684 
1685 	/* Disable forced broadcasting */
1686 	reg &= ~ARM_SMMU_sCR0_FB;
1687 
1688 	/* Don't upgrade barriers */
1689 	reg &= ~(ARM_SMMU_sCR0_BSU);
1690 
1691 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1692 		reg |= ARM_SMMU_sCR0_VMID16EN;
1693 
1694 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1695 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1696 
1697 	if (smmu->impl && smmu->impl->reset)
1698 		smmu->impl->reset(smmu);
1699 
1700 	/* Push the button */
1701 	arm_smmu_tlb_sync_global(smmu);
1702 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1703 }
1704 
/*
 * Convert an address-size field encoding from the ID registers into a width
 * in bits. Encodings 0-4 map to 32, 36, 40, 42 and 44 bits; 5 and any other
 * value map to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44 };
	const int known = (int)(sizeof(bits) / sizeof(bits[0]));

	if (size < 0 || size >= known)
		return 48;

	return bits[size];
}
1723 
1724 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1725 {
1726 	unsigned int size;
1727 	u32 id;
1728 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1729 	int i, ret;
1730 
1731 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1732 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1733 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1734 
1735 	/* ID0 */
1736 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1737 
1738 	/* Restrict available stages based on module parameter */
1739 	if (force_stage == 1)
1740 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1741 	else if (force_stage == 2)
1742 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1743 
1744 	if (id & ARM_SMMU_ID0_S1TS) {
1745 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1746 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1747 	}
1748 
1749 	if (id & ARM_SMMU_ID0_S2TS) {
1750 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1751 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1752 	}
1753 
1754 	if (id & ARM_SMMU_ID0_NTS) {
1755 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1756 		dev_notice(smmu->dev, "\tnested translation\n");
1757 	}
1758 
1759 	if (!(smmu->features &
1760 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1761 		dev_err(smmu->dev, "\tno translation support!\n");
1762 		return -ENODEV;
1763 	}
1764 
1765 	if ((id & ARM_SMMU_ID0_S1TS) &&
1766 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1767 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1768 		dev_notice(smmu->dev, "\taddress translation ops\n");
1769 	}
1770 
1771 	/*
1772 	 * In order for DMA API calls to work properly, we must defer to what
1773 	 * the FW says about coherency, regardless of what the hardware claims.
1774 	 * Fortunately, this also opens up a workaround for systems where the
1775 	 * ID register value has ended up configured incorrectly.
1776 	 */
1777 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1778 	if (cttw_fw || cttw_reg)
1779 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1780 			   cttw_fw ? "" : "non-");
1781 	if (cttw_fw != cttw_reg)
1782 		dev_notice(smmu->dev,
1783 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1784 
1785 	/* Max. number of entries we have for stream matching/indexing */
1786 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1787 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1788 		size = 1 << 16;
1789 	} else {
1790 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1791 	}
1792 	smmu->streamid_mask = size - 1;
1793 	if (id & ARM_SMMU_ID0_SMS) {
1794 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1795 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1796 		if (size == 0) {
1797 			dev_err(smmu->dev,
1798 				"stream-matching supported, but no SMRs present!\n");
1799 			return -ENODEV;
1800 		}
1801 
1802 		/* Zero-initialised to mark as invalid */
1803 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1804 					  GFP_KERNEL);
1805 		if (!smmu->smrs)
1806 			return -ENOMEM;
1807 
1808 		dev_notice(smmu->dev,
1809 			   "\tstream matching with %u register groups", size);
1810 	}
1811 	/* s2cr->type == 0 means translation, so initialise explicitly */
1812 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1813 					 GFP_KERNEL);
1814 	if (!smmu->s2crs)
1815 		return -ENOMEM;
1816 	for (i = 0; i < size; i++)
1817 		smmu->s2crs[i] = s2cr_init_val;
1818 
1819 	smmu->num_mapping_groups = size;
1820 	mutex_init(&smmu->stream_map_mutex);
1821 	spin_lock_init(&smmu->global_sync_lock);
1822 
1823 	if (smmu->version < ARM_SMMU_V2 ||
1824 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1825 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1826 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1827 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1828 	}
1829 
1830 	/* ID1 */
1831 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1832 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1833 
1834 	/* Check for size mismatch of SMMU address space from mapped region */
1835 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1836 	if (smmu->numpage != 2 * size << smmu->pgshift)
1837 		dev_warn(smmu->dev,
1838 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1839 			2 * size << smmu->pgshift, smmu->numpage);
1840 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1841 	smmu->numpage = size;
1842 
1843 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1844 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1845 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1846 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1847 		return -ENODEV;
1848 	}
1849 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1850 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1851 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1852 				 sizeof(*smmu->cbs), GFP_KERNEL);
1853 	if (!smmu->cbs)
1854 		return -ENOMEM;
1855 
1856 	/* ID2 */
1857 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1858 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1859 	smmu->ipa_size = size;
1860 
1861 	/* The output mask is also applied for bypass */
1862 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1863 	smmu->pa_size = size;
1864 
1865 	if (id & ARM_SMMU_ID2_VMID16)
1866 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1867 
1868 	/*
1869 	 * What the page table walker can address actually depends on which
1870 	 * descriptor format is in use, but since a) we don't know that yet,
1871 	 * and b) it can vary per context bank, this will have to do...
1872 	 */
1873 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1874 		dev_warn(smmu->dev,
1875 			 "failed to set DMA mask for table walker\n");
1876 
1877 	if (smmu->version < ARM_SMMU_V2) {
1878 		smmu->va_size = smmu->ipa_size;
1879 		if (smmu->version == ARM_SMMU_V1_64K)
1880 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1881 	} else {
1882 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1883 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1884 		if (id & ARM_SMMU_ID2_PTFS_4K)
1885 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1886 		if (id & ARM_SMMU_ID2_PTFS_16K)
1887 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1888 		if (id & ARM_SMMU_ID2_PTFS_64K)
1889 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1890 	}
1891 
1892 	if (smmu->impl && smmu->impl->cfg_probe) {
1893 		ret = smmu->impl->cfg_probe(smmu);
1894 		if (ret)
1895 			return ret;
1896 	}
1897 
1898 	/* Now we've corralled the various formats, what'll it do? */
1899 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1900 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1901 	if (smmu->features &
1902 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1903 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1904 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1905 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1906 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1907 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1908 
1909 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1910 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1911 	else
1912 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1913 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1914 		   smmu->pgsize_bitmap);
1915 
1916 
1917 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1918 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1919 			   smmu->va_size, smmu->ipa_size);
1920 
1921 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1922 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1923 			   smmu->ipa_size, smmu->pa_size);
1924 
1925 	return 0;
1926 }
1927 
/*
 * Per-compatible match data: the SMMU architecture version and the
 * implementation model to assume for a matched device.
 */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

/* Define a static const match-data instance called @name */
#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

/* The version/model combinations referenced by the OF match table */
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1942 
/*
 * Device-tree compatible strings handled by this driver, each paired with
 * its match data. Several compatibles share one entry's data (for example
 * "arm,mmu-400" uses the generic v1 data and "nvidia,smmu-500" uses the
 * MMU-500 data). The empty entry terminates the table.
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1955 
1956 #ifdef CONFIG_ACPI
1957 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1958 {
1959 	int ret = 0;
1960 
1961 	switch (model) {
1962 	case ACPI_IORT_SMMU_V1:
1963 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1964 		smmu->version = ARM_SMMU_V1;
1965 		smmu->model = GENERIC_SMMU;
1966 		break;
1967 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1968 		smmu->version = ARM_SMMU_V1_64K;
1969 		smmu->model = GENERIC_SMMU;
1970 		break;
1971 	case ACPI_IORT_SMMU_V2:
1972 		smmu->version = ARM_SMMU_V2;
1973 		smmu->model = GENERIC_SMMU;
1974 		break;
1975 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1976 		smmu->version = ARM_SMMU_V2;
1977 		smmu->model = ARM_MMU500;
1978 		break;
1979 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1980 		smmu->version = ARM_SMMU_V2;
1981 		smmu->model = CAVIUM_SMMUV2;
1982 		break;
1983 	default:
1984 		ret = -ENODEV;
1985 	}
1986 
1987 	return ret;
1988 }
1989 
1990 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1991 				      struct arm_smmu_device *smmu)
1992 {
1993 	struct device *dev = smmu->dev;
1994 	struct acpi_iort_node *node =
1995 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1996 	struct acpi_iort_smmu *iort_smmu;
1997 	int ret;
1998 
1999 	/* Retrieve SMMU1/2 specific data */
2000 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
2001 
2002 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
2003 	if (ret < 0)
2004 		return ret;
2005 
2006 	/* Ignore the configuration access interrupt */
2007 	smmu->num_global_irqs = 1;
2008 
2009 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2010 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2011 
2012 	return 0;
2013 }
2014 #else
2015 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2016 					     struct arm_smmu_device *smmu)
2017 {
2018 	return -ENODEV;
2019 }
2020 #endif
2021 
2022 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2023 				    struct arm_smmu_device *smmu)
2024 {
2025 	const struct arm_smmu_match_data *data;
2026 	struct device *dev = &pdev->dev;
2027 	bool legacy_binding;
2028 
2029 	if (of_property_read_u32(dev->of_node, "#global-interrupts",
2030 				 &smmu->num_global_irqs)) {
2031 		dev_err(dev, "missing #global-interrupts property\n");
2032 		return -ENODEV;
2033 	}
2034 
2035 	data = of_device_get_match_data(dev);
2036 	smmu->version = data->version;
2037 	smmu->model = data->model;
2038 
2039 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2040 	if (legacy_binding && !using_generic_binding) {
2041 		if (!using_legacy_binding) {
2042 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2043 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2044 		}
2045 		using_legacy_binding = true;
2046 	} else if (!legacy_binding && !using_legacy_binding) {
2047 		using_generic_binding = true;
2048 	} else {
2049 		dev_err(dev, "not probing due to mismatched DT properties\n");
2050 		return -ENODEV;
2051 	}
2052 
2053 	if (of_dma_is_coherent(dev->of_node))
2054 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2055 
2056 	return 0;
2057 }
2058 
2059 static int arm_smmu_bus_init(struct iommu_ops *ops)
2060 {
2061 	int err;
2062 
2063 	/* Oh, for a proper bus abstraction */
2064 	if (!iommu_present(&platform_bus_type)) {
2065 		err = bus_set_iommu(&platform_bus_type, ops);
2066 		if (err)
2067 			return err;
2068 	}
2069 #ifdef CONFIG_ARM_AMBA
2070 	if (!iommu_present(&amba_bustype)) {
2071 		err = bus_set_iommu(&amba_bustype, ops);
2072 		if (err)
2073 			goto err_reset_platform_ops;
2074 	}
2075 #endif
2076 #ifdef CONFIG_PCI
2077 	if (!iommu_present(&pci_bus_type)) {
2078 		err = bus_set_iommu(&pci_bus_type, ops);
2079 		if (err)
2080 			goto err_reset_amba_ops;
2081 	}
2082 #endif
2083 #ifdef CONFIG_FSL_MC_BUS
2084 	if (!iommu_present(&fsl_mc_bus_type)) {
2085 		err = bus_set_iommu(&fsl_mc_bus_type, ops);
2086 		if (err)
2087 			goto err_reset_pci_ops;
2088 	}
2089 #endif
2090 	return 0;
2091 
2092 err_reset_pci_ops: __maybe_unused;
2093 #ifdef CONFIG_PCI
2094 	bus_set_iommu(&pci_bus_type, NULL);
2095 #endif
2096 err_reset_amba_ops: __maybe_unused;
2097 #ifdef CONFIG_ARM_AMBA
2098 	bus_set_iommu(&amba_bustype, NULL);
2099 #endif
2100 err_reset_platform_ops: __maybe_unused;
2101 	bus_set_iommu(&platform_bus_type, NULL);
2102 	return err;
2103 }
2104 
2105 static int arm_smmu_device_probe(struct platform_device *pdev)
2106 {
2107 	struct resource *res;
2108 	resource_size_t ioaddr;
2109 	struct arm_smmu_device *smmu;
2110 	struct device *dev = &pdev->dev;
2111 	int num_irqs, i, err;
2112 	irqreturn_t (*global_fault)(int irq, void *dev);
2113 
2114 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2115 	if (!smmu) {
2116 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2117 		return -ENOMEM;
2118 	}
2119 	smmu->dev = dev;
2120 
2121 	if (dev->of_node)
2122 		err = arm_smmu_device_dt_probe(pdev, smmu);
2123 	else
2124 		err = arm_smmu_device_acpi_probe(pdev, smmu);
2125 
2126 	if (err)
2127 		return err;
2128 
2129 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2130 	ioaddr = res->start;
2131 	smmu->base = devm_ioremap_resource(dev, res);
2132 	if (IS_ERR(smmu->base))
2133 		return PTR_ERR(smmu->base);
2134 	/*
2135 	 * The resource size should effectively match the value of SMMU_TOP;
2136 	 * stash that temporarily until we know PAGESIZE to validate it with.
2137 	 */
2138 	smmu->numpage = resource_size(res);
2139 
2140 	smmu = arm_smmu_impl_init(smmu);
2141 	if (IS_ERR(smmu))
2142 		return PTR_ERR(smmu);
2143 
2144 	num_irqs = 0;
2145 	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2146 		num_irqs++;
2147 		if (num_irqs > smmu->num_global_irqs)
2148 			smmu->num_context_irqs++;
2149 	}
2150 
2151 	if (!smmu->num_context_irqs) {
2152 		dev_err(dev, "found %d interrupts but expected at least %d\n",
2153 			num_irqs, smmu->num_global_irqs + 1);
2154 		return -ENODEV;
2155 	}
2156 
2157 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2158 				  GFP_KERNEL);
2159 	if (!smmu->irqs) {
2160 		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2161 		return -ENOMEM;
2162 	}
2163 
2164 	for (i = 0; i < num_irqs; ++i) {
2165 		int irq = platform_get_irq(pdev, i);
2166 
2167 		if (irq < 0)
2168 			return -ENODEV;
2169 		smmu->irqs[i] = irq;
2170 	}
2171 
2172 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2173 	if (err < 0) {
2174 		dev_err(dev, "failed to get clocks %d\n", err);
2175 		return err;
2176 	}
2177 	smmu->num_clks = err;
2178 
2179 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2180 	if (err)
2181 		return err;
2182 
2183 	err = arm_smmu_device_cfg_probe(smmu);
2184 	if (err)
2185 		return err;
2186 
2187 	if (smmu->version == ARM_SMMU_V2) {
2188 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2189 			dev_err(dev,
2190 			      "found only %d context irq(s) but %d required\n",
2191 			      smmu->num_context_irqs, smmu->num_context_banks);
2192 			return -ENODEV;
2193 		}
2194 
2195 		/* Ignore superfluous interrupts */
2196 		smmu->num_context_irqs = smmu->num_context_banks;
2197 	}
2198 
2199 	if (smmu->impl && smmu->impl->global_fault)
2200 		global_fault = smmu->impl->global_fault;
2201 	else
2202 		global_fault = arm_smmu_global_fault;
2203 
2204 	for (i = 0; i < smmu->num_global_irqs; ++i) {
2205 		err = devm_request_irq(smmu->dev, smmu->irqs[i],
2206 				       global_fault,
2207 				       IRQF_SHARED,
2208 				       "arm-smmu global fault",
2209 				       smmu);
2210 		if (err) {
2211 			dev_err(dev, "failed to request global IRQ %d (%u)\n",
2212 				i, smmu->irqs[i]);
2213 			return err;
2214 		}
2215 	}
2216 
2217 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2218 				     "smmu.%pa", &ioaddr);
2219 	if (err) {
2220 		dev_err(dev, "Failed to register iommu in sysfs\n");
2221 		return err;
2222 	}
2223 
2224 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2225 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2226 
2227 	err = iommu_device_register(&smmu->iommu);
2228 	if (err) {
2229 		dev_err(dev, "Failed to register iommu\n");
2230 		return err;
2231 	}
2232 
2233 	platform_set_drvdata(pdev, smmu);
2234 	arm_smmu_device_reset(smmu);
2235 	arm_smmu_test_smr_masks(smmu);
2236 
2237 	/*
2238 	 * We want to avoid touching dev->power.lock in fastpaths unless
2239 	 * it's really going to do something useful - pm_runtime_enabled()
2240 	 * can serve as an ideal proxy for that decision. So, conditionally
2241 	 * enable pm_runtime.
2242 	 */
2243 	if (dev->pm_domain) {
2244 		pm_runtime_set_active(dev);
2245 		pm_runtime_enable(dev);
2246 	}
2247 
2248 	/*
2249 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2250 	 * any device which might need it, so we want the bus ops in place
2251 	 * ready to handle default domain setup as soon as any SMMU exists.
2252 	 */
2253 	if (!using_legacy_binding)
2254 		return arm_smmu_bus_init(&arm_smmu_ops);
2255 
2256 	return 0;
2257 }
2258 
2259 static int arm_smmu_device_remove(struct platform_device *pdev)
2260 {
2261 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2262 
2263 	if (!smmu)
2264 		return -ENODEV;
2265 
2266 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2267 		dev_notice(&pdev->dev, "disabling translation\n");
2268 
2269 	arm_smmu_bus_init(NULL);
2270 	iommu_device_unregister(&smmu->iommu);
2271 	iommu_device_sysfs_remove(&smmu->iommu);
2272 
2273 	arm_smmu_rpm_get(smmu);
2274 	/* Turn the thing off */
2275 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2276 	arm_smmu_rpm_put(smmu);
2277 
2278 	if (pm_runtime_enabled(smmu->dev))
2279 		pm_runtime_force_suspend(smmu->dev);
2280 	else
2281 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2282 
2283 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2284 	return 0;
2285 }
2286 
/* Shutdown performs the same teardown as remove; its result is ignored. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	(void)arm_smmu_device_remove(pdev);
}
2291 
2292 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2293 {
2294 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2295 	int ret;
2296 
2297 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2298 	if (ret)
2299 		return ret;
2300 
2301 	arm_smmu_device_reset(smmu);
2302 
2303 	return 0;
2304 }
2305 
2306 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2307 {
2308 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2309 
2310 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2311 
2312 	return 0;
2313 }
2314 
2315 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2316 {
2317 	if (pm_runtime_suspended(dev))
2318 		return 0;
2319 
2320 	return arm_smmu_runtime_resume(dev);
2321 }
2322 
2323 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2324 {
2325 	if (pm_runtime_suspended(dev))
2326 		return 0;
2327 
2328 	return arm_smmu_runtime_suspend(dev);
2329 }
2330 
2331 static const struct dev_pm_ops arm_smmu_pm_ops = {
2332 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2333 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2334 			   arm_smmu_runtime_resume, NULL)
2335 };
2336 
2337 static struct platform_driver arm_smmu_driver = {
2338 	.driver	= {
2339 		.name			= "arm-smmu",
2340 		.of_match_table		= arm_smmu_of_match,
2341 		.pm			= &arm_smmu_pm_ops,
2342 		.suppress_bind_attrs    = true,
2343 	},
2344 	.probe	= arm_smmu_device_probe,
2345 	.remove	= arm_smmu_device_remove,
2346 	.shutdown = arm_smmu_device_shutdown,
2347 };
2348 module_platform_driver(arm_smmu_driver);
2349 
2350 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2351 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2352 MODULE_ALIAS("platform:arm-smmu");
2353 MODULE_LICENSE("GPL v2");
2354