1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
40 
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
43 
44 #include "arm-smmu.h"
45 
46 /*
47  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
48  * global register space are still, in fact, using a hypervisor to mediate it
49  * by trapping and emulating register accesses. Sadly, some deployed versions
50  * of said trapping code have bugs wherein they go horribly wrong for stores
51  * using r31 (i.e. XZR/WZR) as the source register.
52  */
53 #define QCOM_DUMMY_VAL -1
54 
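/*
 * Arbitrary IOVA window advertised to the IOMMU core as a software-managed
 * MSI region, so that MSI doorbells can be mapped for devices behind the SMMU.
 */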
55 #define MSI_IOVA_BASE			0x8000000
56 #define MSI_IOVA_LENGTH			0x100000
57 
58 static int force_stage;
59 module_param(force_stage, int, S_IRUGO);
60 MODULE_PARM_DESC(force_stage,
61 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
62 static bool disable_bypass =
63 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
64 module_param(disable_bypass, bool, S_IRUGO);
65 MODULE_PARM_DESC(disable_bypass,
66 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
67 
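/*
 * Default S2CR state for unclaimed stream map entries: fault the transaction
 * if bypass is disabled, otherwise let it pass through untranslated.
 */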
68 #define s2cr_init_val (struct arm_smmu_s2cr){				\
69 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
70 }
71 
72 static bool using_legacy_binding, using_generic_binding;
73 
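/*
 * Runtime PM may not be enabled for a given SMMU (e.g. when there is no
 * power domain or clocks to manage), in which case the device is treated
 * as always-on and these helpers do nothing.
 */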
74 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
75 {
76 	if (pm_runtime_enabled(smmu->dev))
77 		return pm_runtime_get_sync(smmu->dev);
78 
79 	return 0;
80 }
81 
82 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
83 {
84 	if (pm_runtime_enabled(smmu->dev))
85 		pm_runtime_put_autosuspend(smmu->dev);
86 }
87 
88 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
89 {
90 	return container_of(dom, struct arm_smmu_domain, domain);
91 }
92 
93 static struct platform_driver arm_smmu_driver;
94 static struct iommu_ops arm_smmu_ops;
95 
96 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
97 static int arm_smmu_bus_init(struct iommu_ops *ops);
98 
99 static struct device_node *dev_get_dev_node(struct device *dev)
100 {
101 	if (dev_is_pci(dev)) {
102 		struct pci_bus *bus = to_pci_dev(dev)->bus;
103 
104 		while (!pci_is_root_bus(bus))
105 			bus = bus->parent;
106 		return of_node_get(bus->bridge->parent->of_node);
107 	}
108 
109 	return of_node_get(dev->of_node);
110 }
111 
112 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
113 {
114 	*((__be32 *)data) = cpu_to_be32(alias);
115 	return 0; /* Continue walking */
116 }
117 
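/*
 * Called for each registered SMMU device: scan its legacy "mmu-masters"
 * phandle list for the master node stashed in the iterator, and hand back
 * the matching SMMU's struct device via *data if one is found.
 */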
118 static int __find_legacy_master_phandle(struct device *dev, void *data)
119 {
120 	struct of_phandle_iterator *it = *(void **)data;
121 	struct device_node *np = it->node;
122 	int err;
123 
124 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
125 			    "#stream-id-cells", -1)
126 		if (it->node == np) {
127 			*(void **)data = dev;
128 			return 1;
129 		}
130 	it->node = np;
131 	return err == -ENOENT ? 0 : err;
132 }
133 
134 static int arm_smmu_register_legacy_master(struct device *dev,
135 					   struct arm_smmu_device **smmu)
136 {
137 	struct device *smmu_dev;
138 	struct device_node *np;
139 	struct of_phandle_iterator it;
140 	void *data = &it;
141 	u32 *sids;
142 	__be32 pci_sid;
143 	int err;
144 
145 	np = dev_get_dev_node(dev);
146 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
147 		of_node_put(np);
148 		return -ENODEV;
149 	}
150 
151 	it.node = np;
152 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
153 				     __find_legacy_master_phandle);
154 	smmu_dev = data;
155 	of_node_put(np);
156 	if (err == 0)
157 		return -ENODEV;
158 	if (err < 0)
159 		return err;
160 
161 	if (dev_is_pci(dev)) {
162 		/* "mmu-masters" assumes Stream ID == Requester ID */
163 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
164 				       &pci_sid);
165 		it.cur = &pci_sid;
166 		it.cur_count = 1;
167 	}
168 
169 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
170 				&arm_smmu_ops);
171 	if (err)
172 		return err;
173 
174 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
175 	if (!sids)
176 		return -ENOMEM;
177 
178 	*smmu = dev_get_drvdata(smmu_dev);
179 	of_phandle_iterator_args(&it, sids, it.cur_count);
180 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
181 	kfree(sids);
182 	return err;
183 }
184 
185 /*
186  * With the legacy DT binding in play, we have no guarantees about
187  * probe order, but then we're also not doing default domains, so we can
188  * delay setting bus ops until we're sure every possible SMMU is ready,
189  * and that way ensure that no probe_device() calls get missed.
190  */
191 static int arm_smmu_legacy_bus_init(void)
192 {
193 	if (using_legacy_binding)
194 		return arm_smmu_bus_init(&arm_smmu_ops);
195 	return 0;
196 }
197 device_initcall_sync(arm_smmu_legacy_bus_init);
198 #else
199 static int arm_smmu_register_legacy_master(struct device *dev,
200 					   struct arm_smmu_device **smmu)
201 {
202 	return -ENODEV;
203 }
204 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
205 
206 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
207 {
208 	clear_bit(idx, map);
209 }
210 
211 /* Wait for any pending TLB invalidations to complete */
212 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
213 				int sync, int status)
214 {
215 	unsigned int spin_cnt, delay;
216 	u32 reg;
217 
218 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
219 		return smmu->impl->tlb_sync(smmu, page, sync, status);
220 
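	/*
	 * Issue the sync by writing (anything) to the sync register, then
	 * poll the status register: busy-wait TLB_SPIN_COUNT times before
	 * backing off with exponentially increasing udelay()s, up to
	 * TLB_LOOP_TIMEOUT.
	 */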
221 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
222 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
223 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
224 			reg = arm_smmu_readl(smmu, page, status);
225 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
226 				return;
227 			cpu_relax();
228 		}
229 		udelay(delay);
230 	}
231 	dev_err_ratelimited(smmu->dev,
232 			    "TLB sync timed out -- SMMU may be deadlocked\n");
233 }
234 
235 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
236 {
237 	unsigned long flags;
238 
239 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
240 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
241 			    ARM_SMMU_GR0_sTLBGSTATUS);
242 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
243 }
244 
245 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
246 {
247 	struct arm_smmu_device *smmu = smmu_domain->smmu;
248 	unsigned long flags;
249 
250 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
251 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
252 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
253 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
254 }
255 
256 static void arm_smmu_tlb_inv_context_s1(void *cookie)
257 {
258 	struct arm_smmu_domain *smmu_domain = cookie;
259 	/*
260 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
261 	 * current CPU are visible beforehand.
262 	 */
263 	wmb();
264 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
265 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
266 	arm_smmu_tlb_sync_context(smmu_domain);
267 }
268 
269 static void arm_smmu_tlb_inv_context_s2(void *cookie)
270 {
271 	struct arm_smmu_domain *smmu_domain = cookie;
272 	struct arm_smmu_device *smmu = smmu_domain->smmu;
273 
274 	/* See above */
275 	wmb();
276 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
277 	arm_smmu_tlb_sync_global(smmu);
278 }
279 
280 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
281 				      size_t granule, void *cookie, int reg)
282 {
283 	struct arm_smmu_domain *smmu_domain = cookie;
284 	struct arm_smmu_device *smmu = smmu_domain->smmu;
285 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
286 	int idx = cfg->cbndx;
287 
288 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
289 		wmb();
290 
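	/*
	 * For the AArch32 formats the TLBI payload carries VA[31:12] with the
	 * ASID in bits [7:0]; for AArch64 the address is shifted down and the
	 * ASID occupies bits [63:48] of the 64-bit payload.
	 */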
291 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
292 		iova = (iova >> 12) << 12;
293 		iova |= cfg->asid;
294 		do {
295 			arm_smmu_cb_write(smmu, idx, reg, iova);
296 			iova += granule;
297 		} while (size -= granule);
298 	} else {
299 		iova >>= 12;
300 		iova |= (u64)cfg->asid << 48;
301 		do {
302 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
303 			iova += granule >> 12;
304 		} while (size -= granule);
305 	}
306 }
307 
308 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
309 				      size_t granule, void *cookie, int reg)
310 {
311 	struct arm_smmu_domain *smmu_domain = cookie;
312 	struct arm_smmu_device *smmu = smmu_domain->smmu;
313 	int idx = smmu_domain->cfg.cbndx;
314 
315 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
316 		wmb();
317 
318 	iova >>= 12;
319 	do {
320 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
321 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
322 		else
323 			arm_smmu_cb_write(smmu, idx, reg, iova);
324 		iova += granule >> 12;
325 	} while (size -= granule);
326 }
327 
328 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
329 				     size_t granule, void *cookie)
330 {
331 	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
332 				  ARM_SMMU_CB_S1_TLBIVA);
333 	arm_smmu_tlb_sync_context(cookie);
334 }
335 
336 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
337 				     unsigned long iova, size_t granule,
338 				     void *cookie)
339 {
340 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
341 				  ARM_SMMU_CB_S1_TLBIVAL);
342 }
343 
344 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
345 				     size_t granule, void *cookie)
346 {
347 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
348 				  ARM_SMMU_CB_S2_TLBIIPAS2);
349 	arm_smmu_tlb_sync_context(cookie);
350 }
351 
352 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
353 				     unsigned long iova, size_t granule,
354 				     void *cookie)
355 {
356 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
357 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
358 }
359 
360 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
361 					size_t granule, void *cookie)
362 {
363 	arm_smmu_tlb_inv_context_s2(cookie);
364 }
365 /*
366  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
367  * almost negligible, but the benefit of getting the first one in as far ahead
368  * of the sync as possible is significant, hence we don't just make this a
369  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
370  * think.
371  */
372 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
373 					unsigned long iova, size_t granule,
374 					void *cookie)
375 {
376 	struct arm_smmu_domain *smmu_domain = cookie;
377 	struct arm_smmu_device *smmu = smmu_domain->smmu;
378 
379 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
380 		wmb();
381 
382 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
383 }
384 
385 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
386 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
387 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
388 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
389 };
390 
391 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
392 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
393 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
394 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
395 };
396 
397 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
398 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
399 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
400 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
401 };
402 
403 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
404 {
405 	u32 fsr, fsynr, cbfrsynra;
406 	unsigned long iova;
407 	struct iommu_domain *domain = dev;
408 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
409 	struct arm_smmu_device *smmu = smmu_domain->smmu;
410 	int idx = smmu_domain->cfg.cbndx;
411 
412 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
413 	if (!(fsr & ARM_SMMU_FSR_FAULT))
414 		return IRQ_NONE;
415 
416 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
417 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
418 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
419 
420 	dev_err_ratelimited(smmu->dev,
421 	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
422 			    fsr, iova, fsynr, cbfrsynra, idx);
423 
424 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
425 	return IRQ_HANDLED;
426 }
427 
428 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
429 {
430 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
431 	struct arm_smmu_device *smmu = dev;
432 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
433 				      DEFAULT_RATELIMIT_BURST);
434 
435 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
436 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
437 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
438 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
439 
440 	if (!gfsr)
441 		return IRQ_NONE;
442 
443 	if (__ratelimit(&rs)) {
444 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
445 		    (gfsr & ARM_SMMU_sGFSR_USF))
446 			dev_err(smmu->dev,
447 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
448 				(u16)gfsynr1);
449 		else
450 			dev_err(smmu->dev,
451 				"Unexpected global fault, this could be serious\n");
452 		dev_err(smmu->dev,
453 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
454 			gfsr, gfsynr0, gfsynr1, gfsynr2);
455 	}
456 
457 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
458 	return IRQ_HANDLED;
459 }
460 
461 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
462 				       struct io_pgtable_cfg *pgtbl_cfg)
463 {
464 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
465 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
466 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
467 
468 	cb->cfg = cfg;
469 
470 	/* TCR */
471 	if (stage1) {
472 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
473 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
474 		} else {
475 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
476 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
477 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
478 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
479 			else
480 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
481 		}
482 	} else {
483 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
484 	}
485 
486 	/* TTBRs */
487 	if (stage1) {
488 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
489 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
490 			cb->ttbr[1] = 0;
491 		} else {
492 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
493 						 cfg->asid);
494 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
495 						 cfg->asid);
496 
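			/*
			 * Both TTBRs carry the ASID; the table address is
			 * merged into whichever TTBR the io-pgtable config
			 * selected (TTBR1 for the upper-half quirk, TTBR0
			 * otherwise).
			 */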
497 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
498 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
499 			else
500 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
501 		}
502 	} else {
503 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
504 	}
505 
506 	/* MAIRs (stage-1 only) */
507 	if (stage1) {
508 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
509 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
510 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
511 		} else {
512 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
513 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
514 		}
515 	}
516 }
517 
518 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
519 {
520 	u32 reg;
521 	bool stage1;
522 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
523 	struct arm_smmu_cfg *cfg = cb->cfg;
524 
525 	/* Unassigned context banks only need disabling */
526 	if (!cfg) {
527 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
528 		return;
529 	}
530 
531 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
532 
533 	/* CBA2R */
534 	if (smmu->version > ARM_SMMU_V1) {
535 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
536 			reg = ARM_SMMU_CBA2R_VA64;
537 		else
538 			reg = 0;
539 		/* 16-bit VMIDs live in CBA2R */
540 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
541 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
542 
543 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
544 	}
545 
546 	/* CBAR */
547 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
548 	if (smmu->version < ARM_SMMU_V2)
549 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
550 
551 	/*
552 	 * Use the weakest shareability/memory types, so they are
553 	 * overridden by the ttbcr/pte.
554 	 */
555 	if (stage1) {
556 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
557 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
558 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
559 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
560 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
561 		/* 8-bit VMIDs live in CBAR */
562 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
563 	}
564 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
565 
566 	/*
567 	 * TCR
568 	 * We must write this before the TTBRs, since it determines the
569 	 * access behaviour of some fields (in particular, ASID[15:8]).
570 	 */
571 	if (stage1 && smmu->version > ARM_SMMU_V1)
572 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
573 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
574 
575 	/* TTBRs */
576 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
577 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
578 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
579 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
580 	} else {
581 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
582 		if (stage1)
583 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
584 					   cb->ttbr[1]);
585 	}
586 
587 	/* MAIRs (stage-1 only) */
588 	if (stage1) {
589 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
590 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
591 	}
592 
593 	/* SCTLR */
594 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
595 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
596 	if (stage1)
597 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
598 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
599 		reg |= ARM_SMMU_SCTLR_E;
600 
601 	if (smmu->impl && smmu->impl->write_sctlr)
602 		smmu->impl->write_sctlr(smmu, idx, reg);
603 	else
604 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
605 }
606 
607 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
608 				       struct arm_smmu_device *smmu,
609 				       struct device *dev, unsigned int start)
610 {
611 	if (smmu->impl && smmu->impl->alloc_context_bank)
612 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
613 
614 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
615 }
616 
617 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
618 					struct arm_smmu_device *smmu,
619 					struct device *dev)
620 {
621 	int irq, start, ret = 0;
622 	unsigned long ias, oas;
623 	struct io_pgtable_ops *pgtbl_ops;
624 	struct io_pgtable_cfg pgtbl_cfg;
625 	enum io_pgtable_fmt fmt;
626 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
627 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
628 	irqreturn_t (*context_fault)(int irq, void *dev);
629 
630 	mutex_lock(&smmu_domain->init_mutex);
631 	if (smmu_domain->smmu)
632 		goto out_unlock;
633 
634 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
635 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
636 		smmu_domain->smmu = smmu;
637 		goto out_unlock;
638 	}
639 
640 	/*
641 	 * Mapping the requested stage onto what we support is surprisingly
642 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
643 	 * support for nested translation. That means we end up with the
644 	 * following table:
645 	 *
646 	 * Requested        Supported        Actual
647 	 *     S1               N              S1
648 	 *     S1             S1+S2            S1
649 	 *     S1               S2             S2
650 	 *     S1               S1             S1
651 	 *     N                N              N
652 	 *     N              S1+S2            S2
653 	 *     N                S2             S2
654 	 *     N                S1             S1
655 	 *
656 	 * Note that you can't actually request stage-2 mappings.
657 	 */
658 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
659 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
660 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
661 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
662 
663 	/*
664 	 * Choosing a suitable context format is even more fiddly. Until we
665 	 * grow some way for the caller to express a preference, and/or move
666 	 * the decision into the io-pgtable code where it arguably belongs,
667 	 * just aim for the closest thing to the rest of the system, and hope
668 	 * that the hardware isn't esoteric enough that we can't assume AArch64
669 	 * support to be a superset of AArch32 support...
670 	 */
671 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
672 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
673 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
674 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
675 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
676 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
677 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
678 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
679 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
680 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
681 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
682 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
683 
684 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
685 		ret = -EINVAL;
686 		goto out_unlock;
687 	}
688 
689 	switch (smmu_domain->stage) {
690 	case ARM_SMMU_DOMAIN_S1:
691 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
692 		start = smmu->num_s2_context_banks;
693 		ias = smmu->va_size;
694 		oas = smmu->ipa_size;
695 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
696 			fmt = ARM_64_LPAE_S1;
697 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
698 			fmt = ARM_32_LPAE_S1;
699 			ias = min(ias, 32UL);
700 			oas = min(oas, 40UL);
701 		} else {
702 			fmt = ARM_V7S;
703 			ias = min(ias, 32UL);
704 			oas = min(oas, 32UL);
705 		}
706 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
707 		break;
708 	case ARM_SMMU_DOMAIN_NESTED:
709 		/*
710 		 * We will likely want to change this if/when KVM gets
711 		 * involved.
712 		 */
713 	case ARM_SMMU_DOMAIN_S2:
714 		cfg->cbar = CBAR_TYPE_S2_TRANS;
715 		start = 0;
716 		ias = smmu->ipa_size;
717 		oas = smmu->pa_size;
718 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
719 			fmt = ARM_64_LPAE_S2;
720 		} else {
721 			fmt = ARM_32_LPAE_S2;
722 			ias = min(ias, 40UL);
723 			oas = min(oas, 40UL);
724 		}
725 		if (smmu->version == ARM_SMMU_V2)
726 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
727 		else
728 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
729 		break;
730 	default:
731 		ret = -EINVAL;
732 		goto out_unlock;
733 	}
734 
735 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
	if (ret < 0)
		goto out_unlock;
739 
740 	smmu_domain->smmu = smmu;
741 
742 	cfg->cbndx = ret;
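	/*
	 * SMMUv1 may have fewer context interrupts than context banks, so
	 * hand those out round-robin; on SMMUv2 each context bank has its
	 * own dedicated interrupt.
	 */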
743 	if (smmu->version < ARM_SMMU_V2) {
744 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
745 		cfg->irptndx %= smmu->num_context_irqs;
746 	} else {
747 		cfg->irptndx = cfg->cbndx;
748 	}
749 
750 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
751 		cfg->vmid = cfg->cbndx + 1;
752 	else
753 		cfg->asid = cfg->cbndx;
754 
755 	pgtbl_cfg = (struct io_pgtable_cfg) {
756 		.pgsize_bitmap	= smmu->pgsize_bitmap,
757 		.ias		= ias,
758 		.oas		= oas,
759 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
760 		.tlb		= smmu_domain->flush_ops,
761 		.iommu_dev	= smmu->dev,
762 	};
763 
764 	if (!iommu_get_dma_strict(domain))
765 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
766 
767 	if (smmu->impl && smmu->impl->init_context) {
768 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
769 		if (ret)
770 			goto out_clear_smmu;
771 	}
772 
773 	if (smmu_domain->pgtbl_quirks)
774 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
775 
776 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
777 	if (!pgtbl_ops) {
778 		ret = -ENOMEM;
779 		goto out_clear_smmu;
780 	}
781 
782 	/* Update the domain's page sizes to reflect the page table format */
783 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
784 
785 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
786 		domain->geometry.aperture_start = ~0UL << ias;
787 		domain->geometry.aperture_end = ~0UL;
788 	} else {
789 		domain->geometry.aperture_end = (1UL << ias) - 1;
790 	}
791 
792 	domain->geometry.force_aperture = true;
793 
794 	/* Initialise the context bank with our page table cfg */
795 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
796 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
797 
798 	/*
799 	 * Request context fault interrupt. Do this last to avoid the
800 	 * handler seeing a half-initialised domain state.
801 	 */
802 	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
803 
804 	if (smmu->impl && smmu->impl->context_fault)
805 		context_fault = smmu->impl->context_fault;
806 	else
807 		context_fault = arm_smmu_context_fault;
808 
809 	ret = devm_request_irq(smmu->dev, irq, context_fault,
810 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
811 	if (ret < 0) {
812 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
813 			cfg->irptndx, irq);
814 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
815 	}
816 
817 	mutex_unlock(&smmu_domain->init_mutex);
818 
819 	/* Publish page table ops for map/unmap */
820 	smmu_domain->pgtbl_ops = pgtbl_ops;
821 	return 0;
822 
823 out_clear_smmu:
824 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
825 	smmu_domain->smmu = NULL;
826 out_unlock:
827 	mutex_unlock(&smmu_domain->init_mutex);
828 	return ret;
829 }
830 
831 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
832 {
833 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
834 	struct arm_smmu_device *smmu = smmu_domain->smmu;
835 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
836 	int ret, irq;
837 
838 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
839 		return;
840 
841 	ret = arm_smmu_rpm_get(smmu);
842 	if (ret < 0)
843 		return;
844 
845 	/*
	 * Disable the context bank and free the page tables before releasing
	 * the context bank index back to the allocator.
848 	 */
849 	smmu->cbs[cfg->cbndx].cfg = NULL;
850 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
851 
852 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
853 		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
854 		devm_free_irq(smmu->dev, irq, domain);
855 	}
856 
857 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
858 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
859 
860 	arm_smmu_rpm_put(smmu);
861 }
862 
863 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
864 {
865 	struct arm_smmu_domain *smmu_domain;
866 
867 	if (type != IOMMU_DOMAIN_UNMANAGED &&
868 	    type != IOMMU_DOMAIN_DMA &&
869 	    type != IOMMU_DOMAIN_IDENTITY)
870 		return NULL;
871 	/*
872 	 * Allocate the domain and initialise some of its data structures.
873 	 * We can't really do anything meaningful until we've added a
874 	 * master.
875 	 */
876 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
877 	if (!smmu_domain)
878 		return NULL;
879 
880 	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
881 	    iommu_get_dma_cookie(&smmu_domain->domain))) {
882 		kfree(smmu_domain);
883 		return NULL;
884 	}
885 
886 	mutex_init(&smmu_domain->init_mutex);
887 	spin_lock_init(&smmu_domain->cb_lock);
888 
889 	return &smmu_domain->domain;
890 }
891 
892 static void arm_smmu_domain_free(struct iommu_domain *domain)
893 {
894 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
895 
896 	/*
897 	 * Free the domain resources. We assume that all devices have
898 	 * already been detached.
899 	 */
900 	iommu_put_dma_cookie(domain);
901 	arm_smmu_destroy_domain_context(domain);
902 	kfree(smmu_domain);
903 }
904 
905 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
906 {
907 	struct arm_smmu_smr *smr = smmu->smrs + idx;
908 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
909 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
910 
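	/*
	 * When 16-bit extended Stream IDs are in use, the valid bit lives in
	 * S2CR.EXIDVALID instead, so SMR.VALID must be left clear here.
	 */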
911 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
912 		reg |= ARM_SMMU_SMR_VALID;
913 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
914 }
915 
916 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
917 {
918 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
919 	u32 reg;
920 
921 	if (smmu->impl && smmu->impl->write_s2cr) {
922 		smmu->impl->write_s2cr(smmu, idx);
923 		return;
924 	}
925 
926 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
927 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
928 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
929 
930 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
931 	    smmu->smrs[idx].valid)
932 		reg |= ARM_SMMU_S2CR_EXIDVALID;
933 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
934 }
935 
936 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
937 {
938 	arm_smmu_write_s2cr(smmu, idx);
939 	if (smmu->smrs)
940 		arm_smmu_write_smr(smmu, idx);
941 }
942 
943 /*
944  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
945  * should be called after sCR0 is written.
946  */
947 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
948 {
949 	u32 smr;
950 	int i;
951 
952 	if (!smmu->smrs)
953 		return;
954 	/*
955 	 * If we've had to accommodate firmware memory regions, we may
956 	 * have live SMRs by now; tread carefully...
957 	 *
958 	 * Somewhat perversely, not having a free SMR for this test implies we
959 	 * can get away without it anyway, as we'll only be able to 'allocate'
960 	 * these SMRs for the ID/mask values we're already trusting to be OK.
961 	 */
962 	for (i = 0; i < smmu->num_mapping_groups; i++)
963 		if (!smmu->smrs[i].valid)
964 			goto smr_ok;
965 	return;
966 smr_ok:
967 	/*
968 	 * SMR.ID bits may not be preserved if the corresponding MASK
969 	 * bits are set, so check each one separately. We can reject
970 	 * masters later if they try to claim IDs outside these masks.
971 	 */
972 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
973 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
974 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
975 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
976 
977 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
978 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
979 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
980 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
981 }
982 
983 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
984 {
985 	struct arm_smmu_smr *smrs = smmu->smrs;
986 	int i, free_idx = -ENOSPC;
987 
988 	/* Stream indexing is blissfully easy */
989 	if (!smrs)
990 		return id;
991 
992 	/* Validating SMRs is... less so */
993 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
994 		if (!smrs[i].valid) {
995 			/*
996 			 * Note the first free entry we come across, which
997 			 * we'll claim in the end if nothing else matches.
998 			 */
999 			if (free_idx < 0)
1000 				free_idx = i;
1001 			continue;
1002 		}
1003 		/*
1004 		 * If the new entry is _entirely_ matched by an existing entry,
1005 		 * then reuse that, with the guarantee that there also cannot
1006 		 * be any subsequent conflicting entries. In normal use we'd
1007 		 * expect simply identical entries for this case, but there's
1008 		 * no harm in accommodating the generalisation.
1009 		 */
1010 		if ((mask & smrs[i].mask) == mask &&
1011 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1012 			return i;
1013 		/*
1014 		 * If the new entry has any other overlap with an existing one,
1015 		 * though, then there always exists at least one stream ID
1016 		 * which would cause a conflict, and we can't allow that risk.
1017 		 */
1018 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1019 			return -EINVAL;
1020 	}
1021 
1022 	return free_idx;
1023 }
1024 
1025 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1026 {
1027 	if (--smmu->s2crs[idx].count)
1028 		return false;
1029 
1030 	smmu->s2crs[idx] = s2cr_init_val;
1031 	if (smmu->smrs)
1032 		smmu->smrs[idx].valid = false;
1033 
1034 	return true;
1035 }
1036 
1037 static int arm_smmu_master_alloc_smes(struct device *dev)
1038 {
1039 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1040 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1041 	struct arm_smmu_device *smmu = cfg->smmu;
1042 	struct arm_smmu_smr *smrs = smmu->smrs;
1043 	int i, idx, ret;
1044 
1045 	mutex_lock(&smmu->stream_map_mutex);
1046 	/* Figure out a viable stream map entry allocation */
1047 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1048 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1049 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1050 
1051 		if (idx != INVALID_SMENDX) {
1052 			ret = -EEXIST;
1053 			goto out_err;
1054 		}
1055 
1056 		ret = arm_smmu_find_sme(smmu, sid, mask);
1057 		if (ret < 0)
1058 			goto out_err;
1059 
1060 		idx = ret;
1061 		if (smrs && smmu->s2crs[idx].count == 0) {
1062 			smrs[idx].id = sid;
1063 			smrs[idx].mask = mask;
1064 			smrs[idx].valid = true;
1065 		}
1066 		smmu->s2crs[idx].count++;
1067 		cfg->smendx[i] = (s16)idx;
1068 	}
1069 
1070 	/* It worked! Now, poke the actual hardware */
1071 	for_each_cfg_sme(cfg, fwspec, i, idx)
1072 		arm_smmu_write_sme(smmu, idx);
1073 
1074 	mutex_unlock(&smmu->stream_map_mutex);
1075 	return 0;
1076 
1077 out_err:
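	/* Undo any stream map entries we claimed before the failure */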
1078 	while (i--) {
1079 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1080 		cfg->smendx[i] = INVALID_SMENDX;
1081 	}
1082 	mutex_unlock(&smmu->stream_map_mutex);
1083 	return ret;
1084 }
1085 
1086 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1087 				      struct iommu_fwspec *fwspec)
1088 {
1089 	struct arm_smmu_device *smmu = cfg->smmu;
1090 	int i, idx;
1091 
1092 	mutex_lock(&smmu->stream_map_mutex);
1093 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1094 		if (arm_smmu_free_sme(smmu, idx))
1095 			arm_smmu_write_sme(smmu, idx);
1096 		cfg->smendx[i] = INVALID_SMENDX;
1097 	}
1098 	mutex_unlock(&smmu->stream_map_mutex);
1099 }
1100 
1101 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1102 				      struct arm_smmu_master_cfg *cfg,
1103 				      struct iommu_fwspec *fwspec)
1104 {
1105 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1106 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1107 	u8 cbndx = smmu_domain->cfg.cbndx;
1108 	enum arm_smmu_s2cr_type type;
1109 	int i, idx;
1110 
1111 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1112 		type = S2CR_TYPE_BYPASS;
1113 	else
1114 		type = S2CR_TYPE_TRANS;
1115 
1116 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1117 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1118 			continue;
1119 
1120 		s2cr[idx].type = type;
1121 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1122 		s2cr[idx].cbndx = cbndx;
1123 		arm_smmu_write_s2cr(smmu, idx);
1124 	}
1125 	return 0;
1126 }
1127 
1128 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1129 {
1130 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1131 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1132 	struct arm_smmu_master_cfg *cfg;
1133 	struct arm_smmu_device *smmu;
1134 	int ret;
1135 
1136 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1137 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1138 		return -ENXIO;
1139 	}
1140 
1141 	/*
1142 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1143 	 * domains between of_xlate() and probe_device() - we have no way to cope
1144 	 * with that, so until ARM gets converted to rely on groups and default
1145 	 * domains, just say no (but more politely than by dereferencing NULL).
1146 	 * This should be at least a WARN_ON once that's sorted.
1147 	 */
1148 	cfg = dev_iommu_priv_get(dev);
1149 	if (!cfg)
1150 		return -ENODEV;
1151 
1152 	smmu = cfg->smmu;
1153 
1154 	ret = arm_smmu_rpm_get(smmu);
1155 	if (ret < 0)
1156 		return ret;
1157 
1158 	/* Ensure that the domain is finalised */
1159 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1160 	if (ret < 0)
1161 		goto rpm_put;
1162 
1163 	/*
1164 	 * Sanity check the domain. We don't support domains across
1165 	 * different SMMUs.
1166 	 */
1167 	if (smmu_domain->smmu != smmu) {
1168 		dev_err(dev,
1169 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1170 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1171 		ret = -EINVAL;
1172 		goto rpm_put;
1173 	}
1174 
1175 	/* Looks ok, so add the device to the domain */
1176 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1177 
1178 	/*
1179 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1180 	 * Otherwise, if a driver for a suspended consumer device
1181 	 * unmaps buffers, it will runpm resume/suspend for each one.
1182 	 *
1183 	 * For example, when used by a GPU device, when an application
1184 	 * or game exits, it can trigger unmapping 100s or 1000s of
1185 	 * buffers.  With a runpm cycle for each buffer, that adds up
1186 	 * to 5-10sec worth of reprogramming the context bank, while
1187 	 * the system appears to be locked up to the user.
1188 	 */
1189 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1190 	pm_runtime_use_autosuspend(smmu->dev);
1191 
1192 rpm_put:
1193 	arm_smmu_rpm_put(smmu);
1194 	return ret;
1195 }
1196 
1197 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1198 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1199 {
1200 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1201 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1202 	int ret;
1203 
1204 	if (!ops)
1205 		return -ENODEV;
1206 
1207 	arm_smmu_rpm_get(smmu);
1208 	ret = ops->map(ops, iova, paddr, size, prot, gfp);
1209 	arm_smmu_rpm_put(smmu);
1210 
1211 	return ret;
1212 }
1213 
1214 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1215 			     size_t size, struct iommu_iotlb_gather *gather)
1216 {
1217 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1218 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1219 	size_t ret;
1220 
1221 	if (!ops)
1222 		return 0;
1223 
1224 	arm_smmu_rpm_get(smmu);
1225 	ret = ops->unmap(ops, iova, size, gather);
1226 	arm_smmu_rpm_put(smmu);
1227 
1228 	return ret;
1229 }
1230 
1231 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1232 {
1233 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1234 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1235 
1236 	if (smmu_domain->flush_ops) {
1237 		arm_smmu_rpm_get(smmu);
1238 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1239 		arm_smmu_rpm_put(smmu);
1240 	}
1241 }
1242 
1243 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1244 				struct iommu_iotlb_gather *gather)
1245 {
1246 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1247 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1248 
1249 	if (!smmu)
1250 		return;
1251 
1252 	arm_smmu_rpm_get(smmu);
1253 	if (smmu->version == ARM_SMMU_V2 ||
1254 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1255 		arm_smmu_tlb_sync_context(smmu_domain);
1256 	else
1257 		arm_smmu_tlb_sync_global(smmu);
1258 	arm_smmu_rpm_put(smmu);
1259 }
1260 
1261 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1262 					      dma_addr_t iova)
1263 {
1264 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1265 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1266 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1268 	struct device *dev = smmu->dev;
1269 	void __iomem *reg;
1270 	u32 tmp;
1271 	u64 phys;
1272 	unsigned long va, flags;
1273 	int ret, idx = cfg->cbndx;
1274 
1275 	ret = arm_smmu_rpm_get(smmu);
1276 	if (ret < 0)
1277 		return 0;
1278 
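	/*
	 * Use the hardware's stage-1 address translation operation: write the
	 * page-aligned VA to ATS1PR, poll ATSR until the walk completes, then
	 * read the result (or fault indication) back from PAR.
	 */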
1279 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1280 	va = iova & ~0xfffUL;
1281 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1282 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1283 	else
1284 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1285 
1286 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1287 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1288 				      5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		arm_smmu_rpm_put(smmu);
		return ops->iova_to_phys(ops, iova);
1294 	}
1295 
1296 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1297 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & ARM_SMMU_CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		arm_smmu_rpm_put(smmu);
		return 0;
	}
1303 
1304 	arm_smmu_rpm_put(smmu);
1305 
1306 	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1307 }
1308 
1309 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1310 					dma_addr_t iova)
1311 {
1312 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1313 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1314 
1315 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
1316 		return iova;
1317 
1318 	if (!ops)
1319 		return 0;
1320 
1321 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1322 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1323 		return arm_smmu_iova_to_phys_hard(domain, iova);
1324 
1325 	return ops->iova_to_phys(ops, iova);
1326 }
1327 
1328 static bool arm_smmu_capable(enum iommu_cap cap)
1329 {
1330 	switch (cap) {
1331 	case IOMMU_CAP_CACHE_COHERENCY:
1332 		/*
1333 		 * Return true here as the SMMU can always send out coherent
1334 		 * requests.
1335 		 */
1336 		return true;
1337 	case IOMMU_CAP_NOEXEC:
1338 		return true;
1339 	default:
1340 		return false;
1341 	}
1342 }
1343 
1344 static
1345 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1346 {
1347 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1348 							  fwnode);
1349 	put_device(dev);
1350 	return dev ? dev_get_drvdata(dev) : NULL;
1351 }
1352 
1353 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1354 {
1355 	struct arm_smmu_device *smmu = NULL;
1356 	struct arm_smmu_master_cfg *cfg;
1357 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1358 	int i, ret;
1359 
1360 	if (using_legacy_binding) {
1361 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1362 
1363 		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1365 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1366 		 * later use.
1367 		 */
1368 		fwspec = dev_iommu_fwspec_get(dev);
1369 		if (ret)
1370 			goto out_free;
1371 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1372 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1373 	} else {
1374 		return ERR_PTR(-ENODEV);
1375 	}
1376 
1377 	ret = -EINVAL;
1378 	for (i = 0; i < fwspec->num_ids; i++) {
1379 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1380 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1381 
1382 		if (sid & ~smmu->streamid_mask) {
1383 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1384 				sid, smmu->streamid_mask);
1385 			goto out_free;
1386 		}
1387 		if (mask & ~smmu->smr_mask_mask) {
1388 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1389 				mask, smmu->smr_mask_mask);
1390 			goto out_free;
1391 		}
1392 	}
1393 
1394 	ret = -ENOMEM;
1395 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1396 		      GFP_KERNEL);
1397 	if (!cfg)
1398 		goto out_free;
1399 
1400 	cfg->smmu = smmu;
1401 	dev_iommu_priv_set(dev, cfg);
1402 	while (i--)
1403 		cfg->smendx[i] = INVALID_SMENDX;
1404 
1405 	ret = arm_smmu_rpm_get(smmu);
1406 	if (ret < 0)
1407 		goto out_cfg_free;
1408 
1409 	ret = arm_smmu_master_alloc_smes(dev);
1410 	arm_smmu_rpm_put(smmu);
1411 
1412 	if (ret)
1413 		goto out_cfg_free;
1414 
1415 	device_link_add(dev, smmu->dev,
1416 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1417 
1418 	return &smmu->iommu;
1419 
1420 out_cfg_free:
1421 	kfree(cfg);
1422 out_free:
1423 	iommu_fwspec_free(dev);
1424 	return ERR_PTR(ret);
1425 }
1426 
1427 static void arm_smmu_release_device(struct device *dev)
1428 {
1429 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1430 	struct arm_smmu_master_cfg *cfg;
1431 	struct arm_smmu_device *smmu;
1432 	int ret;
1433 
1434 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1435 		return;
1436 
1437 	cfg  = dev_iommu_priv_get(dev);
1438 	smmu = cfg->smmu;
1439 
1440 	ret = arm_smmu_rpm_get(smmu);
1441 	if (ret < 0)
1442 		return;
1443 
1444 	arm_smmu_master_free_smes(cfg, fwspec);
1445 
1446 	arm_smmu_rpm_put(smmu);
1447 
1448 	dev_iommu_priv_set(dev, NULL);
1449 	kfree(cfg);
1450 	iommu_fwspec_free(dev);
1451 }
1452 
1453 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1454 {
1455 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1456 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1457 	struct arm_smmu_device *smmu = cfg->smmu;
1458 	struct iommu_group *group = NULL;
1459 	int i, idx;
1460 
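	/*
	 * Masters whose Stream IDs alias onto the same SMEs cannot be isolated
	 * from each other, so reuse any group already recorded against our
	 * SMEs and reject conflicting records.
	 */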
1461 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1462 		if (group && smmu->s2crs[idx].group &&
1463 		    group != smmu->s2crs[idx].group)
1464 			return ERR_PTR(-EINVAL);
1465 
1466 		group = smmu->s2crs[idx].group;
1467 	}
1468 
1469 	if (group)
1470 		return iommu_group_ref_get(group);
1471 
1472 	if (dev_is_pci(dev))
1473 		group = pci_device_group(dev);
1474 	else if (dev_is_fsl_mc(dev))
1475 		group = fsl_mc_device_group(dev);
1476 	else
1477 		group = generic_device_group(dev);
1478 
1479 	/* Remember group for faster lookups */
1480 	if (!IS_ERR(group))
1481 		for_each_cfg_sme(cfg, fwspec, i, idx)
1482 			smmu->s2crs[idx].group = group;
1483 
1484 	return group;
1485 }
1486 
1487 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1488 {
1489 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1490 	int ret = 0;
1491 
1492 	mutex_lock(&smmu_domain->init_mutex);
1493 	if (smmu_domain->smmu)
1494 		ret = -EPERM;
1495 	else
1496 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1497 	mutex_unlock(&smmu_domain->init_mutex);
1498 
1499 	return ret;
1500 }
1501 
1502 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1503 		unsigned long quirks)
1504 {
1505 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1506 	int ret = 0;
1507 
1508 	mutex_lock(&smmu_domain->init_mutex);
1509 	if (smmu_domain->smmu)
1510 		ret = -EPERM;
1511 	else
1512 		smmu_domain->pgtbl_quirks = quirks;
1513 	mutex_unlock(&smmu_domain->init_mutex);
1514 
1515 	return ret;
1516 }
1517 
1518 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1519 {
1520 	u32 mask, fwid = 0;
1521 
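	/*
	 * Pack the Stream ID (and optional SMR mask) from the "iommus"
	 * specifier into a single 32-bit fwspec ID, mirroring the SMR layout.
	 */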
1522 	if (args->args_count > 0)
1523 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1524 
1525 	if (args->args_count > 1)
1526 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1527 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1528 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1529 
1530 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1531 }
1532 
1533 static void arm_smmu_get_resv_regions(struct device *dev,
1534 				      struct list_head *head)
1535 {
1536 	struct iommu_resv_region *region;
1537 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1538 
1539 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1540 					 prot, IOMMU_RESV_SW_MSI);
1541 	if (!region)
1542 		return;
1543 
1544 	list_add_tail(&region->list, head);
1545 
1546 	iommu_dma_get_resv_regions(dev, head);
1547 }
1548 
1549 static int arm_smmu_def_domain_type(struct device *dev)
1550 {
1551 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1552 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1553 
1554 	if (impl && impl->def_domain_type)
1555 		return impl->def_domain_type(dev);
1556 
1557 	return 0;
1558 }
1559 
1560 static struct iommu_ops arm_smmu_ops = {
1561 	.capable		= arm_smmu_capable,
1562 	.domain_alloc		= arm_smmu_domain_alloc,
1563 	.domain_free		= arm_smmu_domain_free,
1564 	.attach_dev		= arm_smmu_attach_dev,
1565 	.map			= arm_smmu_map,
1566 	.unmap			= arm_smmu_unmap,
1567 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1568 	.iotlb_sync		= arm_smmu_iotlb_sync,
1569 	.iova_to_phys		= arm_smmu_iova_to_phys,
1570 	.probe_device		= arm_smmu_probe_device,
1571 	.release_device		= arm_smmu_release_device,
1572 	.device_group		= arm_smmu_device_group,
1573 	.enable_nesting		= arm_smmu_enable_nesting,
1574 	.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
1575 	.of_xlate		= arm_smmu_of_xlate,
1576 	.get_resv_regions	= arm_smmu_get_resv_regions,
1577 	.put_resv_regions	= generic_iommu_put_resv_regions,
1578 	.def_domain_type	= arm_smmu_def_domain_type,
1579 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1580 	.owner			= THIS_MODULE,
1581 };
1582 
1583 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1584 {
1585 	int i;
1586 	u32 reg;
1587 
1588 	/* clear global FSR */
1589 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1590 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1591 
1592 	/*
1593 	 * Reset stream mapping groups: Initial values mark all SMRn as
1594 	 * invalid and all S2CRn as bypass unless overridden.
1595 	 */
1596 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1597 		arm_smmu_write_sme(smmu, i);
1598 
1599 	/* Make sure all context banks are disabled and clear CB_FSR  */
1600 	for (i = 0; i < smmu->num_context_banks; ++i) {
1601 		arm_smmu_write_context_bank(smmu, i);
1602 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1603 	}
1604 
1605 	/* Invalidate the TLB, just in case */
1606 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1607 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1608 
1609 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1610 
1611 	/* Enable fault reporting */
1612 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1613 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1614 
1615 	/* Disable TLB broadcasting. */
1616 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1617 
1618 	/* Enable client access, handling unmatched streams as appropriate */
1619 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1620 	if (disable_bypass)
1621 		reg |= ARM_SMMU_sCR0_USFCFG;
1622 	else
1623 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1624 
1625 	/* Disable forced broadcasting */
1626 	reg &= ~ARM_SMMU_sCR0_FB;
1627 
1628 	/* Don't upgrade barriers */
1629 	reg &= ~(ARM_SMMU_sCR0_BSU);
1630 
1631 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1632 		reg |= ARM_SMMU_sCR0_VMID16EN;
1633 
1634 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1635 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1636 
1637 	if (smmu->impl && smmu->impl->reset)
1638 		smmu->impl->reset(smmu);
1639 
1640 	/* Push the button */
1641 	arm_smmu_tlb_sync_global(smmu);
1642 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1643 }
1644 
1645 static int arm_smmu_id_size_to_bits(int size)
1646 {
1647 	switch (size) {
1648 	case 0:
1649 		return 32;
1650 	case 1:
1651 		return 36;
1652 	case 2:
1653 		return 40;
1654 	case 3:
1655 		return 42;
1656 	case 4:
1657 		return 44;
1658 	case 5:
1659 	default:
1660 		return 48;
1661 	}
1662 }
1663 
1664 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1665 {
1666 	unsigned int size;
1667 	u32 id;
1668 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1669 	int i, ret;
1670 
1671 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1672 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1673 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1674 
1675 	/* ID0 */
1676 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1677 
1678 	/* Restrict available stages based on module parameter */
1679 	if (force_stage == 1)
1680 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1681 	else if (force_stage == 2)
1682 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1683 
1684 	if (id & ARM_SMMU_ID0_S1TS) {
1685 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1686 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1687 	}
1688 
1689 	if (id & ARM_SMMU_ID0_S2TS) {
1690 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1691 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1692 	}
1693 
1694 	if (id & ARM_SMMU_ID0_NTS) {
1695 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1696 		dev_notice(smmu->dev, "\tnested translation\n");
1697 	}
1698 
1699 	if (!(smmu->features &
1700 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1701 		dev_err(smmu->dev, "\tno translation support!\n");
1702 		return -ENODEV;
1703 	}
1704 
1705 	if ((id & ARM_SMMU_ID0_S1TS) &&
1706 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1707 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1708 		dev_notice(smmu->dev, "\taddress translation ops\n");
1709 	}
1710 
1711 	/*
1712 	 * In order for DMA API calls to work properly, we must defer to what
1713 	 * the FW says about coherency, regardless of what the hardware claims.
1714 	 * Fortunately, this also opens up a workaround for systems where the
1715 	 * ID register value has ended up configured incorrectly.
1716 	 */
1717 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1718 	if (cttw_fw || cttw_reg)
1719 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1720 			   cttw_fw ? "" : "non-");
1721 	if (cttw_fw != cttw_reg)
1722 		dev_notice(smmu->dev,
1723 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1724 
1725 	/* Max. number of entries we have for stream matching/indexing */
1726 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1727 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1728 		size = 1 << 16;
1729 	} else {
1730 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1731 	}
1732 	smmu->streamid_mask = size - 1;
1733 	if (id & ARM_SMMU_ID0_SMS) {
1734 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1735 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1736 		if (size == 0) {
1737 			dev_err(smmu->dev,
1738 				"stream-matching supported, but no SMRs present!\n");
1739 			return -ENODEV;
1740 		}
1741 
1742 		/* Zero-initialised to mark as invalid */
1743 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1744 					  GFP_KERNEL);
1745 		if (!smmu->smrs)
1746 			return -ENOMEM;
1747 
1748 		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups\n", size);
1750 	}
1751 	/* s2cr->type == 0 means translation, so initialise explicitly */
1752 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1753 					 GFP_KERNEL);
1754 	if (!smmu->s2crs)
1755 		return -ENOMEM;
1756 	for (i = 0; i < size; i++)
1757 		smmu->s2crs[i] = s2cr_init_val;
1758 
1759 	smmu->num_mapping_groups = size;
1760 	mutex_init(&smmu->stream_map_mutex);
1761 	spin_lock_init(&smmu->global_sync_lock);
1762 
1763 	if (smmu->version < ARM_SMMU_V2 ||
1764 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1765 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1766 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1767 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1768 	}
1769 
1770 	/* ID1 */
1771 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1772 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1773 
1774 	/* Check for size mismatch of SMMU address space from mapped region */
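	/*
	 * The global register space occupies the first half of the SMMU and
	 * the context banks the second, hence 2 * NUMPAGE * PAGESIZE below.
	 */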
1775 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1776 	if (smmu->numpage != 2 * size << smmu->pgshift)
1777 		dev_warn(smmu->dev,
1778 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1779 			2 * size << smmu->pgshift, smmu->numpage);
1780 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1781 	smmu->numpage = size;
1782 
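	/* Stage-2-only context banks (NUMS2CB) are a subset of the total (NUMCB) */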
1783 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1784 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1785 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1786 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1787 		return -ENODEV;
1788 	}
1789 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1790 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1791 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1792 				 sizeof(*smmu->cbs), GFP_KERNEL);
1793 	if (!smmu->cbs)
1794 		return -ENOMEM;
1795 
1796 	/* ID2 */
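	/* ID2.IAS/OAS encode the supported intermediate and output address sizes */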
1797 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1798 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1799 	smmu->ipa_size = size;
1800 
1801 	/* The output mask is also applied for bypass */
1802 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1803 	smmu->pa_size = size;
1804 
1805 	if (id & ARM_SMMU_ID2_VMID16)
1806 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1807 
1808 	/*
1809 	 * What the page table walker can address actually depends on which
1810 	 * descriptor format is in use, but since a) we don't know that yet,
1811 	 * and b) it can vary per context bank, this will have to do...
1812 	 */
1813 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1814 		dev_warn(smmu->dev,
1815 			 "failed to set DMA mask for table walker\n");
1816 
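	/*
	 * For SMMUv1, take the input address range to match the IPA size;
	 * SMMUv2 reports its upstream bus size in ID2.UBS.
	 */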
1817 	if (smmu->version < ARM_SMMU_V2) {
1818 		smmu->va_size = smmu->ipa_size;
1819 		if (smmu->version == ARM_SMMU_V1_64K)
1820 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1821 	} else {
1822 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1823 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1824 		if (id & ARM_SMMU_ID2_PTFS_4K)
1825 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1826 		if (id & ARM_SMMU_ID2_PTFS_16K)
1827 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1828 		if (id & ARM_SMMU_ID2_PTFS_64K)
1829 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1830 	}
1831 
1832 	if (smmu->impl && smmu->impl->cfg_probe) {
1833 		ret = smmu->impl->cfg_probe(smmu);
1834 		if (ret)
1835 			return ret;
1836 	}
1837 
1838 	/* Now we've corralled the various formats, what'll it do? */
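	/*
	 * AArch32-S gives 4K/64K pages plus 1M sections and 16M supersections;
	 * the LPAE/AArch64 granules each add their block sizes on top.
	 */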
1839 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1840 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1841 	if (smmu->features &
1842 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1843 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1844 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1845 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1846 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1847 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1848 
1849 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1850 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1851 	else
1852 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1853 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1854 		   smmu->pgsize_bitmap);
1855 
1857 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1858 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1859 			   smmu->va_size, smmu->ipa_size);
1860 
1861 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1862 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1863 			   smmu->ipa_size, smmu->pa_size);
1864 
1865 	return 0;
1866 }
1867 
1868 struct arm_smmu_match_data {
1869 	enum arm_smmu_arch_version version;
1870 	enum arm_smmu_implementation model;
1871 };
1872 
1873 #define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
1874 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1875 
1876 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1877 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1878 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1879 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1880 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1881 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1882 
1883 static const struct of_device_id arm_smmu_of_match[] = {
1884 	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1885 	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1886 	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1887 	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1888 	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1889 	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1890 	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
1891 	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1892 	{ },
1893 };
1894 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1895 
1896 #ifdef CONFIG_ACPI
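/* Translate the IORT node's model field into a driver version/implementation */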
1897 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1898 {
1899 	int ret = 0;
1900 
1901 	switch (model) {
1902 	case ACPI_IORT_SMMU_V1:
1903 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1904 		smmu->version = ARM_SMMU_V1;
1905 		smmu->model = GENERIC_SMMU;
1906 		break;
1907 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1908 		smmu->version = ARM_SMMU_V1_64K;
1909 		smmu->model = GENERIC_SMMU;
1910 		break;
1911 	case ACPI_IORT_SMMU_V2:
1912 		smmu->version = ARM_SMMU_V2;
1913 		smmu->model = GENERIC_SMMU;
1914 		break;
1915 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1916 		smmu->version = ARM_SMMU_V2;
1917 		smmu->model = ARM_MMU500;
1918 		break;
1919 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1920 		smmu->version = ARM_SMMU_V2;
1921 		smmu->model = CAVIUM_SMMUV2;
1922 		break;
1923 	default:
1924 		ret = -ENODEV;
1925 	}
1926 
1927 	return ret;
1928 }
1929 
1930 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1931 				      struct arm_smmu_device *smmu)
1932 {
1933 	struct device *dev = smmu->dev;
1934 	struct acpi_iort_node *node =
1935 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1936 	struct acpi_iort_smmu *iort_smmu;
1937 	int ret;
1938 
1939 	/* Retrieve SMMU1/2 specific data */
1940 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1941 
1942 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1943 	if (ret < 0)
1944 		return ret;
1945 
1946 	/* Ignore the configuration access interrupt */
1947 	smmu->num_global_irqs = 1;
1948 
1949 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1950 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1951 
1952 	return 0;
1953 }
1954 #else
1955 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1956 					     struct arm_smmu_device *smmu)
1957 {
1958 	return -ENODEV;
1959 }
1960 #endif
1961 
1962 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
1963 				    struct arm_smmu_device *smmu)
1964 {
1965 	const struct arm_smmu_match_data *data;
1966 	struct device *dev = &pdev->dev;
1967 	bool legacy_binding;
1968 
1969 	if (of_property_read_u32(dev->of_node, "#global-interrupts",
1970 				 &smmu->num_global_irqs)) {
1971 		dev_err(dev, "missing #global-interrupts property\n");
1972 		return -ENODEV;
1973 	}
1974 
1975 	data = of_device_get_match_data(dev);
1976 	smmu->version = data->version;
1977 	smmu->model = data->model;
1978 
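	/*
	 * The legacy "mmu-masters" binding and the generic "iommus" binding
	 * cannot be mixed within one system: whichever probes first wins.
	 */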
1979 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
1980 	if (legacy_binding && !using_generic_binding) {
1981 		if (!using_legacy_binding) {
1982 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
1983 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
1984 		}
1985 		using_legacy_binding = true;
1986 	} else if (!legacy_binding && !using_legacy_binding) {
1987 		using_generic_binding = true;
1988 	} else {
1989 		dev_err(dev, "not probing due to mismatched DT properties\n");
1990 		return -ENODEV;
1991 	}
1992 
1993 	if (of_dma_is_coherent(dev->of_node))
1994 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1995 
1996 	return 0;
1997 }
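/*
 * Purely illustrative sketch of a generic-binding DT node (the node name,
 * addresses and interrupt numbers here are made up): one global fault IRQ
 * followed by two context-bank fault IRQs, with a master referencing the
 * SMMU through "iommus":
 *
 *	smmu: iommu@40000000 {
 *		compatible = "arm,mmu-500", "arm,smmu-v2";
 *		reg = <0x40000000 0x10000>;
 *		#global-interrupts = <1>;
 *		interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
 *			     <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
 *			     <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
 *		#iommu-cells = <1>;
 *	};
 *
 *	master {
 *		iommus = <&smmu 0x100>;
 *	};
 */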
1998 
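/*
 * Install our iommu_ops on every bus type that could carry masters behind an
 * SMMU, unwinding any partial setup on failure.
 */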
1999 static int arm_smmu_bus_init(struct iommu_ops *ops)
2000 {
2001 	int err;
2002 
2003 	/* Oh, for a proper bus abstraction */
2004 	if (!iommu_present(&platform_bus_type)) {
2005 		err = bus_set_iommu(&platform_bus_type, ops);
2006 		if (err)
2007 			return err;
2008 	}
2009 #ifdef CONFIG_ARM_AMBA
2010 	if (!iommu_present(&amba_bustype)) {
2011 		err = bus_set_iommu(&amba_bustype, ops);
2012 		if (err)
2013 			goto err_reset_platform_ops;
2014 	}
2015 #endif
2016 #ifdef CONFIG_PCI
2017 	if (!iommu_present(&pci_bus_type)) {
2018 		err = bus_set_iommu(&pci_bus_type, ops);
2019 		if (err)
2020 			goto err_reset_amba_ops;
2021 	}
2022 #endif
2023 #ifdef CONFIG_FSL_MC_BUS
2024 	if (!iommu_present(&fsl_mc_bus_type)) {
2025 		err = bus_set_iommu(&fsl_mc_bus_type, ops);
2026 		if (err)
2027 			goto err_reset_pci_ops;
2028 	}
2029 #endif
2030 	return 0;
2031 
2032 err_reset_pci_ops: __maybe_unused;
2033 #ifdef CONFIG_PCI
2034 	bus_set_iommu(&pci_bus_type, NULL);
2035 #endif
2036 err_reset_amba_ops: __maybe_unused;
2037 #ifdef CONFIG_ARM_AMBA
2038 	bus_set_iommu(&amba_bustype, NULL);
2039 #endif
2040 err_reset_platform_ops: __maybe_unused;
2041 	bus_set_iommu(&platform_bus_type, NULL);
2042 	return err;
2043 }
2044 
2045 static int arm_smmu_device_probe(struct platform_device *pdev)
2046 {
2047 	struct resource *res;
2048 	resource_size_t ioaddr;
2049 	struct arm_smmu_device *smmu;
2050 	struct device *dev = &pdev->dev;
2051 	int num_irqs, i, err;
2052 	irqreturn_t (*global_fault)(int irq, void *dev);
2053 
2054 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2055 	if (!smmu) {
2056 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2057 		return -ENOMEM;
2058 	}
2059 	smmu->dev = dev;
2060 
2061 	if (dev->of_node)
2062 		err = arm_smmu_device_dt_probe(pdev, smmu);
2063 	else
2064 		err = arm_smmu_device_acpi_probe(pdev, smmu);
2065 
2066 	if (err)
2067 		return err;
2068 
2069 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2070 	smmu->base = devm_ioremap_resource(dev, res);
2071 	if (IS_ERR(smmu->base))
2072 		return PTR_ERR(smmu->base);
2073 	ioaddr = res->start;
2074 	/*
2075 	 * The resource size should effectively match the value of SMMU_TOP;
2076 	 * stash that temporarily until we know PAGESIZE to validate it with.
2077 	 */
2078 	smmu->numpage = resource_size(res);
2079 
2080 	smmu = arm_smmu_impl_init(smmu);
2081 	if (IS_ERR(smmu))
2082 		return PTR_ERR(smmu);
2083 
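	/* The first num_global_irqs entries are global IRQs; the rest serve context banks */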
2084 	num_irqs = 0;
2085 	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2086 		num_irqs++;
2087 		if (num_irqs > smmu->num_global_irqs)
2088 			smmu->num_context_irqs++;
2089 	}
2090 
2091 	if (!smmu->num_context_irqs) {
2092 		dev_err(dev, "found %d interrupts but expected at least %d\n",
2093 			num_irqs, smmu->num_global_irqs + 1);
2094 		return -ENODEV;
2095 	}
2096 
2097 	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2098 				  GFP_KERNEL);
2099 	if (!smmu->irqs) {
2100 		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2101 		return -ENOMEM;
2102 	}
2103 
2104 	for (i = 0; i < num_irqs; ++i) {
2105 		int irq = platform_get_irq(pdev, i);
2106 
2107 		if (irq < 0)
2108 			return -ENODEV;
2109 		smmu->irqs[i] = irq;
2110 	}
2111 
2112 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2113 	if (err < 0) {
2114 		dev_err(dev, "failed to get clocks %d\n", err);
2115 		return err;
2116 	}
2117 	smmu->num_clks = err;
2118 
2119 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2120 	if (err)
2121 		return err;
2122 
2123 	err = arm_smmu_device_cfg_probe(smmu);
2124 	if (err)
2125 		return err;
2126 
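	/* SMMUv2 is expected to supply at least one context fault IRQ per context bank */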
2127 	if (smmu->version == ARM_SMMU_V2) {
2128 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2129 			dev_err(dev,
2130 			      "found only %d context irq(s) but %d required\n",
2131 			      smmu->num_context_irqs, smmu->num_context_banks);
2132 			return -ENODEV;
2133 		}
2134 
2135 		/* Ignore superfluous interrupts */
2136 		smmu->num_context_irqs = smmu->num_context_banks;
2137 	}
2138 
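	/* Let implementations override the default global fault handler */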
2139 	if (smmu->impl && smmu->impl->global_fault)
2140 		global_fault = smmu->impl->global_fault;
2141 	else
2142 		global_fault = arm_smmu_global_fault;
2143 
2144 	for (i = 0; i < smmu->num_global_irqs; ++i) {
2145 		err = devm_request_irq(smmu->dev, smmu->irqs[i],
2146 				       global_fault,
2147 				       IRQF_SHARED,
2148 				       "arm-smmu global fault",
2149 				       smmu);
2150 		if (err) {
2151 			dev_err(dev, "failed to request global IRQ %d (%u)\n",
2152 				i, smmu->irqs[i]);
2153 			return err;
2154 		}
2155 	}
2156 
2157 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2158 				     "smmu.%pa", &ioaddr);
2159 	if (err) {
2160 		dev_err(dev, "Failed to register iommu in sysfs\n");
2161 		return err;
2162 	}
2163 
2164 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2165 	if (err) {
2166 		dev_err(dev, "Failed to register iommu\n");
2167 		return err;
2168 	}
2169 
2170 	platform_set_drvdata(pdev, smmu);
2171 	arm_smmu_device_reset(smmu);
2172 	arm_smmu_test_smr_masks(smmu);
2173 
2174 	/*
2175 	 * We want to avoid touching dev->power.lock in fastpaths unless
2176 	 * it's really going to do something useful - pm_runtime_enabled()
2177 	 * can serve as an ideal proxy for that decision. So, conditionally
2178 	 * enable pm_runtime.
2179 	 */
2180 	if (dev->pm_domain) {
2181 		pm_runtime_set_active(dev);
2182 		pm_runtime_enable(dev);
2183 	}
2184 
2185 	/*
2186 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2187 	 * any device which might need it, so we want the bus ops in place
2188 	 * ready to handle default domain setup as soon as any SMMU exists.
2189 	 */
2190 	if (!using_legacy_binding)
2191 		return arm_smmu_bus_init(&arm_smmu_ops);
2192 
2193 	return 0;
2194 }
2195 
2196 static int arm_smmu_device_remove(struct platform_device *pdev)
2197 {
2198 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2199 
2200 	if (!smmu)
2201 		return -ENODEV;
2202 
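	/* Context banks still allocated mean live domains about to lose translation */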
2203 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2204 		dev_notice(&pdev->dev, "disabling translation\n");
2205 
2206 	arm_smmu_bus_init(NULL);
2207 	iommu_device_unregister(&smmu->iommu);
2208 	iommu_device_sysfs_remove(&smmu->iommu);
2209 
2210 	arm_smmu_rpm_get(smmu);
2211 	/* Turn the thing off */
2212 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2213 	arm_smmu_rpm_put(smmu);
2214 
2215 	if (pm_runtime_enabled(smmu->dev))
2216 		pm_runtime_force_suspend(smmu->dev);
2217 	else
2218 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2219 
2220 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2221 	return 0;
2222 }
2223 
2224 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2225 {
2226 	arm_smmu_device_remove(pdev);
2227 }
2228 
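/*
 * Runtime PM gates the clocks while idle; reset on resume in case register
 * state was lost while the SMMU was powered down.
 */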
2229 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2230 {
2231 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2232 	int ret;
2233 
2234 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2235 	if (ret)
2236 		return ret;
2237 
2238 	arm_smmu_device_reset(smmu);
2239 
2240 	return 0;
2241 }
2242 
2243 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2244 {
2245 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2246 
2247 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2248 
2249 	return 0;
2250 }
2251 
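/* System PM: leave the hardware alone if it is already runtime-suspended */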
2252 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2253 {
2254 	if (pm_runtime_suspended(dev))
2255 		return 0;
2256 
2257 	return arm_smmu_runtime_resume(dev);
2258 }
2259 
2260 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2261 {
2262 	if (pm_runtime_suspended(dev))
2263 		return 0;
2264 
2265 	return arm_smmu_runtime_suspend(dev);
2266 }
2267 
2268 static const struct dev_pm_ops arm_smmu_pm_ops = {
2269 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2270 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2271 			   arm_smmu_runtime_resume, NULL)
2272 };
2273 
2274 static struct platform_driver arm_smmu_driver = {
2275 	.driver	= {
2276 		.name			= "arm-smmu",
2277 		.of_match_table		= arm_smmu_of_match,
2278 		.pm			= &arm_smmu_pm_ops,
2279 		.suppress_bind_attrs    = true,
2280 	},
2281 	.probe	= arm_smmu_device_probe,
2282 	.remove	= arm_smmu_device_remove,
2283 	.shutdown = arm_smmu_device_shutdown,
2284 };
2285 module_platform_driver(arm_smmu_driver);
2286 
2287 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2288 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2289 MODULE_ALIAS("platform:arm-smmu");
2290 MODULE_LICENSE("GPL v2");
2291