xref: /openbmc/linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision f43e47c090dc7fe32d5410d8740c3a004eb2676f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva-lib.h"
33 
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 	"Disable MSI-based polling for CMD_SYNC completion.");
43 
/* Row selectors for arm_smmu_msi_cfg[]. */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX = 0,	/* row holding the ARM_SMMU_EVTQ_IRQ_CFG* values */
	GERROR_MSI_INDEX,	/* row holding the ARM_SMMU_GERROR_IRQ_CFG* values */
	PRIQ_MSI_INDEX,		/* row holding the ARM_SMMU_PRIQ_IRQ_CFG* values */
	ARM_SMMU_MAX_MSIS,	/* number of rows; must stay last */
};
50 
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 	[EVTQ_MSI_INDEX] = {
53 		ARM_SMMU_EVTQ_IRQ_CFG0,
54 		ARM_SMMU_EVTQ_IRQ_CFG1,
55 		ARM_SMMU_EVTQ_IRQ_CFG2,
56 	},
57 	[GERROR_MSI_INDEX] = {
58 		ARM_SMMU_GERROR_IRQ_CFG0,
59 		ARM_SMMU_GERROR_IRQ_CFG1,
60 		ARM_SMMU_GERROR_IRQ_CFG2,
61 	},
62 	[PRIQ_MSI_INDEX] = {
63 		ARM_SMMU_PRIQ_IRQ_CFG0,
64 		ARM_SMMU_PRIQ_IRQ_CFG1,
65 		ARM_SMMU_PRIQ_IRQ_CFG2,
66 	},
67 };
68 
69 struct arm_smmu_option_prop {
70 	u32 opt;
71 	const char *prop;
72 };
73 
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76 
77 /*
78  * Special value used by SVA when a process dies, to quiesce a CD without
79  * disabling it.
80  */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 	{ 0, NULL},
87 };
88 
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 	int i = 0;
92 
93 	do {
94 		if (of_property_read_bool(smmu->dev->of_node,
95 						arm_smmu_options[i].prop)) {
96 			smmu->options |= arm_smmu_options[i].opt;
97 			dev_notice(smmu->dev, "option %s\n",
98 				arm_smmu_options[i].prop);
99 		}
100 	} while (arm_smmu_options[++i].opt);
101 }
102 
103 /* Low-level queue manipulation functions */
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 	u32 space, prod, cons;
107 
108 	prod = Q_IDX(q, q->prod);
109 	cons = Q_IDX(q, q->cons);
110 
111 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 		space = (1 << q->max_n_shift) - (prod - cons);
113 	else
114 		space = cons - prod;
115 
116 	return space >= n;
117 }
118 
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124 
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130 
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138 
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 	/*
142 	 * Ensure that all CPU accesses (reads and writes) to the queue
143 	 * are complete before we update the cons pointer.
144 	 */
145 	__iomb();
146 	writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148 
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154 
155 static int queue_sync_prod_in(struct arm_smmu_queue *q)
156 {
157 	u32 prod;
158 	int ret = 0;
159 
160 	/*
161 	 * We can't use the _relaxed() variant here, as we must prevent
162 	 * speculative reads of the queue before we have determined that
163 	 * prod has indeed moved.
164 	 */
165 	prod = readl(q->prod_reg);
166 
167 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
168 		ret = -EOVERFLOW;
169 
170 	q->llq.prod = prod;
171 	return ret;
172 }
173 
174 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
175 {
176 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
177 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
178 }
179 
180 static void queue_poll_init(struct arm_smmu_device *smmu,
181 			    struct arm_smmu_queue_poll *qp)
182 {
183 	qp->delay = 1;
184 	qp->spin_cnt = 0;
185 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
186 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
187 }
188 
189 static int queue_poll(struct arm_smmu_queue_poll *qp)
190 {
191 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
192 		return -ETIMEDOUT;
193 
194 	if (qp->wfe) {
195 		wfe();
196 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
197 		cpu_relax();
198 	} else {
199 		udelay(qp->delay);
200 		qp->delay *= 2;
201 		qp->spin_cnt = 0;
202 	}
203 
204 	return 0;
205 }
206 
207 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
208 {
209 	int i;
210 
211 	for (i = 0; i < n_dwords; ++i)
212 		*dst++ = cpu_to_le64(*src++);
213 }
214 
215 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
216 {
217 	int i;
218 
219 	for (i = 0; i < n_dwords; ++i)
220 		*dst++ = le64_to_cpu(*src++);
221 }
222 
223 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
224 {
225 	if (queue_empty(&q->llq))
226 		return -EAGAIN;
227 
228 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
229 	queue_inc_cons(&q->llq);
230 	queue_sync_cons_out(q);
231 	return 0;
232 }
233 
234 /* High-level queue accessors */
235 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
236 {
237 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
238 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
239 
240 	switch (ent->opcode) {
241 	case CMDQ_OP_TLBI_EL2_ALL:
242 	case CMDQ_OP_TLBI_NSNH_ALL:
243 		break;
244 	case CMDQ_OP_PREFETCH_CFG:
245 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
246 		break;
247 	case CMDQ_OP_CFGI_CD:
248 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
249 		fallthrough;
250 	case CMDQ_OP_CFGI_STE:
251 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
252 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
253 		break;
254 	case CMDQ_OP_CFGI_CD_ALL:
255 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
256 		break;
257 	case CMDQ_OP_CFGI_ALL:
258 		/* Cover the entire SID range */
259 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
260 		break;
261 	case CMDQ_OP_TLBI_NH_VA:
262 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
263 		fallthrough;
264 	case CMDQ_OP_TLBI_EL2_VA:
265 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
266 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
267 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
268 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
269 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
270 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
271 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
272 		break;
273 	case CMDQ_OP_TLBI_S2_IPA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
277 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
278 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
280 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
281 		break;
282 	case CMDQ_OP_TLBI_NH_ASID:
283 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
284 		fallthrough;
285 	case CMDQ_OP_TLBI_S12_VMALL:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
287 		break;
288 	case CMDQ_OP_TLBI_EL2_ASID:
289 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
290 		break;
291 	case CMDQ_OP_ATC_INV:
292 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
293 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
294 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
295 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
296 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
297 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
298 		break;
299 	case CMDQ_OP_PRI_RESP:
300 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
301 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
302 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
303 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
304 		switch (ent->pri.resp) {
305 		case PRI_RESP_DENY:
306 		case PRI_RESP_FAIL:
307 		case PRI_RESP_SUCC:
308 			break;
309 		default:
310 			return -EINVAL;
311 		}
312 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
313 		break;
314 	case CMDQ_OP_RESUME:
315 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
316 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
317 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
318 		break;
319 	case CMDQ_OP_CMD_SYNC:
320 		if (ent->sync.msiaddr) {
321 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
322 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
323 		} else {
324 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
325 		}
326 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
327 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
328 		break;
329 	default:
330 		return -ENOENT;
331 	}
332 
333 	return 0;
334 }
335 
336 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
337 {
338 	return &smmu->cmdq;
339 }
340 
341 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
342 					 struct arm_smmu_queue *q, u32 prod)
343 {
344 	struct arm_smmu_cmdq_ent ent = {
345 		.opcode = CMDQ_OP_CMD_SYNC,
346 	};
347 
348 	/*
349 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
350 	 * payload, so the write will zero the entire command on that platform.
351 	 */
352 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
353 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
354 				   q->ent_dwords * 8;
355 	}
356 
357 	arm_smmu_cmdq_build_cmd(cmd, &ent);
358 }
359 
360 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
361 				     struct arm_smmu_queue *q)
362 {
363 	static const char * const cerror_str[] = {
364 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
365 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
366 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
367 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
368 	};
369 
370 	int i;
371 	u64 cmd[CMDQ_ENT_DWORDS];
372 	u32 cons = readl_relaxed(q->cons_reg);
373 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
374 	struct arm_smmu_cmdq_ent cmd_sync = {
375 		.opcode = CMDQ_OP_CMD_SYNC,
376 	};
377 
378 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
379 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
380 
381 	switch (idx) {
382 	case CMDQ_ERR_CERROR_ABT_IDX:
383 		dev_err(smmu->dev, "retrying command fetch\n");
384 		return;
385 	case CMDQ_ERR_CERROR_NONE_IDX:
386 		return;
387 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
388 		/*
389 		 * ATC Invalidation Completion timeout. CONS is still pointing
390 		 * at the CMD_SYNC. Attempt to complete other pending commands
391 		 * by repeating the CMD_SYNC, though we might well end up back
392 		 * here since the ATC invalidation may still be pending.
393 		 */
394 		return;
395 	case CMDQ_ERR_CERROR_ILL_IDX:
396 	default:
397 		break;
398 	}
399 
400 	/*
401 	 * We may have concurrent producers, so we need to be careful
402 	 * not to touch any of the shadow cmdq state.
403 	 */
404 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
405 	dev_err(smmu->dev, "skipping command in error state:\n");
406 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
407 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
408 
409 	/* Convert the erroneous command into a CMD_SYNC */
410 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
411 
412 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414 
415 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
416 {
417 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
418 }
419 
420 /*
421  * Command queue locking.
422  * This is a form of bastardised rwlock with the following major changes:
423  *
424  * - The only LOCK routines are exclusive_trylock() and shared_lock().
425  *   Neither have barrier semantics, and instead provide only a control
426  *   dependency.
427  *
428  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
429  *   fails if the caller appears to be the last lock holder (yes, this is
430  *   racy). All successful UNLOCK routines have RELEASE semantics.
431  */
432 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
433 {
434 	int val;
435 
436 	/*
437 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
438 	 * lock counter. When held in exclusive state, the lock counter is set
439 	 * to INT_MIN so these increments won't hurt as the value will remain
440 	 * negative.
441 	 */
442 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
443 		return;
444 
445 	do {
446 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
447 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
448 }
449 
450 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
451 {
452 	(void)atomic_dec_return_release(&cmdq->lock);
453 }
454 
455 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
456 {
457 	if (atomic_read(&cmdq->lock) == 1)
458 		return false;
459 
460 	arm_smmu_cmdq_shared_unlock(cmdq);
461 	return true;
462 }
463 
/*
 * Try to take the cmdq lock exclusively with local interrupts disabled.
 *
 * Evaluates to true if the lock counter was swapped from 0 to INT_MIN; in
 * that case interrupts stay disabled with the previous state saved in
 * @flags. Otherwise the interrupt state is restored and the result is false.
 * @cmdq is parenthesised so that any pointer expression may be passed.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&(cmdq)->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
473 
/*
 * Release an exclusively held cmdq lock (RELEASE ordering) and restore the
 * interrupt state saved in @flags. @cmdq is parenthesised so that any
 * pointer expression may be passed.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&(cmdq)->lock, 0);				\
	local_irq_restore(flags);					\
})
479 
480 
481 /*
482  * Command queue insertion.
483  * This is made fiddly by our attempts to achieve some sort of scalability
484  * since there is one queue shared amongst all of the CPUs in the system.  If
485  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
486  * then you'll *love* this monstrosity.
487  *
488  * The basic idea is to split the queue up into ranges of commands that are
489  * owned by a given CPU; the owner may not have written all of the commands
490  * itself, but is responsible for advancing the hardware prod pointer when
491  * the time comes. The algorithm is roughly:
492  *
493  * 	1. Allocate some space in the queue. At this point we also discover
494  *	   whether the head of the queue is currently owned by another CPU,
495  *	   or whether we are the owner.
496  *
497  *	2. Write our commands into our allocated slots in the queue.
498  *
499  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
500  *
501  *	4. If we are an owner:
502  *		a. Wait for the previous owner to finish.
503  *		b. Mark the queue head as unowned, which tells us the range
504  *		   that we are responsible for publishing.
505  *		c. Wait for all commands in our owned range to become valid.
506  *		d. Advance the hardware prod pointer.
507  *		e. Tell the next owner we've finished.
508  *
509  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
510  *	   owner), then we need to stick around until it has completed:
511  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
512  *		   to clear the first 4 bytes.
513  *		b. Otherwise, we spin waiting for the hardware cons pointer to
514  *		   advance past our command.
515  *
516  * The devil is in the details, particularly the use of locking for handling
517  * SYNC completion and freeing up space in the queue before we think that it is
518  * full.
519  */
520 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
521 					       u32 sprod, u32 eprod, bool set)
522 {
523 	u32 swidx, sbidx, ewidx, ebidx;
524 	struct arm_smmu_ll_queue llq = {
525 		.max_n_shift	= cmdq->q.llq.max_n_shift,
526 		.prod		= sprod,
527 	};
528 
529 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
530 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
531 
532 	while (llq.prod != eprod) {
533 		unsigned long mask;
534 		atomic_long_t *ptr;
535 		u32 limit = BITS_PER_LONG;
536 
537 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
538 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
539 
540 		ptr = &cmdq->valid_map[swidx];
541 
542 		if ((swidx == ewidx) && (sbidx < ebidx))
543 			limit = ebidx;
544 
545 		mask = GENMASK(limit - 1, sbidx);
546 
547 		/*
548 		 * The valid bit is the inverse of the wrap bit. This means
549 		 * that a zero-initialised queue is invalid and, after marking
550 		 * all entries as valid, they become invalid again when we
551 		 * wrap.
552 		 */
553 		if (set) {
554 			atomic_long_xor(mask, ptr);
555 		} else { /* Poll */
556 			unsigned long valid;
557 
558 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
559 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
560 		}
561 
562 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
563 	}
564 }
565 
566 /* Mark all entries in the range [sprod, eprod) as valid */
567 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
568 					u32 sprod, u32 eprod)
569 {
570 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
571 }
572 
573 /* Wait for all entries in the range [sprod, eprod) to become valid */
574 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
575 					 u32 sprod, u32 eprod)
576 {
577 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
578 }
579 
580 /* Wait for the command queue to become non-full */
581 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
582 					     struct arm_smmu_ll_queue *llq)
583 {
584 	unsigned long flags;
585 	struct arm_smmu_queue_poll qp;
586 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
587 	int ret = 0;
588 
589 	/*
590 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
591 	 * that fails, spin until somebody else updates it for us.
592 	 */
593 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
594 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
595 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
596 		llq->val = READ_ONCE(cmdq->q.llq.val);
597 		return 0;
598 	}
599 
600 	queue_poll_init(smmu, &qp);
601 	do {
602 		llq->val = READ_ONCE(cmdq->q.llq.val);
603 		if (!queue_full(llq))
604 			break;
605 
606 		ret = queue_poll(&qp);
607 	} while (!ret);
608 
609 	return ret;
610 }
611 
612 /*
613  * Wait until the SMMU signals a CMD_SYNC completion MSI.
614  * Must be called with the cmdq lock held in some capacity.
615  */
616 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
617 					  struct arm_smmu_ll_queue *llq)
618 {
619 	int ret = 0;
620 	struct arm_smmu_queue_poll qp;
621 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
622 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
623 
624 	queue_poll_init(smmu, &qp);
625 
626 	/*
627 	 * The MSI won't generate an event, since it's being written back
628 	 * into the command queue.
629 	 */
630 	qp.wfe = false;
631 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
632 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
633 	return ret;
634 }
635 
636 /*
637  * Wait until the SMMU cons index passes llq->prod.
638  * Must be called with the cmdq lock held in some capacity.
639  */
640 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
641 					       struct arm_smmu_ll_queue *llq)
642 {
643 	struct arm_smmu_queue_poll qp;
644 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
645 	u32 prod = llq->prod;
646 	int ret = 0;
647 
648 	queue_poll_init(smmu, &qp);
649 	llq->val = READ_ONCE(cmdq->q.llq.val);
650 	do {
651 		if (queue_consumed(llq, prod))
652 			break;
653 
654 		ret = queue_poll(&qp);
655 
656 		/*
657 		 * This needs to be a readl() so that our subsequent call
658 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
659 		 *
660 		 * Specifically, we need to ensure that we observe all
661 		 * shared_lock()s by other CMD_SYNCs that share our owner,
662 		 * so that a failing call to tryunlock() means that we're
663 		 * the last one out and therefore we can safely advance
664 		 * cmdq->q.llq.cons. Roughly speaking:
665 		 *
666 		 * CPU 0		CPU1			CPU2 (us)
667 		 *
668 		 * if (sync)
669 		 * 	shared_lock();
670 		 *
671 		 * dma_wmb();
672 		 * set_valid_map();
673 		 *
674 		 * 			if (owner) {
675 		 *				poll_valid_map();
676 		 *				<control dependency>
677 		 *				writel(prod_reg);
678 		 *
679 		 *						readl(cons_reg);
680 		 *						tryunlock();
681 		 *
682 		 * Requires us to see CPU 0's shared_lock() acquisition.
683 		 */
684 		llq->cons = readl(cmdq->q.cons_reg);
685 	} while (!ret);
686 
687 	return ret;
688 }
689 
690 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
691 					 struct arm_smmu_ll_queue *llq)
692 {
693 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
694 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
695 
696 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
697 }
698 
699 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
700 					u32 prod, int n)
701 {
702 	int i;
703 	struct arm_smmu_ll_queue llq = {
704 		.max_n_shift	= cmdq->q.llq.max_n_shift,
705 		.prod		= prod,
706 	};
707 
708 	for (i = 0; i < n; ++i) {
709 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
710 
711 		prod = queue_inc_prod_n(&llq, i);
712 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
713 	}
714 }
715 
716 /*
717  * This is the actual insertion function, and provides the following
718  * ordering guarantees to callers:
719  *
720  * - There is a dma_wmb() before publishing any commands to the queue.
721  *   This can be relied upon to order prior writes to data structures
722  *   in memory (such as a CD or an STE) before the command.
723  *
724  * - On completion of a CMD_SYNC, there is a control dependency.
725  *   This can be relied upon to order subsequent writes to memory (e.g.
726  *   freeing an IOVA) after completion of the CMD_SYNC.
727  *
728  * - Command insertion is totally ordered, so if two CPUs each race to
729  *   insert their own list of commands then all of the commands from one
730  *   CPU will appear before any of the commands from the other CPU.
731  */
732 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
733 				       u64 *cmds, int n, bool sync)
734 {
735 	u64 cmd_sync[CMDQ_ENT_DWORDS];
736 	u32 prod;
737 	unsigned long flags;
738 	bool owner;
739 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
740 	struct arm_smmu_ll_queue llq, head;
741 	int ret = 0;
742 
743 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
744 
745 	/* 1. Allocate some space in the queue */
746 	local_irq_save(flags);
747 	llq.val = READ_ONCE(cmdq->q.llq.val);
748 	do {
749 		u64 old;
750 
751 		while (!queue_has_space(&llq, n + sync)) {
752 			local_irq_restore(flags);
753 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
754 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
755 			local_irq_save(flags);
756 		}
757 
758 		head.cons = llq.cons;
759 		head.prod = queue_inc_prod_n(&llq, n + sync) |
760 					     CMDQ_PROD_OWNED_FLAG;
761 
762 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
763 		if (old == llq.val)
764 			break;
765 
766 		llq.val = old;
767 	} while (1);
768 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
769 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
770 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
771 
772 	/*
773 	 * 2. Write our commands into the queue
774 	 * Dependency ordering from the cmpxchg() loop above.
775 	 */
776 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
777 	if (sync) {
778 		prod = queue_inc_prod_n(&llq, n);
779 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
780 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
781 
782 		/*
783 		 * In order to determine completion of our CMD_SYNC, we must
784 		 * ensure that the queue can't wrap twice without us noticing.
785 		 * We achieve that by taking the cmdq lock as shared before
786 		 * marking our slot as valid.
787 		 */
788 		arm_smmu_cmdq_shared_lock(cmdq);
789 	}
790 
791 	/* 3. Mark our slots as valid, ensuring commands are visible first */
792 	dma_wmb();
793 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
794 
795 	/* 4. If we are the owner, take control of the SMMU hardware */
796 	if (owner) {
797 		/* a. Wait for previous owner to finish */
798 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
799 
800 		/* b. Stop gathering work by clearing the owned flag */
801 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
802 						   &cmdq->q.llq.atomic.prod);
803 		prod &= ~CMDQ_PROD_OWNED_FLAG;
804 
805 		/*
806 		 * c. Wait for any gathered work to be written to the queue.
807 		 * Note that we read our own entries so that we have the control
808 		 * dependency required by (d).
809 		 */
810 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
811 
812 		/*
813 		 * d. Advance the hardware prod pointer
814 		 * Control dependency ordering from the entries becoming valid.
815 		 */
816 		writel_relaxed(prod, cmdq->q.prod_reg);
817 
818 		/*
819 		 * e. Tell the next owner we're done
820 		 * Make sure we've updated the hardware first, so that we don't
821 		 * race to update prod and potentially move it backwards.
822 		 */
823 		atomic_set_release(&cmdq->owner_prod, prod);
824 	}
825 
826 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
827 	if (sync) {
828 		llq.prod = queue_inc_prod_n(&llq, n);
829 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
830 		if (ret) {
831 			dev_err_ratelimited(smmu->dev,
832 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
833 					    llq.prod,
834 					    readl_relaxed(cmdq->q.prod_reg),
835 					    readl_relaxed(cmdq->q.cons_reg));
836 		}
837 
838 		/*
839 		 * Try to unlock the cmdq lock. This will fail if we're the last
840 		 * reader, in which case we can safely update cmdq->q.llq.cons
841 		 */
842 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
843 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
844 			arm_smmu_cmdq_shared_unlock(cmdq);
845 		}
846 	}
847 
848 	local_irq_restore(flags);
849 	return ret;
850 }
851 
852 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
853 				     struct arm_smmu_cmdq_ent *ent,
854 				     bool sync)
855 {
856 	u64 cmd[CMDQ_ENT_DWORDS];
857 
858 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
859 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
860 			 ent->opcode);
861 		return -EINVAL;
862 	}
863 
864 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
865 }
866 
867 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
868 				   struct arm_smmu_cmdq_ent *ent)
869 {
870 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
871 }
872 
873 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
874 					     struct arm_smmu_cmdq_ent *ent)
875 {
876 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
877 }
878 
879 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
880 				    struct arm_smmu_cmdq_batch *cmds,
881 				    struct arm_smmu_cmdq_ent *cmd)
882 {
883 	int index;
884 
885 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
886 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
887 		cmds->num = 0;
888 	}
889 
890 	index = cmds->num * CMDQ_ENT_DWORDS;
891 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
892 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
893 			 cmd->opcode);
894 		return;
895 	}
896 
897 	cmds->num++;
898 }
899 
900 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
901 				      struct arm_smmu_cmdq_batch *cmds)
902 {
903 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
904 }
905 
906 static int arm_smmu_page_response(struct device *dev,
907 				  struct iommu_fault_event *unused,
908 				  struct iommu_page_response *resp)
909 {
910 	struct arm_smmu_cmdq_ent cmd = {0};
911 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
912 	int sid = master->streams[0].id;
913 
914 	if (master->stall_enabled) {
915 		cmd.opcode		= CMDQ_OP_RESUME;
916 		cmd.resume.sid		= sid;
917 		cmd.resume.stag		= resp->grpid;
918 		switch (resp->code) {
919 		case IOMMU_PAGE_RESP_INVALID:
920 		case IOMMU_PAGE_RESP_FAILURE:
921 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
922 			break;
923 		case IOMMU_PAGE_RESP_SUCCESS:
924 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
925 			break;
926 		default:
927 			return -EINVAL;
928 		}
929 	} else {
930 		return -ENODEV;
931 	}
932 
933 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
934 	/*
935 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
936 	 * RESUME consumption guarantees that the stalled transaction will be
937 	 * terminated... at some point in the future. PRI_RESP is fire and
938 	 * forget.
939 	 */
940 
941 	return 0;
942 }
943 
944 /* Context descriptor manipulation functions */
945 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
946 {
947 	struct arm_smmu_cmdq_ent cmd = {
948 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
949 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
950 		.tlbi.asid = asid,
951 	};
952 
953 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
954 }
955 
956 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
957 			     int ssid, bool leaf)
958 {
959 	size_t i;
960 	unsigned long flags;
961 	struct arm_smmu_master *master;
962 	struct arm_smmu_cmdq_batch cmds;
963 	struct arm_smmu_device *smmu = smmu_domain->smmu;
964 	struct arm_smmu_cmdq_ent cmd = {
965 		.opcode	= CMDQ_OP_CFGI_CD,
966 		.cfgi	= {
967 			.ssid	= ssid,
968 			.leaf	= leaf,
969 		},
970 	};
971 
972 	cmds.num = 0;
973 
974 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
975 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
976 		for (i = 0; i < master->num_streams; i++) {
977 			cmd.cfgi.sid = master->streams[i].id;
978 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
979 		}
980 	}
981 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
982 
983 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
984 }
985 
986 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
987 					struct arm_smmu_l1_ctx_desc *l1_desc)
988 {
989 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
990 
991 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
992 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
993 	if (!l1_desc->l2ptr) {
994 		dev_warn(smmu->dev,
995 			 "failed to allocate context descriptor table\n");
996 		return -ENOMEM;
997 	}
998 	return 0;
999 }
1000 
1001 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1002 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1003 {
1004 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1005 		  CTXDESC_L1_DESC_V;
1006 
1007 	/* See comment in arm_smmu_write_ctx_desc() */
1008 	WRITE_ONCE(*dst, cpu_to_le64(val));
1009 }
1010 
1011 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1012 				   u32 ssid)
1013 {
1014 	__le64 *l1ptr;
1015 	unsigned int idx;
1016 	struct arm_smmu_l1_ctx_desc *l1_desc;
1017 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1018 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1019 
1020 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1021 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1022 
1023 	idx = ssid >> CTXDESC_SPLIT;
1024 	l1_desc = &cdcfg->l1_desc[idx];
1025 	if (!l1_desc->l2ptr) {
1026 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1027 			return NULL;
1028 
1029 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1030 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1031 		/* An invalid L1CD can be cached */
1032 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1033 	}
1034 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1035 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1036 }
1037 
1038 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1039 			    struct arm_smmu_ctx_desc *cd)
1040 {
1041 	/*
1042 	 * This function handles the following cases:
1043 	 *
1044 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1045 	 * (2) Install a secondary CD, for SID+SSID traffic.
1046 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1047 	 *     CD, then invalidate the old entry and mappings.
1048 	 * (4) Quiesce the context without clearing the valid bit. Disable
1049 	 *     translation, and ignore any translation fault.
1050 	 * (5) Remove a secondary CD.
1051 	 */
1052 	u64 val;
1053 	bool cd_live;
1054 	__le64 *cdptr;
1055 
1056 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1057 		return -E2BIG;
1058 
1059 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1060 	if (!cdptr)
1061 		return -ENOMEM;
1062 
1063 	val = le64_to_cpu(cdptr[0]);
1064 	cd_live = !!(val & CTXDESC_CD_0_V);
1065 
1066 	if (!cd) { /* (5) */
1067 		val = 0;
1068 	} else if (cd == &quiet_cd) { /* (4) */
1069 		val |= CTXDESC_CD_0_TCR_EPD0;
1070 	} else if (cd_live) { /* (3) */
1071 		val &= ~CTXDESC_CD_0_ASID;
1072 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1073 		/*
1074 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1075 		 * this substream's traffic
1076 		 */
1077 	} else { /* (1) and (2) */
1078 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1079 		cdptr[2] = 0;
1080 		cdptr[3] = cpu_to_le64(cd->mair);
1081 
1082 		/*
1083 		 * STE is live, and the SMMU might read dwords of this CD in any
1084 		 * order. Ensure that it observes valid values before reading
1085 		 * V=1.
1086 		 */
1087 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1088 
1089 		val = cd->tcr |
1090 #ifdef __BIG_ENDIAN
1091 			CTXDESC_CD_0_ENDI |
1092 #endif
1093 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1094 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1095 			CTXDESC_CD_0_AA64 |
1096 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1097 			CTXDESC_CD_0_V;
1098 
1099 		if (smmu_domain->stall_enabled)
1100 			val |= CTXDESC_CD_0_S;
1101 	}
1102 
1103 	/*
1104 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1105 	 * "Configuration structures and configuration invalidation completion"
1106 	 *
1107 	 *   The size of single-copy atomic reads made by the SMMU is
1108 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1109 	 *   field within an aligned 64-bit span of a structure can be altered
1110 	 *   without first making the structure invalid.
1111 	 */
1112 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1113 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1114 	return 0;
1115 }
1116 
1117 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1118 {
1119 	int ret;
1120 	size_t l1size;
1121 	size_t max_contexts;
1122 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1123 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1124 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1125 
1126 	max_contexts = 1 << cfg->s1cdmax;
1127 
1128 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1129 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1130 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1131 		cdcfg->num_l1_ents = max_contexts;
1132 
1133 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1134 	} else {
1135 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1136 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1137 						  CTXDESC_L2_ENTRIES);
1138 
1139 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1140 					      sizeof(*cdcfg->l1_desc),
1141 					      GFP_KERNEL);
1142 		if (!cdcfg->l1_desc)
1143 			return -ENOMEM;
1144 
1145 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1146 	}
1147 
1148 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1149 					   GFP_KERNEL);
1150 	if (!cdcfg->cdtab) {
1151 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1152 		ret = -ENOMEM;
1153 		goto err_free_l1;
1154 	}
1155 
1156 	return 0;
1157 
1158 err_free_l1:
1159 	if (cdcfg->l1_desc) {
1160 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1161 		cdcfg->l1_desc = NULL;
1162 	}
1163 	return ret;
1164 }
1165 
1166 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1167 {
1168 	int i;
1169 	size_t size, l1size;
1170 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1171 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1172 
1173 	if (cdcfg->l1_desc) {
1174 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1175 
1176 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1177 			if (!cdcfg->l1_desc[i].l2ptr)
1178 				continue;
1179 
1180 			dmam_free_coherent(smmu->dev, size,
1181 					   cdcfg->l1_desc[i].l2ptr,
1182 					   cdcfg->l1_desc[i].l2ptr_dma);
1183 		}
1184 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1185 		cdcfg->l1_desc = NULL;
1186 
1187 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1188 	} else {
1189 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1190 	}
1191 
1192 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1193 	cdcfg->cdtab_dma = 0;
1194 	cdcfg->cdtab = NULL;
1195 }
1196 
1197 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1198 {
1199 	bool free;
1200 	struct arm_smmu_ctx_desc *old_cd;
1201 
1202 	if (!cd->asid)
1203 		return false;
1204 
1205 	free = refcount_dec_and_test(&cd->refs);
1206 	if (free) {
1207 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1208 		WARN_ON(old_cd != cd);
1209 	}
1210 	return free;
1211 }
1212 
1213 /* Stream table manipulation functions */
1214 static void
1215 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1216 {
1217 	u64 val = 0;
1218 
1219 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1220 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1221 
1222 	/* See comment in arm_smmu_write_ctx_desc() */
1223 	WRITE_ONCE(*dst, cpu_to_le64(val));
1224 }
1225 
1226 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1227 {
1228 	struct arm_smmu_cmdq_ent cmd = {
1229 		.opcode	= CMDQ_OP_CFGI_STE,
1230 		.cfgi	= {
1231 			.sid	= sid,
1232 			.leaf	= true,
1233 		},
1234 	};
1235 
1236 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1237 }
1238 
1239 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1240 				      __le64 *dst)
1241 {
1242 	/*
1243 	 * This is hideously complicated, but we only really care about
1244 	 * three cases at the moment:
1245 	 *
1246 	 * 1. Invalid (all zero) -> bypass/fault (init)
1247 	 * 2. Bypass/fault -> translation/bypass (attach)
1248 	 * 3. Translation/bypass -> bypass/fault (detach)
1249 	 *
1250 	 * Given that we can't update the STE atomically and the SMMU
1251 	 * doesn't read the thing in a defined order, that leaves us
1252 	 * with the following maintenance requirements:
1253 	 *
1254 	 * 1. Update Config, return (init time STEs aren't live)
1255 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1256 	 * 3. Update Config, sync
1257 	 */
1258 	u64 val = le64_to_cpu(dst[0]);
1259 	bool ste_live = false;
1260 	struct arm_smmu_device *smmu = NULL;
1261 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1262 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1263 	struct arm_smmu_domain *smmu_domain = NULL;
1264 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1265 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1266 		.prefetch	= {
1267 			.sid	= sid,
1268 		},
1269 	};
1270 
1271 	if (master) {
1272 		smmu_domain = master->domain;
1273 		smmu = master->smmu;
1274 	}
1275 
1276 	if (smmu_domain) {
1277 		switch (smmu_domain->stage) {
1278 		case ARM_SMMU_DOMAIN_S1:
1279 			s1_cfg = &smmu_domain->s1_cfg;
1280 			break;
1281 		case ARM_SMMU_DOMAIN_S2:
1282 		case ARM_SMMU_DOMAIN_NESTED:
1283 			s2_cfg = &smmu_domain->s2_cfg;
1284 			break;
1285 		default:
1286 			break;
1287 		}
1288 	}
1289 
1290 	if (val & STRTAB_STE_0_V) {
1291 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1292 		case STRTAB_STE_0_CFG_BYPASS:
1293 			break;
1294 		case STRTAB_STE_0_CFG_S1_TRANS:
1295 		case STRTAB_STE_0_CFG_S2_TRANS:
1296 			ste_live = true;
1297 			break;
1298 		case STRTAB_STE_0_CFG_ABORT:
1299 			BUG_ON(!disable_bypass);
1300 			break;
1301 		default:
1302 			BUG(); /* STE corruption */
1303 		}
1304 	}
1305 
1306 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1307 	val = STRTAB_STE_0_V;
1308 
1309 	/* Bypass/fault */
1310 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1311 		if (!smmu_domain && disable_bypass)
1312 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1313 		else
1314 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1315 
1316 		dst[0] = cpu_to_le64(val);
1317 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1318 						STRTAB_STE_1_SHCFG_INCOMING));
1319 		dst[2] = 0; /* Nuke the VMID */
1320 		/*
1321 		 * The SMMU can perform negative caching, so we must sync
1322 		 * the STE regardless of whether the old value was live.
1323 		 */
1324 		if (smmu)
1325 			arm_smmu_sync_ste_for_sid(smmu, sid);
1326 		return;
1327 	}
1328 
1329 	if (s1_cfg) {
1330 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1331 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1332 
1333 		BUG_ON(ste_live);
1334 		dst[1] = cpu_to_le64(
1335 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1336 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1337 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1338 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1339 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1340 
1341 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1342 		    !master->stall_enabled)
1343 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1344 
1345 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1346 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1347 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1348 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1349 	}
1350 
1351 	if (s2_cfg) {
1352 		BUG_ON(ste_live);
1353 		dst[2] = cpu_to_le64(
1354 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1355 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1356 #ifdef __BIG_ENDIAN
1357 			 STRTAB_STE_2_S2ENDI |
1358 #endif
1359 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1360 			 STRTAB_STE_2_S2R);
1361 
1362 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1363 
1364 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1365 	}
1366 
1367 	if (master->ats_enabled)
1368 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1369 						 STRTAB_STE_1_EATS_TRANS));
1370 
1371 	arm_smmu_sync_ste_for_sid(smmu, sid);
1372 	/* See comment in arm_smmu_write_ctx_desc() */
1373 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1374 	arm_smmu_sync_ste_for_sid(smmu, sid);
1375 
1376 	/* It's likely that we'll want to use the new STE soon */
1377 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1378 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1379 }
1380 
1381 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1382 {
1383 	unsigned int i;
1384 	u64 val = STRTAB_STE_0_V;
1385 
1386 	if (disable_bypass && !force)
1387 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1388 	else
1389 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1390 
1391 	for (i = 0; i < nent; ++i) {
1392 		strtab[0] = cpu_to_le64(val);
1393 		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1394 						   STRTAB_STE_1_SHCFG_INCOMING));
1395 		strtab[2] = 0;
1396 		strtab += STRTAB_STE_DWORDS;
1397 	}
1398 }
1399 
1400 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1401 {
1402 	size_t size;
1403 	void *strtab;
1404 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1405 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1406 
1407 	if (desc->l2ptr)
1408 		return 0;
1409 
1410 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1411 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1412 
1413 	desc->span = STRTAB_SPLIT + 1;
1414 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1415 					  GFP_KERNEL);
1416 	if (!desc->l2ptr) {
1417 		dev_err(smmu->dev,
1418 			"failed to allocate l2 stream table for SID %u\n",
1419 			sid);
1420 		return -ENOMEM;
1421 	}
1422 
1423 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1424 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1425 	return 0;
1426 }
1427 
1428 static struct arm_smmu_master *
1429 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1430 {
1431 	struct rb_node *node;
1432 	struct arm_smmu_stream *stream;
1433 
1434 	lockdep_assert_held(&smmu->streams_mutex);
1435 
1436 	node = smmu->streams.rb_node;
1437 	while (node) {
1438 		stream = rb_entry(node, struct arm_smmu_stream, node);
1439 		if (stream->id < sid)
1440 			node = node->rb_right;
1441 		else if (stream->id > sid)
1442 			node = node->rb_left;
1443 		else
1444 			return stream->master;
1445 	}
1446 
1447 	return NULL;
1448 }
1449 
1450 /* IRQ and event handlers */
1451 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1452 {
1453 	int ret;
1454 	u32 reason;
1455 	u32 perm = 0;
1456 	struct arm_smmu_master *master;
1457 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1458 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1459 	struct iommu_fault_event fault_evt = { };
1460 	struct iommu_fault *flt = &fault_evt.fault;
1461 
1462 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1463 	case EVT_ID_TRANSLATION_FAULT:
1464 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1465 		break;
1466 	case EVT_ID_ADDR_SIZE_FAULT:
1467 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1468 		break;
1469 	case EVT_ID_ACCESS_FAULT:
1470 		reason = IOMMU_FAULT_REASON_ACCESS;
1471 		break;
1472 	case EVT_ID_PERMISSION_FAULT:
1473 		reason = IOMMU_FAULT_REASON_PERMISSION;
1474 		break;
1475 	default:
1476 		return -EOPNOTSUPP;
1477 	}
1478 
1479 	/* Stage-2 is always pinned at the moment */
1480 	if (evt[1] & EVTQ_1_S2)
1481 		return -EFAULT;
1482 
1483 	if (evt[1] & EVTQ_1_RnW)
1484 		perm |= IOMMU_FAULT_PERM_READ;
1485 	else
1486 		perm |= IOMMU_FAULT_PERM_WRITE;
1487 
1488 	if (evt[1] & EVTQ_1_InD)
1489 		perm |= IOMMU_FAULT_PERM_EXEC;
1490 
1491 	if (evt[1] & EVTQ_1_PnU)
1492 		perm |= IOMMU_FAULT_PERM_PRIV;
1493 
1494 	if (evt[1] & EVTQ_1_STALL) {
1495 		flt->type = IOMMU_FAULT_PAGE_REQ;
1496 		flt->prm = (struct iommu_fault_page_request) {
1497 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1498 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1499 			.perm = perm,
1500 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1501 		};
1502 
1503 		if (ssid_valid) {
1504 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1505 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1506 		}
1507 	} else {
1508 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1509 		flt->event = (struct iommu_fault_unrecoverable) {
1510 			.reason = reason,
1511 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1512 			.perm = perm,
1513 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1514 		};
1515 
1516 		if (ssid_valid) {
1517 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1518 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1519 		}
1520 	}
1521 
1522 	mutex_lock(&smmu->streams_mutex);
1523 	master = arm_smmu_find_master(smmu, sid);
1524 	if (!master) {
1525 		ret = -EINVAL;
1526 		goto out_unlock;
1527 	}
1528 
1529 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1530 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1531 		/* Nobody cared, abort the access */
1532 		struct iommu_page_response resp = {
1533 			.pasid		= flt->prm.pasid,
1534 			.grpid		= flt->prm.grpid,
1535 			.code		= IOMMU_PAGE_RESP_FAILURE,
1536 		};
1537 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1538 	}
1539 
1540 out_unlock:
1541 	mutex_unlock(&smmu->streams_mutex);
1542 	return ret;
1543 }
1544 
1545 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1546 {
1547 	int i, ret;
1548 	struct arm_smmu_device *smmu = dev;
1549 	struct arm_smmu_queue *q = &smmu->evtq.q;
1550 	struct arm_smmu_ll_queue *llq = &q->llq;
1551 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1552 				      DEFAULT_RATELIMIT_BURST);
1553 	u64 evt[EVTQ_ENT_DWORDS];
1554 
1555 	do {
1556 		while (!queue_remove_raw(q, evt)) {
1557 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1558 
1559 			ret = arm_smmu_handle_evt(smmu, evt);
1560 			if (!ret || !__ratelimit(&rs))
1561 				continue;
1562 
1563 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1564 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1565 				dev_info(smmu->dev, "\t0x%016llx\n",
1566 					 (unsigned long long)evt[i]);
1567 
1568 			cond_resched();
1569 		}
1570 
1571 		/*
1572 		 * Not much we can do on overflow, so scream and pretend we're
1573 		 * trying harder.
1574 		 */
1575 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1576 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1577 	} while (!queue_empty(llq));
1578 
1579 	/* Sync our overflow flag, as we believe we're up to speed */
1580 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1581 		    Q_IDX(llq, llq->cons);
1582 	return IRQ_HANDLED;
1583 }
1584 
1585 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1586 {
1587 	u32 sid, ssid;
1588 	u16 grpid;
1589 	bool ssv, last;
1590 
1591 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1592 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1593 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1594 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1595 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1596 
1597 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1598 	dev_info(smmu->dev,
1599 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1600 		 sid, ssid, grpid, last ? "L" : "",
1601 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1602 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1603 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1604 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1605 		 evt[1] & PRIQ_1_ADDR_MASK);
1606 
1607 	if (last) {
1608 		struct arm_smmu_cmdq_ent cmd = {
1609 			.opcode			= CMDQ_OP_PRI_RESP,
1610 			.substream_valid	= ssv,
1611 			.pri			= {
1612 				.sid	= sid,
1613 				.ssid	= ssid,
1614 				.grpid	= grpid,
1615 				.resp	= PRI_RESP_DENY,
1616 			},
1617 		};
1618 
1619 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1620 	}
1621 }
1622 
1623 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1624 {
1625 	struct arm_smmu_device *smmu = dev;
1626 	struct arm_smmu_queue *q = &smmu->priq.q;
1627 	struct arm_smmu_ll_queue *llq = &q->llq;
1628 	u64 evt[PRIQ_ENT_DWORDS];
1629 
1630 	do {
1631 		while (!queue_remove_raw(q, evt))
1632 			arm_smmu_handle_ppr(smmu, evt);
1633 
1634 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1635 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1636 	} while (!queue_empty(llq));
1637 
1638 	/* Sync our overflow flag, as we believe we're up to speed */
1639 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1640 		      Q_IDX(llq, llq->cons);
1641 	queue_sync_cons_out(q);
1642 	return IRQ_HANDLED;
1643 }
1644 
1645 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1646 
1647 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1648 {
1649 	u32 gerror, gerrorn, active;
1650 	struct arm_smmu_device *smmu = dev;
1651 
1652 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1653 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1654 
1655 	active = gerror ^ gerrorn;
1656 	if (!(active & GERROR_ERR_MASK))
1657 		return IRQ_NONE; /* No errors pending */
1658 
1659 	dev_warn(smmu->dev,
1660 		 "unexpected global error reported (0x%08x), this could be serious\n",
1661 		 active);
1662 
1663 	if (active & GERROR_SFM_ERR) {
1664 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1665 		arm_smmu_device_disable(smmu);
1666 	}
1667 
1668 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1669 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1670 
1671 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1672 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1673 
1674 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1675 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1676 
1677 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1678 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1679 
1680 	if (active & GERROR_PRIQ_ABT_ERR)
1681 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1682 
1683 	if (active & GERROR_EVTQ_ABT_ERR)
1684 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1685 
1686 	if (active & GERROR_CMDQ_ERR)
1687 		arm_smmu_cmdq_skip_err(smmu);
1688 
1689 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1690 	return IRQ_HANDLED;
1691 }
1692 
1693 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1694 {
1695 	struct arm_smmu_device *smmu = dev;
1696 
1697 	arm_smmu_evtq_thread(irq, dev);
1698 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1699 		arm_smmu_priq_thread(irq, dev);
1700 
1701 	return IRQ_HANDLED;
1702 }
1703 
1704 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1705 {
1706 	arm_smmu_gerror_handler(irq, dev);
1707 	return IRQ_WAKE_THREAD;
1708 }
1709 
1710 static void
1711 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1712 			struct arm_smmu_cmdq_ent *cmd)
1713 {
1714 	size_t log2_span;
1715 	size_t span_mask;
1716 	/* ATC invalidates are always on 4096-bytes pages */
1717 	size_t inval_grain_shift = 12;
1718 	unsigned long page_start, page_end;
1719 
1720 	/*
1721 	 * ATS and PASID:
1722 	 *
1723 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1724 	 * prefix. In that case all ATC entries within the address range are
1725 	 * invalidated, including those that were requested with a PASID! There
1726 	 * is no way to invalidate only entries without PASID.
1727 	 *
1728 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1729 	 * traffic), translation requests without PASID create ATC entries
1730 	 * without PASID, which must be invalidated with substream_valid clear.
1731 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1732 	 * ATC entries within the address range.
1733 	 */
1734 	*cmd = (struct arm_smmu_cmdq_ent) {
1735 		.opcode			= CMDQ_OP_ATC_INV,
1736 		.substream_valid	= !!ssid,
1737 		.atc.ssid		= ssid,
1738 	};
1739 
1740 	if (!size) {
1741 		cmd->atc.size = ATC_INV_SIZE_ALL;
1742 		return;
1743 	}
1744 
1745 	page_start	= iova >> inval_grain_shift;
1746 	page_end	= (iova + size - 1) >> inval_grain_shift;
1747 
1748 	/*
1749 	 * In an ATS Invalidate Request, the address must be aligned on the
1750 	 * range size, which must be a power of two number of page sizes. We
1751 	 * thus have to choose between grossly over-invalidating the region, or
1752 	 * splitting the invalidation into multiple commands. For simplicity
1753 	 * we'll go with the first solution, but should refine it in the future
1754 	 * if multiple commands are shown to be more efficient.
1755 	 *
1756 	 * Find the smallest power of two that covers the range. The most
1757 	 * significant differing bit between the start and end addresses,
1758 	 * fls(start ^ end), indicates the required span. For example:
1759 	 *
1760 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1761 	 *		x = 0b1000 ^ 0b1011 = 0b11
1762 	 *		span = 1 << fls(x) = 4
1763 	 *
1764 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1765 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1766 	 *		span = 1 << fls(x) = 16
1767 	 */
1768 	log2_span	= fls_long(page_start ^ page_end);
1769 	span_mask	= (1ULL << log2_span) - 1;
1770 
1771 	page_start	&= ~span_mask;
1772 
1773 	cmd->atc.addr	= page_start << inval_grain_shift;
1774 	cmd->atc.size	= log2_span;
1775 }
1776 
1777 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1778 {
1779 	int i;
1780 	struct arm_smmu_cmdq_ent cmd;
1781 	struct arm_smmu_cmdq_batch cmds;
1782 
1783 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1784 
1785 	cmds.num = 0;
1786 	for (i = 0; i < master->num_streams; i++) {
1787 		cmd.atc.sid = master->streams[i].id;
1788 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1789 	}
1790 
1791 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1792 }
1793 
1794 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1795 			    unsigned long iova, size_t size)
1796 {
1797 	int i;
1798 	unsigned long flags;
1799 	struct arm_smmu_cmdq_ent cmd;
1800 	struct arm_smmu_master *master;
1801 	struct arm_smmu_cmdq_batch cmds;
1802 
1803 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1804 		return 0;
1805 
1806 	/*
1807 	 * Ensure that we've completed prior invalidation of the main TLBs
1808 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1809 	 * arm_smmu_enable_ats():
1810 	 *
1811 	 *	// unmap()			// arm_smmu_enable_ats()
1812 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1813 	 *	smp_mb();			[...]
1814 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1815 	 *
1816 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1817 	 * ATS was enabled at the PCI device before completion of the TLBI.
1818 	 */
1819 	smp_mb();
1820 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1821 		return 0;
1822 
1823 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1824 
1825 	cmds.num = 0;
1826 
1827 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1828 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1829 		if (!master->ats_enabled)
1830 			continue;
1831 
1832 		for (i = 0; i < master->num_streams; i++) {
1833 			cmd.atc.sid = master->streams[i].id;
1834 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1835 		}
1836 	}
1837 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1838 
1839 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1840 }
1841 
1842 /* IO_PGTABLE API */
1843 static void arm_smmu_tlb_inv_context(void *cookie)
1844 {
1845 	struct arm_smmu_domain *smmu_domain = cookie;
1846 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1847 	struct arm_smmu_cmdq_ent cmd;
1848 
1849 	/*
1850 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1851 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1852 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1853 	 * insertion to guarantee those are observed before the TLBI. Do be
1854 	 * careful, 007.
1855 	 */
1856 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1857 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1858 	} else {
1859 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1860 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1861 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1862 	}
1863 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1864 }
1865 
1866 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1867 				     unsigned long iova, size_t size,
1868 				     size_t granule,
1869 				     struct arm_smmu_domain *smmu_domain)
1870 {
1871 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1872 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1873 	size_t inv_range = granule;
1874 	struct arm_smmu_cmdq_batch cmds;
1875 
1876 	if (!size)
1877 		return;
1878 
1879 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1880 		/* Get the leaf page size */
1881 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1882 
1883 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1884 		cmd->tlbi.tg = (tg - 10) / 2;
1885 
1886 		/* Determine what level the granule is at */
1887 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1888 
1889 		num_pages = size >> tg;
1890 	}
1891 
1892 	cmds.num = 0;
1893 
1894 	while (iova < end) {
1895 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1896 			/*
1897 			 * On each iteration of the loop, the range is 5 bits
1898 			 * worth of the aligned size remaining.
1899 			 * The range in pages is:
1900 			 *
1901 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1902 			 */
1903 			unsigned long scale, num;
1904 
1905 			/* Determine the power of 2 multiple number of pages */
1906 			scale = __ffs(num_pages);
1907 			cmd->tlbi.scale = scale;
1908 
1909 			/* Determine how many chunks of 2^scale size we have */
1910 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1911 			cmd->tlbi.num = num - 1;
1912 
1913 			/* range is num * 2^scale * pgsize */
1914 			inv_range = num << (scale + tg);
1915 
1916 			/* Clear out the lower order bits for the next iteration */
1917 			num_pages -= num << scale;
1918 		}
1919 
1920 		cmd->tlbi.addr = iova;
1921 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1922 		iova += inv_range;
1923 	}
1924 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1925 }
1926 
1927 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1928 					  size_t granule, bool leaf,
1929 					  struct arm_smmu_domain *smmu_domain)
1930 {
1931 	struct arm_smmu_cmdq_ent cmd = {
1932 		.tlbi = {
1933 			.leaf	= leaf,
1934 		},
1935 	};
1936 
1937 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1938 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1939 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1940 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1941 	} else {
1942 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1943 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1944 	}
1945 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1946 
1947 	/*
1948 	 * Unfortunately, this can't be leaf-only since we may have
1949 	 * zapped an entire table.
1950 	 */
1951 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1952 }
1953 
1954 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1955 				 size_t granule, bool leaf,
1956 				 struct arm_smmu_domain *smmu_domain)
1957 {
1958 	struct arm_smmu_cmdq_ent cmd = {
1959 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1960 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1961 		.tlbi = {
1962 			.asid	= asid,
1963 			.leaf	= leaf,
1964 		},
1965 	};
1966 
1967 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1968 }
1969 
1970 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1971 					 unsigned long iova, size_t granule,
1972 					 void *cookie)
1973 {
1974 	struct arm_smmu_domain *smmu_domain = cookie;
1975 	struct iommu_domain *domain = &smmu_domain->domain;
1976 
1977 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1978 }
1979 
1980 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1981 				  size_t granule, void *cookie)
1982 {
1983 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1984 }
1985 
1986 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1987 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1988 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1989 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1990 };
1991 
1992 /* IOMMU API */
1993 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
1994 {
1995 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
1996 
1997 	switch (cap) {
1998 	case IOMMU_CAP_CACHE_COHERENCY:
1999 		/* Assume that a coherent TCU implies coherent TBUs */
2000 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2001 	case IOMMU_CAP_NOEXEC:
2002 		return true;
2003 	default:
2004 		return false;
2005 	}
2006 }
2007 
2008 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2009 {
2010 	struct arm_smmu_domain *smmu_domain;
2011 
2012 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2013 	    type != IOMMU_DOMAIN_DMA &&
2014 	    type != IOMMU_DOMAIN_DMA_FQ &&
2015 	    type != IOMMU_DOMAIN_IDENTITY)
2016 		return NULL;
2017 
2018 	/*
2019 	 * Allocate the domain and initialise some of its data structures.
2020 	 * We can't really do anything meaningful until we've added a
2021 	 * master.
2022 	 */
2023 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2024 	if (!smmu_domain)
2025 		return NULL;
2026 
2027 	mutex_init(&smmu_domain->init_mutex);
2028 	INIT_LIST_HEAD(&smmu_domain->devices);
2029 	spin_lock_init(&smmu_domain->devices_lock);
2030 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2031 
2032 	return &smmu_domain->domain;
2033 }
2034 
2035 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2036 {
2037 	int idx, size = 1 << span;
2038 
2039 	do {
2040 		idx = find_first_zero_bit(map, size);
2041 		if (idx == size)
2042 			return -ENOSPC;
2043 	} while (test_and_set_bit(idx, map));
2044 
2045 	return idx;
2046 }
2047 
/* Release bit @idx of @map so that arm_smmu_bitmap_alloc() can reuse it. */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
2052 
2053 static void arm_smmu_domain_free(struct iommu_domain *domain)
2054 {
2055 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2056 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2057 
2058 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2059 
2060 	/* Free the CD and ASID, if we allocated them */
2061 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2062 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2063 
2064 		/* Prevent SVA from touching the CD while we're freeing it */
2065 		mutex_lock(&arm_smmu_asid_lock);
2066 		if (cfg->cdcfg.cdtab)
2067 			arm_smmu_free_cd_tables(smmu_domain);
2068 		arm_smmu_free_asid(&cfg->cd);
2069 		mutex_unlock(&arm_smmu_asid_lock);
2070 	} else {
2071 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2072 		if (cfg->vmid)
2073 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2074 	}
2075 
2076 	kfree(smmu_domain);
2077 }
2078 
2079 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2080 				       struct arm_smmu_master *master,
2081 				       struct io_pgtable_cfg *pgtbl_cfg)
2082 {
2083 	int ret;
2084 	u32 asid;
2085 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2086 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2087 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2088 
2089 	refcount_set(&cfg->cd.refs, 1);
2090 
2091 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2092 	mutex_lock(&arm_smmu_asid_lock);
2093 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2094 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2095 	if (ret)
2096 		goto out_unlock;
2097 
2098 	cfg->s1cdmax = master->ssid_bits;
2099 
2100 	smmu_domain->stall_enabled = master->stall_enabled;
2101 
2102 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2103 	if (ret)
2104 		goto out_free_asid;
2105 
2106 	cfg->cd.asid	= (u16)asid;
2107 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2108 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2109 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2110 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2111 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2112 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2113 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2114 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2115 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2116 
2117 	/*
2118 	 * Note that this will end up calling arm_smmu_sync_cd() before
2119 	 * the master has been added to the devices list for this domain.
2120 	 * This isn't an issue because the STE hasn't been installed yet.
2121 	 */
2122 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2123 	if (ret)
2124 		goto out_free_cd_tables;
2125 
2126 	mutex_unlock(&arm_smmu_asid_lock);
2127 	return 0;
2128 
2129 out_free_cd_tables:
2130 	arm_smmu_free_cd_tables(smmu_domain);
2131 out_free_asid:
2132 	arm_smmu_free_asid(&cfg->cd);
2133 out_unlock:
2134 	mutex_unlock(&arm_smmu_asid_lock);
2135 	return ret;
2136 }
2137 
2138 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2139 				       struct arm_smmu_master *master,
2140 				       struct io_pgtable_cfg *pgtbl_cfg)
2141 {
2142 	int vmid;
2143 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2144 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2145 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2146 
2147 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2148 	if (vmid < 0)
2149 		return vmid;
2150 
2151 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2152 	cfg->vmid	= (u16)vmid;
2153 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2154 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2155 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2156 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2157 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2158 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2159 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2160 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2161 	return 0;
2162 }
2163 
2164 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2165 				    struct arm_smmu_master *master)
2166 {
2167 	int ret;
2168 	unsigned long ias, oas;
2169 	enum io_pgtable_fmt fmt;
2170 	struct io_pgtable_cfg pgtbl_cfg;
2171 	struct io_pgtable_ops *pgtbl_ops;
2172 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2173 				 struct arm_smmu_master *,
2174 				 struct io_pgtable_cfg *);
2175 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2176 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2177 
2178 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2179 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2180 		return 0;
2181 	}
2182 
2183 	/* Restrict the stage to what we can actually support */
2184 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2185 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2186 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2187 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2188 
2189 	switch (smmu_domain->stage) {
2190 	case ARM_SMMU_DOMAIN_S1:
2191 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2192 		ias = min_t(unsigned long, ias, VA_BITS);
2193 		oas = smmu->ias;
2194 		fmt = ARM_64_LPAE_S1;
2195 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2196 		break;
2197 	case ARM_SMMU_DOMAIN_NESTED:
2198 	case ARM_SMMU_DOMAIN_S2:
2199 		ias = smmu->ias;
2200 		oas = smmu->oas;
2201 		fmt = ARM_64_LPAE_S2;
2202 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2203 		break;
2204 	default:
2205 		return -EINVAL;
2206 	}
2207 
2208 	pgtbl_cfg = (struct io_pgtable_cfg) {
2209 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2210 		.ias		= ias,
2211 		.oas		= oas,
2212 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2213 		.tlb		= &arm_smmu_flush_ops,
2214 		.iommu_dev	= smmu->dev,
2215 	};
2216 
2217 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2218 	if (!pgtbl_ops)
2219 		return -ENOMEM;
2220 
2221 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2222 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2223 	domain->geometry.force_aperture = true;
2224 
2225 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2226 	if (ret < 0) {
2227 		free_io_pgtable_ops(pgtbl_ops);
2228 		return ret;
2229 	}
2230 
2231 	smmu_domain->pgtbl_ops = pgtbl_ops;
2232 	return 0;
2233 }
2234 
2235 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2236 {
2237 	__le64 *step;
2238 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2239 
2240 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2241 		struct arm_smmu_strtab_l1_desc *l1_desc;
2242 		int idx;
2243 
2244 		/* Two-level walk */
2245 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2246 		l1_desc = &cfg->l1_desc[idx];
2247 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2248 		step = &l1_desc->l2ptr[idx];
2249 	} else {
2250 		/* Simple linear lookup */
2251 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2252 	}
2253 
2254 	return step;
2255 }
2256 
2257 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2258 {
2259 	int i, j;
2260 	struct arm_smmu_device *smmu = master->smmu;
2261 
2262 	for (i = 0; i < master->num_streams; ++i) {
2263 		u32 sid = master->streams[i].id;
2264 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2265 
2266 		/* Bridged PCI devices may end up with duplicated IDs */
2267 		for (j = 0; j < i; j++)
2268 			if (master->streams[j].id == sid)
2269 				break;
2270 		if (j < i)
2271 			continue;
2272 
2273 		arm_smmu_write_strtab_ent(master, sid, step);
2274 	}
2275 }
2276 
2277 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2278 {
2279 	struct device *dev = master->dev;
2280 	struct arm_smmu_device *smmu = master->smmu;
2281 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2282 
2283 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2284 		return false;
2285 
2286 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2287 		return false;
2288 
2289 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2290 }
2291 
2292 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2293 {
2294 	size_t stu;
2295 	struct pci_dev *pdev;
2296 	struct arm_smmu_device *smmu = master->smmu;
2297 	struct arm_smmu_domain *smmu_domain = master->domain;
2298 
2299 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2300 	if (!master->ats_enabled)
2301 		return;
2302 
2303 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2304 	stu = __ffs(smmu->pgsize_bitmap);
2305 	pdev = to_pci_dev(master->dev);
2306 
2307 	atomic_inc(&smmu_domain->nr_ats_masters);
2308 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2309 	if (pci_enable_ats(pdev, stu))
2310 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2311 }
2312 
2313 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2314 {
2315 	struct arm_smmu_domain *smmu_domain = master->domain;
2316 
2317 	if (!master->ats_enabled)
2318 		return;
2319 
2320 	pci_disable_ats(to_pci_dev(master->dev));
2321 	/*
2322 	 * Ensure ATS is disabled at the endpoint before we issue the
2323 	 * ATC invalidation via the SMMU.
2324 	 */
2325 	wmb();
2326 	arm_smmu_atc_inv_master(master);
2327 	atomic_dec(&smmu_domain->nr_ats_masters);
2328 }
2329 
2330 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2331 {
2332 	int ret;
2333 	int features;
2334 	int num_pasids;
2335 	struct pci_dev *pdev;
2336 
2337 	if (!dev_is_pci(master->dev))
2338 		return -ENODEV;
2339 
2340 	pdev = to_pci_dev(master->dev);
2341 
2342 	features = pci_pasid_features(pdev);
2343 	if (features < 0)
2344 		return features;
2345 
2346 	num_pasids = pci_max_pasids(pdev);
2347 	if (num_pasids <= 0)
2348 		return num_pasids;
2349 
2350 	ret = pci_enable_pasid(pdev, features);
2351 	if (ret) {
2352 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2353 		return ret;
2354 	}
2355 
2356 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2357 				  master->smmu->ssid_bits);
2358 	return 0;
2359 }
2360 
2361 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2362 {
2363 	struct pci_dev *pdev;
2364 
2365 	if (!dev_is_pci(master->dev))
2366 		return;
2367 
2368 	pdev = to_pci_dev(master->dev);
2369 
2370 	if (!pdev->pasid_enabled)
2371 		return;
2372 
2373 	master->ssid_bits = 0;
2374 	pci_disable_pasid(pdev);
2375 }
2376 
2377 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2378 {
2379 	unsigned long flags;
2380 	struct arm_smmu_domain *smmu_domain = master->domain;
2381 
2382 	if (!smmu_domain)
2383 		return;
2384 
2385 	arm_smmu_disable_ats(master);
2386 
2387 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2388 	list_del(&master->domain_head);
2389 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2390 
2391 	master->domain = NULL;
2392 	master->ats_enabled = false;
2393 	arm_smmu_install_ste_for_dev(master);
2394 }
2395 
2396 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2397 {
2398 	int ret = 0;
2399 	unsigned long flags;
2400 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2401 	struct arm_smmu_device *smmu;
2402 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2403 	struct arm_smmu_master *master;
2404 
2405 	if (!fwspec)
2406 		return -ENOENT;
2407 
2408 	master = dev_iommu_priv_get(dev);
2409 	smmu = master->smmu;
2410 
2411 	/*
2412 	 * Checking that SVA is disabled ensures that this device isn't bound to
2413 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2414 	 * be removed concurrently since we're holding the group mutex.
2415 	 */
2416 	if (arm_smmu_master_sva_enabled(master)) {
2417 		dev_err(dev, "cannot attach - SVA enabled\n");
2418 		return -EBUSY;
2419 	}
2420 
2421 	arm_smmu_detach_dev(master);
2422 
2423 	mutex_lock(&smmu_domain->init_mutex);
2424 
2425 	if (!smmu_domain->smmu) {
2426 		smmu_domain->smmu = smmu;
2427 		ret = arm_smmu_domain_finalise(domain, master);
2428 		if (ret) {
2429 			smmu_domain->smmu = NULL;
2430 			goto out_unlock;
2431 		}
2432 	} else if (smmu_domain->smmu != smmu) {
2433 		dev_err(dev,
2434 			"cannot attach to SMMU %s (upstream of %s)\n",
2435 			dev_name(smmu_domain->smmu->dev),
2436 			dev_name(smmu->dev));
2437 		ret = -ENXIO;
2438 		goto out_unlock;
2439 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2440 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2441 		dev_err(dev,
2442 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2443 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2444 		ret = -EINVAL;
2445 		goto out_unlock;
2446 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2447 		   smmu_domain->stall_enabled != master->stall_enabled) {
2448 		dev_err(dev, "cannot attach to stall-%s domain\n",
2449 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2450 		ret = -EINVAL;
2451 		goto out_unlock;
2452 	}
2453 
2454 	master->domain = smmu_domain;
2455 
2456 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2457 		master->ats_enabled = arm_smmu_ats_supported(master);
2458 
2459 	arm_smmu_install_ste_for_dev(master);
2460 
2461 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2462 	list_add(&master->domain_head, &smmu_domain->devices);
2463 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2464 
2465 	arm_smmu_enable_ats(master);
2466 
2467 out_unlock:
2468 	mutex_unlock(&smmu_domain->init_mutex);
2469 	return ret;
2470 }
2471 
2472 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2473 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2474 			      int prot, gfp_t gfp, size_t *mapped)
2475 {
2476 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2477 
2478 	if (!ops)
2479 		return -ENODEV;
2480 
2481 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2482 }
2483 
2484 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2485 				   size_t pgsize, size_t pgcount,
2486 				   struct iommu_iotlb_gather *gather)
2487 {
2488 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2489 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2490 
2491 	if (!ops)
2492 		return 0;
2493 
2494 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2495 }
2496 
2497 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2498 {
2499 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2500 
2501 	if (smmu_domain->smmu)
2502 		arm_smmu_tlb_inv_context(smmu_domain);
2503 }
2504 
2505 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2506 				struct iommu_iotlb_gather *gather)
2507 {
2508 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2509 
2510 	if (!gather->pgsize)
2511 		return;
2512 
2513 	arm_smmu_tlb_inv_range_domain(gather->start,
2514 				      gather->end - gather->start + 1,
2515 				      gather->pgsize, true, smmu_domain);
2516 }
2517 
2518 static phys_addr_t
2519 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2520 {
2521 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2522 
2523 	if (!ops)
2524 		return 0;
2525 
2526 	return ops->iova_to_phys(ops, iova);
2527 }
2528 
2529 static struct platform_driver arm_smmu_driver;
2530 
2531 static
2532 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2533 {
2534 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2535 							  fwnode);
2536 	put_device(dev);
2537 	return dev ? dev_get_drvdata(dev) : NULL;
2538 }
2539 
2540 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2541 {
2542 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2543 
2544 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2545 		limit *= 1UL << STRTAB_SPLIT;
2546 
2547 	return sid < limit;
2548 }
2549 
2550 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2551 {
2552 	/* Check the SIDs are in range of the SMMU and our stream table */
2553 	if (!arm_smmu_sid_in_range(smmu, sid))
2554 		return -ERANGE;
2555 
2556 	/* Ensure l2 strtab is initialised */
2557 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2558 		return arm_smmu_init_l2_strtab(smmu, sid);
2559 
2560 	return 0;
2561 }
2562 
2563 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2564 				  struct arm_smmu_master *master)
2565 {
2566 	int i;
2567 	int ret = 0;
2568 	struct arm_smmu_stream *new_stream, *cur_stream;
2569 	struct rb_node **new_node, *parent_node = NULL;
2570 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2571 
2572 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2573 				  GFP_KERNEL);
2574 	if (!master->streams)
2575 		return -ENOMEM;
2576 	master->num_streams = fwspec->num_ids;
2577 
2578 	mutex_lock(&smmu->streams_mutex);
2579 	for (i = 0; i < fwspec->num_ids; i++) {
2580 		u32 sid = fwspec->ids[i];
2581 
2582 		new_stream = &master->streams[i];
2583 		new_stream->id = sid;
2584 		new_stream->master = master;
2585 
2586 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2587 		if (ret)
2588 			break;
2589 
2590 		/* Insert into SID tree */
2591 		new_node = &(smmu->streams.rb_node);
2592 		while (*new_node) {
2593 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2594 					      node);
2595 			parent_node = *new_node;
2596 			if (cur_stream->id > new_stream->id) {
2597 				new_node = &((*new_node)->rb_left);
2598 			} else if (cur_stream->id < new_stream->id) {
2599 				new_node = &((*new_node)->rb_right);
2600 			} else {
2601 				dev_warn(master->dev,
2602 					 "stream %u already in tree\n",
2603 					 cur_stream->id);
2604 				ret = -EINVAL;
2605 				break;
2606 			}
2607 		}
2608 		if (ret)
2609 			break;
2610 
2611 		rb_link_node(&new_stream->node, parent_node, new_node);
2612 		rb_insert_color(&new_stream->node, &smmu->streams);
2613 	}
2614 
2615 	if (ret) {
2616 		for (i--; i >= 0; i--)
2617 			rb_erase(&master->streams[i].node, &smmu->streams);
2618 		kfree(master->streams);
2619 	}
2620 	mutex_unlock(&smmu->streams_mutex);
2621 
2622 	return ret;
2623 }
2624 
2625 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2626 {
2627 	int i;
2628 	struct arm_smmu_device *smmu = master->smmu;
2629 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2630 
2631 	if (!smmu || !master->streams)
2632 		return;
2633 
2634 	mutex_lock(&smmu->streams_mutex);
2635 	for (i = 0; i < fwspec->num_ids; i++)
2636 		rb_erase(&master->streams[i].node, &smmu->streams);
2637 	mutex_unlock(&smmu->streams_mutex);
2638 
2639 	kfree(master->streams);
2640 }
2641 
2642 static struct iommu_ops arm_smmu_ops;
2643 
2644 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2645 {
2646 	int ret;
2647 	struct arm_smmu_device *smmu;
2648 	struct arm_smmu_master *master;
2649 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2650 
2651 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2652 		return ERR_PTR(-ENODEV);
2653 
2654 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2655 		return ERR_PTR(-EBUSY);
2656 
2657 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2658 	if (!smmu)
2659 		return ERR_PTR(-ENODEV);
2660 
2661 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2662 	if (!master)
2663 		return ERR_PTR(-ENOMEM);
2664 
2665 	master->dev = dev;
2666 	master->smmu = smmu;
2667 	INIT_LIST_HEAD(&master->bonds);
2668 	dev_iommu_priv_set(dev, master);
2669 
2670 	ret = arm_smmu_insert_master(smmu, master);
2671 	if (ret)
2672 		goto err_free_master;
2673 
2674 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2675 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2676 
2677 	/*
2678 	 * Note that PASID must be enabled before, and disabled after ATS:
2679 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2680 	 *
2681 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2682 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2683 	 *   are changed.
2684 	 */
2685 	arm_smmu_enable_pasid(master);
2686 
2687 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2688 		master->ssid_bits = min_t(u8, master->ssid_bits,
2689 					  CTXDESC_LINEAR_CDMAX);
2690 
2691 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2692 	     device_property_read_bool(dev, "dma-can-stall")) ||
2693 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2694 		master->stall_enabled = true;
2695 
2696 	return &smmu->iommu;
2697 
2698 err_free_master:
2699 	kfree(master);
2700 	dev_iommu_priv_set(dev, NULL);
2701 	return ERR_PTR(ret);
2702 }
2703 
2704 static void arm_smmu_release_device(struct device *dev)
2705 {
2706 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2707 
2708 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2709 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2710 	arm_smmu_detach_dev(master);
2711 	arm_smmu_disable_pasid(master);
2712 	arm_smmu_remove_master(master);
2713 	kfree(master);
2714 }
2715 
/*
 * Pick the IOMMU group for @dev: PCI devices use the PCI grouping helper,
 * everything else gets a generic per-device group.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		return pci_device_group(dev);

	return generic_device_group(dev);
}
2732 
2733 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2734 {
2735 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2736 	int ret = 0;
2737 
2738 	mutex_lock(&smmu_domain->init_mutex);
2739 	if (smmu_domain->smmu)
2740 		ret = -EPERM;
2741 	else
2742 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2743 	mutex_unlock(&smmu_domain->init_mutex);
2744 
2745 	return ret;
2746 }
2747 
2748 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2749 {
2750 	return iommu_fwspec_add_ids(dev, args->args, 1);
2751 }
2752 
2753 static void arm_smmu_get_resv_regions(struct device *dev,
2754 				      struct list_head *head)
2755 {
2756 	struct iommu_resv_region *region;
2757 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2758 
2759 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2760 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2761 	if (!region)
2762 		return;
2763 
2764 	list_add_tail(&region->list, head);
2765 
2766 	iommu_dma_get_resv_regions(dev, head);
2767 }
2768 
2769 static int arm_smmu_dev_enable_feature(struct device *dev,
2770 				       enum iommu_dev_features feat)
2771 {
2772 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2773 
2774 	if (!master)
2775 		return -ENODEV;
2776 
2777 	switch (feat) {
2778 	case IOMMU_DEV_FEAT_IOPF:
2779 		if (!arm_smmu_master_iopf_supported(master))
2780 			return -EINVAL;
2781 		if (master->iopf_enabled)
2782 			return -EBUSY;
2783 		master->iopf_enabled = true;
2784 		return 0;
2785 	case IOMMU_DEV_FEAT_SVA:
2786 		if (!arm_smmu_master_sva_supported(master))
2787 			return -EINVAL;
2788 		if (arm_smmu_master_sva_enabled(master))
2789 			return -EBUSY;
2790 		return arm_smmu_master_enable_sva(master);
2791 	default:
2792 		return -EINVAL;
2793 	}
2794 }
2795 
2796 static int arm_smmu_dev_disable_feature(struct device *dev,
2797 					enum iommu_dev_features feat)
2798 {
2799 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2800 
2801 	if (!master)
2802 		return -EINVAL;
2803 
2804 	switch (feat) {
2805 	case IOMMU_DEV_FEAT_IOPF:
2806 		if (!master->iopf_enabled)
2807 			return -EINVAL;
2808 		if (master->sva_enabled)
2809 			return -EBUSY;
2810 		master->iopf_enabled = false;
2811 		return 0;
2812 	case IOMMU_DEV_FEAT_SVA:
2813 		if (!arm_smmu_master_sva_enabled(master))
2814 			return -EINVAL;
2815 		return arm_smmu_master_disable_sva(master);
2816 	default:
2817 		return -EINVAL;
2818 	}
2819 }
2820 
/*
 * The HiSilicon PCIe tune and trace (PTT) device traces TLP headers on the
 * PCIe link and writes the data to memory by DMA. The hardware can only be
 * used with an identity mapping.
 *
 * Note: @pdev is evaluated twice.
 */
#define IS_HISI_PTT_DEVICE(pdev) \
	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && (pdev)->device == 0xa12e)
2828 
2829 static int arm_smmu_def_domain_type(struct device *dev)
2830 {
2831 	if (dev_is_pci(dev)) {
2832 		struct pci_dev *pdev = to_pci_dev(dev);
2833 
2834 		if (IS_HISI_PTT_DEVICE(pdev))
2835 			return IOMMU_DOMAIN_IDENTITY;
2836 	}
2837 
2838 	return 0;
2839 }
2840 
2841 static struct iommu_ops arm_smmu_ops = {
2842 	.capable		= arm_smmu_capable,
2843 	.domain_alloc		= arm_smmu_domain_alloc,
2844 	.probe_device		= arm_smmu_probe_device,
2845 	.release_device		= arm_smmu_release_device,
2846 	.device_group		= arm_smmu_device_group,
2847 	.of_xlate		= arm_smmu_of_xlate,
2848 	.get_resv_regions	= arm_smmu_get_resv_regions,
2849 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2850 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2851 	.sva_bind		= arm_smmu_sva_bind,
2852 	.sva_unbind		= arm_smmu_sva_unbind,
2853 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2854 	.page_response		= arm_smmu_page_response,
2855 	.def_domain_type	= arm_smmu_def_domain_type,
2856 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2857 	.owner			= THIS_MODULE,
2858 	.default_domain_ops = &(const struct iommu_domain_ops) {
2859 		.attach_dev		= arm_smmu_attach_dev,
2860 		.map_pages		= arm_smmu_map_pages,
2861 		.unmap_pages		= arm_smmu_unmap_pages,
2862 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2863 		.iotlb_sync		= arm_smmu_iotlb_sync,
2864 		.iova_to_phys		= arm_smmu_iova_to_phys,
2865 		.enable_nesting		= arm_smmu_enable_nesting,
2866 		.free			= arm_smmu_domain_free,
2867 	}
2868 };
2869 
2870 /* Probing and initialisation functions */
2871 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2872 				   struct arm_smmu_queue *q,
2873 				   void __iomem *page,
2874 				   unsigned long prod_off,
2875 				   unsigned long cons_off,
2876 				   size_t dwords, const char *name)
2877 {
2878 	size_t qsz;
2879 
2880 	do {
2881 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2882 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2883 					      GFP_KERNEL);
2884 		if (q->base || qsz < PAGE_SIZE)
2885 			break;
2886 
2887 		q->llq.max_n_shift--;
2888 	} while (1);
2889 
2890 	if (!q->base) {
2891 		dev_err(smmu->dev,
2892 			"failed to allocate queue (0x%zx bytes) for %s\n",
2893 			qsz, name);
2894 		return -ENOMEM;
2895 	}
2896 
2897 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2898 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2899 			 1 << q->llq.max_n_shift, name);
2900 	}
2901 
2902 	q->prod_reg	= page + prod_off;
2903 	q->cons_reg	= page + cons_off;
2904 	q->ent_dwords	= dwords;
2905 
2906 	q->q_base  = Q_BASE_RWA;
2907 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2908 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2909 
2910 	q->llq.prod = q->llq.cons = 0;
2911 	return 0;
2912 }
2913 
2914 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2915 {
2916 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2917 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2918 
2919 	atomic_set(&cmdq->owner_prod, 0);
2920 	atomic_set(&cmdq->lock, 0);
2921 
2922 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2923 							      GFP_KERNEL);
2924 	if (!cmdq->valid_map)
2925 		return -ENOMEM;
2926 
2927 	return 0;
2928 }
2929 
2930 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2931 {
2932 	int ret;
2933 
2934 	/* cmdq */
2935 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2936 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2937 				      CMDQ_ENT_DWORDS, "cmdq");
2938 	if (ret)
2939 		return ret;
2940 
2941 	ret = arm_smmu_cmdq_init(smmu);
2942 	if (ret)
2943 		return ret;
2944 
2945 	/* evtq */
2946 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2947 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2948 				      EVTQ_ENT_DWORDS, "evtq");
2949 	if (ret)
2950 		return ret;
2951 
2952 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2953 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2954 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2955 		if (!smmu->evtq.iopf)
2956 			return -ENOMEM;
2957 	}
2958 
2959 	/* priq */
2960 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2961 		return 0;
2962 
2963 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2964 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2965 				       PRIQ_ENT_DWORDS, "priq");
2966 }
2967 
2968 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2969 {
2970 	unsigned int i;
2971 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2972 	void *strtab = smmu->strtab_cfg.strtab;
2973 
2974 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2975 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
2976 	if (!cfg->l1_desc)
2977 		return -ENOMEM;
2978 
2979 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2980 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2981 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2982 	}
2983 
2984 	return 0;
2985 }
2986 
2987 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2988 {
2989 	void *strtab;
2990 	u64 reg;
2991 	u32 size, l1size;
2992 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2993 
2994 	/* Calculate the L1 size, capped to the SIDSIZE. */
2995 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2996 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2997 	cfg->num_l1_ents = 1 << size;
2998 
2999 	size += STRTAB_SPLIT;
3000 	if (size < smmu->sid_bits)
3001 		dev_warn(smmu->dev,
3002 			 "2-level strtab only covers %u/%u bits of SID\n",
3003 			 size, smmu->sid_bits);
3004 
3005 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3006 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3007 				     GFP_KERNEL);
3008 	if (!strtab) {
3009 		dev_err(smmu->dev,
3010 			"failed to allocate l1 stream table (%u bytes)\n",
3011 			l1size);
3012 		return -ENOMEM;
3013 	}
3014 	cfg->strtab = strtab;
3015 
3016 	/* Configure strtab_base_cfg for 2 levels */
3017 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3018 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3019 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3020 	cfg->strtab_base_cfg = reg;
3021 
3022 	return arm_smmu_init_l1_strtab(smmu);
3023 }
3024 
3025 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3026 {
3027 	void *strtab;
3028 	u64 reg;
3029 	u32 size;
3030 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3031 
3032 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3033 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3034 				     GFP_KERNEL);
3035 	if (!strtab) {
3036 		dev_err(smmu->dev,
3037 			"failed to allocate linear stream table (%u bytes)\n",
3038 			size);
3039 		return -ENOMEM;
3040 	}
3041 	cfg->strtab = strtab;
3042 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3043 
3044 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3045 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3046 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3047 	cfg->strtab_base_cfg = reg;
3048 
3049 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3050 	return 0;
3051 }
3052 
3053 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3054 {
3055 	u64 reg;
3056 	int ret;
3057 
3058 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3059 		ret = arm_smmu_init_strtab_2lvl(smmu);
3060 	else
3061 		ret = arm_smmu_init_strtab_linear(smmu);
3062 
3063 	if (ret)
3064 		return ret;
3065 
3066 	/* Set the strtab base address */
3067 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3068 	reg |= STRTAB_BASE_RA;
3069 	smmu->strtab_cfg.strtab_base = reg;
3070 
3071 	/* Allocate the first VMID for stage-2 bypass STEs */
3072 	set_bit(0, smmu->vmid_map);
3073 	return 0;
3074 }
3075 
3076 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3077 {
3078 	int ret;
3079 
3080 	mutex_init(&smmu->streams_mutex);
3081 	smmu->streams = RB_ROOT;
3082 
3083 	ret = arm_smmu_init_queues(smmu);
3084 	if (ret)
3085 		return ret;
3086 
3087 	return arm_smmu_init_strtab(smmu);
3088 }
3089 
3090 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3091 				   unsigned int reg_off, unsigned int ack_off)
3092 {
3093 	u32 reg;
3094 
3095 	writel_relaxed(val, smmu->base + reg_off);
3096 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3097 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3098 }
3099 
3100 /* GBPA is "special" */
3101 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3102 {
3103 	int ret;
3104 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3105 
3106 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3107 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3108 	if (ret)
3109 		return ret;
3110 
3111 	reg &= ~clr;
3112 	reg |= set;
3113 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3114 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3115 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3116 
3117 	if (ret)
3118 		dev_err(smmu->dev, "GBPA not responding to update\n");
3119 	return ret;
3120 }
3121 
/* devm action callback: @data is the struct device whose platform MSIs are freed. */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
3127 
3128 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3129 {
3130 	phys_addr_t doorbell;
3131 	struct device *dev = msi_desc_to_dev(desc);
3132 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3133 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3134 
3135 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3136 	doorbell &= MSI_CFG0_ADDR_MASK;
3137 
3138 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3139 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3140 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3141 }
3142 
3143 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3144 {
3145 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3146 	struct device *dev = smmu->dev;
3147 
3148 	/* Clear the MSI address regs */
3149 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3150 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3151 
3152 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3153 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3154 	else
3155 		nvec--;
3156 
3157 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3158 		return;
3159 
3160 	if (!dev->msi.domain) {
3161 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3162 		return;
3163 	}
3164 
3165 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3166 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3167 	if (ret) {
3168 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3169 		return;
3170 	}
3171 
3172 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3173 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3174 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3175 
3176 	/* Add callback to free MSIs on teardown */
3177 	devm_add_action(dev, arm_smmu_free_msis, dev);
3178 }
3179 
3180 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3181 {
3182 	int irq, ret;
3183 
3184 	arm_smmu_setup_msis(smmu);
3185 
3186 	/* Request interrupt lines */
3187 	irq = smmu->evtq.q.irq;
3188 	if (irq) {
3189 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3190 						arm_smmu_evtq_thread,
3191 						IRQF_ONESHOT,
3192 						"arm-smmu-v3-evtq", smmu);
3193 		if (ret < 0)
3194 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3195 	} else {
3196 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3197 	}
3198 
3199 	irq = smmu->gerr_irq;
3200 	if (irq) {
3201 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3202 				       0, "arm-smmu-v3-gerror", smmu);
3203 		if (ret < 0)
3204 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3205 	} else {
3206 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3207 	}
3208 
3209 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3210 		irq = smmu->priq.q.irq;
3211 		if (irq) {
3212 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3213 							arm_smmu_priq_thread,
3214 							IRQF_ONESHOT,
3215 							"arm-smmu-v3-priq",
3216 							smmu);
3217 			if (ret < 0)
3218 				dev_warn(smmu->dev,
3219 					 "failed to enable priq irq\n");
3220 		} else {
3221 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3222 		}
3223 	}
3224 }
3225 
3226 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3227 {
3228 	int ret, irq;
3229 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3230 
3231 	/* Disable IRQs first */
3232 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3233 				      ARM_SMMU_IRQ_CTRLACK);
3234 	if (ret) {
3235 		dev_err(smmu->dev, "failed to disable irqs\n");
3236 		return ret;
3237 	}
3238 
3239 	irq = smmu->combined_irq;
3240 	if (irq) {
3241 		/*
3242 		 * Cavium ThunderX2 implementation doesn't support unique irq
3243 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3244 		 */
3245 		ret = devm_request_threaded_irq(smmu->dev, irq,
3246 					arm_smmu_combined_irq_handler,
3247 					arm_smmu_combined_irq_thread,
3248 					IRQF_ONESHOT,
3249 					"arm-smmu-v3-combined-irq", smmu);
3250 		if (ret < 0)
3251 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3252 	} else
3253 		arm_smmu_setup_unique_irqs(smmu);
3254 
3255 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3256 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3257 
3258 	/* Enable interrupt generation on the SMMU */
3259 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3260 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3261 	if (ret)
3262 		dev_warn(smmu->dev, "failed to enable irqs\n");
3263 
3264 	return 0;
3265 }
3266 
3267 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3268 {
3269 	int ret;
3270 
3271 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3272 	if (ret)
3273 		dev_err(smmu->dev, "failed to clear cr0\n");
3274 
3275 	return ret;
3276 }
3277 
/*
 * Reset the SMMU and bring it up with the configuration held in @smmu.
 *
 * Sequence, as implemented below:
 *   1. If SMMUEN is already set, warn and set GBPA.ABORT; then clear CR0.
 *   2. Program CR1, CR2, the stream table base/config and the command queue
 *      registers.
 *   3. Enable the command queue on its own and issue CFGI_ALL plus the TLB
 *      invalidation commands.
 *   4. Program and enable the event queue, then the PRI queue and ATS
 *      checking when the corresponding features are present.
 *   5. Set up interrupts and write the final CR0 value.
 *
 * @smmu:   device to reset
 * @bypass: request bypass instead of enabling translation; only honoured
 *          when the disable_bypass module parameter is false.
 *
 * Returns 0 on success, or the error from a failing CR0 update, the final
 * GBPA update or interrupt setup.
 */
3278 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3279 {
3280 	int ret;
3281 	u32 reg, enables;
3282 	struct arm_smmu_cmdq_ent cmd;
3283 
3284 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3285 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3286 	if (reg & CR0_SMMUEN) {
3287 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3288 		WARN_ON(is_kdump_kernel() && !disable_bypass);
		/* NOTE(review): the result of this GBPA update is not checked */
3289 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3290 	}
3291 
3292 	ret = arm_smmu_device_disable(smmu);
3293 	if (ret)
3294 		return ret;
3295 
3296 	/* CR1 (table and queue memory attributes) */
3297 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3298 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3299 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3300 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3301 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3302 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3303 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3304 
3305 	/* CR2 (random crap) */
3306 	reg = CR2_PTM | CR2_RECINVSID;
3307 
3308 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3309 		reg |= CR2_E2H;
3310 
3311 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3312 
3313 	/* Stream table */
3314 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3315 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3316 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3317 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3318 
3319 	/* Command queue */
3320 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3321 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3322 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3323 
	/*
	 * Only the command queue is enabled at this point. 'enables'
	 * accumulates the CR0 bits written so far and is re-synced after
	 * each addition below.
	 */
3324 	enables = CR0_CMDQEN;
3325 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3326 				      ARM_SMMU_CR0ACK);
3327 	if (ret) {
3328 		dev_err(smmu->dev, "failed to enable command queue\n");
3329 		return ret;
3330 	}
3331 
	/* cmd is not zero-initialised; only .opcode is assigned before each submission */
3332 	/* Invalidate any cached configuration */
3333 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3334 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3335 
3336 	/* Invalidate any stale TLB entries */
3337 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3338 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3339 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3340 	}
3341 
3342 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3343 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3344 
3345 	/* Event queue */
3346 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3347 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3348 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3349 
3350 	enables |= CR0_EVTQEN;
3351 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3352 				      ARM_SMMU_CR0ACK);
3353 	if (ret) {
3354 		dev_err(smmu->dev, "failed to enable event queue\n");
3355 		return ret;
3356 	}
3357 
3358 	/* PRI queue */
3359 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3360 		writeq_relaxed(smmu->priq.q.q_base,
3361 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3362 		writel_relaxed(smmu->priq.q.llq.prod,
3363 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3364 		writel_relaxed(smmu->priq.q.llq.cons,
3365 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3366 
3367 		enables |= CR0_PRIQEN;
3368 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3369 					      ARM_SMMU_CR0ACK);
3370 		if (ret) {
3371 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3372 			return ret;
3373 		}
3374 	}
3375 
3376 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3377 		enables |= CR0_ATSCHK;
3378 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3379 					      ARM_SMMU_CR0ACK);
3380 		if (ret) {
3381 			dev_err(smmu->dev, "failed to enable ATS check\n");
3382 			return ret;
3383 		}
3384 	}
3385 
3386 	ret = arm_smmu_setup_irqs(smmu);
3387 	if (ret) {
3388 		dev_err(smmu->dev, "failed to setup irqs\n");
3389 		return ret;
3390 	}
3391 
	/* In a kdump kernel the event and PRI queue enables are dropped from the final CR0 value */
3392 	if (is_kdump_kernel())
3393 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3394 
3395 	/* Enable the SMMU interface, or ensure bypass */
3396 	if (!bypass || disable_bypass) {
3397 		enables |= CR0_SMMUEN;
3398 	} else {
		/* Bypass requested and permitted: clear GBPA.ABORT and leave SMMUEN off */
3399 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3400 		if (ret)
3401 			return ret;
3402 	}
3403 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3404 				      ARM_SMMU_CR0ACK);
3405 	if (ret) {
3406 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3407 		return ret;
3408 	}
3409 
3410 	return 0;
3411 }
3412 
/*
 * Probe the hardware ID registers and record the SMMU's capabilities.
 *
 * Reads IDR0, IDR1, IDR3 and IDR5 and fills in smmu->features,
 * smmu->options (MSI polling), the ASID/VMID/SID/SSID widths, the per-queue
 * max_n_shift limits, evtq.max_stalls, pgsize_bitmap, ias and oas. It also
 * folds the supported page sizes into arm_smmu_ops.pgsize_bitmap and sets
 * the DMA mask of smmu->dev from the output address size.
 *
 * ARM_SMMU_FEAT_COHERENCY is read here, not set: it must already reflect the
 * firmware description when this function is called.
 *
 * Returns 0 on success, or -ENXIO when the hardware reports an unsupported
 * translation table endianness, no translation support (neither IDR0.S1P
 * nor IDR0.S2P), no AArch64 table format, preset tables/queues or IDR1.REL
 * ("embedded implementation"), or a command queue too small to hold one
 * batch plus a sync.
 */
3413 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3414 {
3415 	u32 reg;
	/* Coherency as already recorded in smmu->features; compared against IDR0.COHACC below */
3416 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3417 
3418 	/* IDR0 */
3419 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3420 
3421 	/* 2-level structures */
3422 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3423 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3424 
3425 	if (reg & IDR0_CD2L)
3426 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3427 
3428 	/*
3429 	 * Translation table endianness.
3430 	 * We currently require the same endianness as the CPU, but this
3431 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3432 	 */
3433 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3434 	case IDR0_TTENDIAN_MIXED:
3435 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3436 		break;
3437 #ifdef __BIG_ENDIAN
3438 	case IDR0_TTENDIAN_BE:
3439 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3440 		break;
3441 #else
3442 	case IDR0_TTENDIAN_LE:
3443 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3444 		break;
3445 #endif
3446 	default:
3447 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3448 		return -ENXIO;
3449 	}
3450 
3451 	/* Boolean feature flags */
3452 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3453 		smmu->features |= ARM_SMMU_FEAT_PRI;
3454 
3455 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3456 		smmu->features |= ARM_SMMU_FEAT_ATS;
3457 
3458 	if (reg & IDR0_SEV)
3459 		smmu->features |= ARM_SMMU_FEAT_SEV;
3460 
3461 	if (reg & IDR0_MSI) {
3462 		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI polling is only selected for coherent SMMUs and can be vetoed by the module parameter */
3463 		if (coherent && !disable_msipolling)
3464 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3465 	}
3466 
3467 	if (reg & IDR0_HYP) {
3468 		smmu->features |= ARM_SMMU_FEAT_HYP;
3469 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3470 			smmu->features |= ARM_SMMU_FEAT_E2H;
3471 	}
3472 
3473 	/*
3474 	 * The coherency feature as set by FW is used in preference to the ID
3475 	 * register, but warn on mismatch.
3476 	 */
3477 	if (!!(reg & IDR0_COHACC) != coherent)
3478 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3479 			 coherent ? "true" : "false");
3480 
	/* No default case: any other stall model value leaves both stall features clear */
3481 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3482 	case IDR0_STALL_MODEL_FORCE:
3483 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3484 		fallthrough;
3485 	case IDR0_STALL_MODEL_STALL:
3486 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3487 	}
3488 
3489 	if (reg & IDR0_S1P)
3490 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3491 
3492 	if (reg & IDR0_S2P)
3493 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3494 
3495 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3496 		dev_err(smmu->dev, "no translation support!\n");
3497 		return -ENXIO;
3498 	}
3499 
3500 	/* We only support the AArch64 table format at present */
3501 	switch (FIELD_GET(IDR0_TTF, reg)) {
3502 	case IDR0_TTF_AARCH32_64:
		/* Provisional value; ias is raised to at least oas at the end of this function */
3503 		smmu->ias = 40;
3504 		fallthrough;
3505 	case IDR0_TTF_AARCH64:
3506 		break;
3507 	default:
3508 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3509 		return -ENXIO;
3510 	}
3511 
3512 	/* ASID/VMID sizes */
3513 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3514 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3515 
3516 	/* IDR1 */
3517 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3518 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3519 		dev_err(smmu->dev, "embedded implementation not supported\n");
3520 		return -ENXIO;
3521 	}
3522 
3523 	/* Queue sizes, capped to ensure natural alignment */
3524 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3525 					     FIELD_GET(IDR1_CMDQS, reg));
3526 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3527 		/*
3528 		 * We don't support splitting up batches, so one batch of
3529 		 * commands plus an extra sync needs to fit inside the command
3530 		 * queue. There's also no way we can handle the weird alignment
3531 		 * restrictions on the base pointer for a unit-length queue.
3532 		 */
3533 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3534 			CMDQ_BATCH_ENTRIES);
3535 		return -ENXIO;
3536 	}
3537 
3538 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3539 					     FIELD_GET(IDR1_EVTQS, reg));
3540 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3541 					     FIELD_GET(IDR1_PRIQS, reg));
3542 
3543 	/* SID/SSID sizes */
3544 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3545 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3546 
3547 	/*
3548 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3549 	 * table, use a linear table instead.
3550 	 */
3551 	if (smmu->sid_bits <= STRTAB_SPLIT)
3552 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3553 
3554 	/* IDR3 */
3555 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3556 	if (FIELD_GET(IDR3_RIL, reg))
3557 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3558 
3559 	/* IDR5 */
3560 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3561 
3562 	/* Maximum number of outstanding stalls */
3563 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3564 
3565 	/* Page sizes */
3566 	if (reg & IDR5_GRAN64K)
3567 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3568 	if (reg & IDR5_GRAN16K)
3569 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3570 	if (reg & IDR5_GRAN4K)
3571 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3572 
3573 	/* Input address size */
3574 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3575 		smmu->features |= ARM_SMMU_FEAT_VAX;
3576 
3577 	/* Output address size */
3578 	switch (FIELD_GET(IDR5_OAS, reg)) {
3579 	case IDR5_OAS_32_BIT:
3580 		smmu->oas = 32;
3581 		break;
3582 	case IDR5_OAS_36_BIT:
3583 		smmu->oas = 36;
3584 		break;
3585 	case IDR5_OAS_40_BIT:
3586 		smmu->oas = 40;
3587 		break;
3588 	case IDR5_OAS_42_BIT:
3589 		smmu->oas = 42;
3590 		break;
3591 	case IDR5_OAS_44_BIT:
3592 		smmu->oas = 44;
3593 		break;
3594 	case IDR5_OAS_52_BIT:
3595 		smmu->oas = 52;
3596 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3597 		break;
	/* default is placed before the 48-bit case so unknown encodings fall through to 48 bits */
3598 	default:
3599 		dev_info(smmu->dev,
3600 			"unknown output address size. Truncating to 48-bit\n");
3601 		fallthrough;
3602 	case IDR5_OAS_48_BIT:
3603 		smmu->oas = 48;
3604 	}
3605 
	/*
	 * NOTE(review): -1UL looks like the "not yet set" value of
	 * arm_smmu_ops.pgsize_bitmap - confirm at its definition. Otherwise
	 * this SMMU's page sizes are OR-ed into the existing bitmap.
	 */
3606 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3607 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3608 	else
3609 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3610 
3611 	/* Set the DMA mask for our table walker */
3612 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3613 		dev_warn(smmu->dev,
3614 			 "failed to set DMA mask for table walker\n");
3615 
	/* ias is never smaller than oas */
3616 	smmu->ias = max(smmu->ias, smmu->oas);
3617 
3618 	if (arm_smmu_sva_supported(smmu))
3619 		smmu->features |= ARM_SMMU_FEAT_SVA;
3620 
3621 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3622 		 smmu->ias, smmu->oas, smmu->features);
3623 	return 0;
3624 }
3625 
3626 #ifdef CONFIG_ACPI
3627 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3628 {
3629 	switch (model) {
3630 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3631 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3632 		break;
3633 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3634 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3635 		break;
3636 	}
3637 
3638 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3639 }
3640 
3641 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3642 				      struct arm_smmu_device *smmu)
3643 {
3644 	struct acpi_iort_smmu_v3 *iort_smmu;
3645 	struct device *dev = smmu->dev;
3646 	struct acpi_iort_node *node;
3647 
3648 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3649 
3650 	/* Retrieve SMMUv3 specific data */
3651 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3652 
3653 	acpi_smmu_get_options(iort_smmu->model, smmu);
3654 
3655 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3656 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3657 
3658 	return 0;
3659 }
3660 #else
/* Stub used when CONFIG_ACPI is not set: always reports -ENODEV. */
3661 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3662 					     struct arm_smmu_device *smmu)
3663 {
3664 	return -ENODEV;
3665 }
3666 #endif
3667 
3668 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3669 				    struct arm_smmu_device *smmu)
3670 {
3671 	struct device *dev = &pdev->dev;
3672 	u32 cells;
3673 	int ret = -EINVAL;
3674 
3675 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3676 		dev_err(dev, "missing #iommu-cells property\n");
3677 	else if (cells != 1)
3678 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3679 	else
3680 		ret = 0;
3681 
3682 	parse_driver_options(smmu);
3683 
3684 	if (of_dma_is_coherent(dev->of_node))
3685 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3686 
3687 	return ret;
3688 }
3689 
3690 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3691 {
3692 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3693 		return SZ_64K;
3694 	else
3695 		return SZ_128K;
3696 }
3697 
3698 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3699 				      resource_size_t size)
3700 {
3701 	struct resource res = DEFINE_RES_MEM(start, size);
3702 
3703 	return devm_ioremap_resource(dev, &res);
3704 }
3705 
3706 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3707 {
3708 	struct list_head rmr_list;
3709 	struct iommu_resv_region *e;
3710 
3711 	INIT_LIST_HEAD(&rmr_list);
3712 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3713 
3714 	list_for_each_entry(e, &rmr_list, list) {
3715 		__le64 *step;
3716 		struct iommu_iort_rmr_data *rmr;
3717 		int ret, i;
3718 
3719 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3720 		for (i = 0; i < rmr->num_sids; i++) {
3721 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3722 			if (ret) {
3723 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3724 					rmr->sids[i]);
3725 				continue;
3726 			}
3727 
3728 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3729 			arm_smmu_init_bypass_stes(step, 1, true);
3730 		}
3731 	}
3732 
3733 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3734 }
3735 
3736 static int arm_smmu_device_probe(struct platform_device *pdev)
3737 {
3738 	int irq, ret;
3739 	struct resource *res;
3740 	resource_size_t ioaddr;
3741 	struct arm_smmu_device *smmu;
3742 	struct device *dev = &pdev->dev;
3743 	bool bypass;
3744 
3745 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3746 	if (!smmu)
3747 		return -ENOMEM;
3748 	smmu->dev = dev;
3749 
3750 	if (dev->of_node) {
3751 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3752 	} else {
3753 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3754 		if (ret == -ENODEV)
3755 			return ret;
3756 	}
3757 
3758 	/* Set bypass mode according to firmware probing result */
3759 	bypass = !!ret;
3760 
3761 	/* Base address */
3762 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3763 	if (!res)
3764 		return -EINVAL;
3765 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3766 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3767 		return -EINVAL;
3768 	}
3769 	ioaddr = res->start;
3770 
3771 	/*
3772 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3773 	 * the PMCG registers which are reserved by the PMU driver.
3774 	 */
3775 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3776 	if (IS_ERR(smmu->base))
3777 		return PTR_ERR(smmu->base);
3778 
3779 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3780 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3781 					       ARM_SMMU_REG_SZ);
3782 		if (IS_ERR(smmu->page1))
3783 			return PTR_ERR(smmu->page1);
3784 	} else {
3785 		smmu->page1 = smmu->base;
3786 	}
3787 
3788 	/* Interrupt lines */
3789 
3790 	irq = platform_get_irq_byname_optional(pdev, "combined");
3791 	if (irq > 0)
3792 		smmu->combined_irq = irq;
3793 	else {
3794 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3795 		if (irq > 0)
3796 			smmu->evtq.q.irq = irq;
3797 
3798 		irq = platform_get_irq_byname_optional(pdev, "priq");
3799 		if (irq > 0)
3800 			smmu->priq.q.irq = irq;
3801 
3802 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3803 		if (irq > 0)
3804 			smmu->gerr_irq = irq;
3805 	}
3806 	/* Probe the h/w */
3807 	ret = arm_smmu_device_hw_probe(smmu);
3808 	if (ret)
3809 		return ret;
3810 
3811 	/* Initialise in-memory data structures */
3812 	ret = arm_smmu_init_structures(smmu);
3813 	if (ret)
3814 		return ret;
3815 
3816 	/* Record our private device structure */
3817 	platform_set_drvdata(pdev, smmu);
3818 
3819 	/* Check for RMRs and install bypass STEs if any */
3820 	arm_smmu_rmr_install_bypass_ste(smmu);
3821 
3822 	/* Reset the device */
3823 	ret = arm_smmu_device_reset(smmu, bypass);
3824 	if (ret)
3825 		return ret;
3826 
3827 	/* And we're up. Go go go! */
3828 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3829 				     "smmu3.%pa", &ioaddr);
3830 	if (ret)
3831 		return ret;
3832 
3833 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3834 	if (ret) {
3835 		dev_err(dev, "Failed to register iommu\n");
3836 		iommu_device_sysfs_remove(&smmu->iommu);
3837 		return ret;
3838 	}
3839 
3840 	return 0;
3841 }
3842 
3843 static int arm_smmu_device_remove(struct platform_device *pdev)
3844 {
3845 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3846 
3847 	iommu_device_unregister(&smmu->iommu);
3848 	iommu_device_sysfs_remove(&smmu->iommu);
3849 	arm_smmu_device_disable(smmu);
3850 	iopf_queue_free(smmu->evtq.iopf);
3851 
3852 	return 0;
3853 }
3854 
/*
 * Platform driver shutdown: only quiesce the hardware.
 *
 * Unlike remove, this does not unregister from the IOMMU core. At shutdown
 * client devices may still be bound and issuing DMA, and tearing the IOMMU
 * registration down underneath them is neither needed nor safe; the driver
 * state is about to disappear with the system anyway.
 */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}
3859 
3860 static const struct of_device_id arm_smmu_of_match[] = {
3861 	{ .compatible = "arm,smmu-v3", },
3862 	{ },
3863 };
3864 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3865 
/*
 * Unregister hook passed to module_driver(): runs
 * arm_smmu_sva_notifier_synchronize() and then unregisters the platform
 * driver.
 * NOTE(review): the synchronize call presumably waits for outstanding SVA
 * MMU-notifier work before the module goes away - confirm against the SVA
 * code.
 */
3866 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3867 {
3868 	arm_smmu_sva_notifier_synchronize();
3869 	platform_driver_unregister(drv);
3870 }
3871 
3872 static struct platform_driver arm_smmu_driver = {
3873 	.driver	= {
3874 		.name			= "arm-smmu-v3",
3875 		.of_match_table		= arm_smmu_of_match,
3876 		.suppress_bind_attrs	= true,
3877 	},
3878 	.probe	= arm_smmu_device_probe,
3879 	.remove	= arm_smmu_device_remove,
3880 	.shutdown = arm_smmu_device_shutdown,
3881 };
3882 module_driver(arm_smmu_driver, platform_driver_register,
3883 	      arm_smmu_driver_unregister);
3884 
3885 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3886 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3887 MODULE_ALIAS("platform:arm-smmu-v3");
3888 MODULE_LICENSE("GPL v2");
3889