1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
/* Abort transactions from devices not attached to a domain (see param desc) */
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

/* Opt-out knob for MSI-based CMD_SYNC completion polling */
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");
45 
/* Index of each MSI target; used to address the register table below */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
52 
/* Register triplet (IRQ_CFG0/1/2) used to program each MSI target */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
70 
/* Maps a devicetree property name to an ARM_SMMU_OPT_* quirk flag */
struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};
75 
/* ASID allocator, shared with the SVA code; guarded by arm_smmu_asid_lock */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling it.
 */
struct arm_smmu_ctx_desc quiet_cd = { 0 };

/* Implementation quirks matched against DT properties; zero-terminated */
static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
/*
 * Return true if the entry at @prod has been consumed, i.e. the cons
 * index has advanced past it, accounting for queue wrap: within the same
 * wrap phase cons must be strictly greater; after a wrap, less-or-equal.
 */
static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}
140 
/* Publish the shadow cons pointer to the hardware CONS register */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	/* The barrier above makes the relaxed MMIO write sufficient */
	writel_relaxed(q->llq.cons, q->cons_reg);
}
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
/*
 * Refresh the shadow prod pointer from the hardware PROD register.
 * Returns -EOVERFLOW if the overflow flag flipped since the last read
 * (i.e. the hardware wrapped the queue and entries may have been lost).
 */
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
/*
 * Wait one polling step. Uses WFE when the SMMU can send events,
 * otherwise spins a bounded number of times before falling back to
 * udelay() with exponential backoff. Returns -ETIMEDOUT once the
 * deadline set up by queue_poll_init() has passed.
 */
static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
/*
 * Pop one entry from @q into @ent. Returns -EAGAIN if the queue is
 * empty. The new cons value is published to the hardware only after the
 * entry has been read.
 */
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}
235 
236 /* High-level queue accessors */
/*
 * Encode @ent into the raw command @cmd expected by the hardware.
 * Returns -EINVAL for an invalid PRI response code and -ENOENT for an
 * unknown opcode; @cmd is zeroed first, so callers may still queue it
 * after converting errors themselves.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* No operands beyond the opcode */
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		/* Only the three architected response codes are legal */
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;
	case CMDQ_OP_CMD_SYNC:
		/* Completion via MSI write-back if an address was given, else SEV */
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
337 
/* Return the command queue to use for @smmu (only the main cmdq here) */
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
	return &smmu->cmdq;
}
342 
/*
 * Build a CMD_SYNC for the slot at @prod. When MSI polling is enabled,
 * point the MSI at the command's own queue slot so the SMMU's write-back
 * clears it on completion.
 */
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_queue *q, u32 prod)
{
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}
361 
/*
 * Handle a command-queue error reported in the CONS register. Depending
 * on the error class we either let the hardware retry, give up, or
 * overwrite the offending entry with a CMD_SYNC so that consumption can
 * continue past it.
 */
static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
				     struct arm_smmu_queue *q)
{
	static const char * const cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		/* Leave the entry in place so the fetch is retried */
		dev_err(smmu->dev, "retrying command fetch\n");
		return;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		/* Illegal/unknown error: skip the bad command below */
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
		return;
	}

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
419 
420 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
421 {
422 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
423 }
424 
425 /*
426  * Command queue locking.
427  * This is a form of bastardised rwlock with the following major changes:
428  *
429  * - The only LOCK routines are exclusive_trylock() and shared_lock().
430  *   Neither have barrier semantics, and instead provide only a control
431  *   dependency.
432  *
433  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
434  *   fails if the caller appears to be the last lock holder (yes, this is
435  *   racy). All successful UNLOCK routines have RELEASE semantics.
436  */
/*
 * Take the cmdq lock in shared (reader) mode. The counter holds the
 * number of shared holders; the exclusive holder parks it at INT_MIN,
 * so a negative value means we must wait. No acquire barrier: callers
 * rely only on the control dependency (see the comment block above).
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		/* Wait for the exclusive holder to drop the lock to >= 0 */
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
454 
/* Drop a shared hold with RELEASE semantics (return value unused) */
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}
459 
/*
 * Drop a shared hold unless we appear to be the last holder, in which
 * case return false and keep the lock (deliberately racy — see the
 * comment block above; the caller then updates cons before unlocking).
 */
static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}
468 
/*
 * Try to take the cmdq lock exclusively (counter 0 -> INT_MIN) with
 * IRQs disabled. Evaluates to true on success; on failure IRQs are
 * restored before returning false.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

/* Release the exclusive hold (RELEASE semantics) and restore IRQs */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
484 
485 
486 /*
487  * Command queue insertion.
488  * This is made fiddly by our attempts to achieve some sort of scalability
489  * since there is one queue shared amongst all of the CPUs in the system.  If
490  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
491  * then you'll *love* this monstrosity.
492  *
493  * The basic idea is to split the queue up into ranges of commands that are
494  * owned by a given CPU; the owner may not have written all of the commands
495  * itself, but is responsible for advancing the hardware prod pointer when
496  * the time comes. The algorithm is roughly:
497  *
498  * 	1. Allocate some space in the queue. At this point we also discover
499  *	   whether the head of the queue is currently owned by another CPU,
500  *	   or whether we are the owner.
501  *
502  *	2. Write our commands into our allocated slots in the queue.
503  *
504  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
505  *
506  *	4. If we are an owner:
507  *		a. Wait for the previous owner to finish.
508  *		b. Mark the queue head as unowned, which tells us the range
509  *		   that we are responsible for publishing.
510  *		c. Wait for all commands in our owned range to become valid.
511  *		d. Advance the hardware prod pointer.
512  *		e. Tell the next owner we've finished.
513  *
514  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
515  *	   owner), then we need to stick around until it has completed:
516  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
517  *		   to clear the first 4 bytes.
518  *		b. Otherwise, we spin waiting for the hardware cons pointer to
519  *		   advance past our command.
520  *
521  * The devil is in the details, particularly the use of locking for handling
522  * SYNC completion and freeing up space in the queue before we think that it is
523  * full.
524  */
/*
 * Walk the valid bitmap over [sprod, eprod), a bitmap word at a time.
 * If @set, toggle the covered bits; otherwise spin until each covered
 * bit matches the valid polarity expected for the current wrap phase.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		/* Clamp to eprod when start and end fall in the same word */
		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
570 
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
577 
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
584 
585 /* Wait for the command queue to become non-full */
/*
 * Wait for the command queue to stop being full, refreshing *llq with
 * the latest prod/cons snapshot. Returns -ETIMEDOUT if the queue never
 * drains within the polling deadline.
 */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}
616 
617 /*
618  * Wait until the SMMU signals a CMD_SYNC completion MSI.
619  * Must be called with the cmdq lock held in some capacity.
620  */
/*
 * Poll the CMD_SYNC's own queue slot until the SMMU's MSI write-back
 * zeroes its first word. On success, advance llq->cons past the sync;
 * on timeout leave cons at the sync so the caller can report it.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
640 
641 /*
642  * Wait until the SMMU cons index passes llq->prod.
643  * Must be called with the cmdq lock held in some capacity.
644  */
/*
 * Spin until the hardware cons index passes the CMD_SYNC at llq->prod.
 * Returns -ETIMEDOUT if the command is not consumed within the polling
 * deadline; llq->cons is left holding the last cons value observed.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 * 	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 * 			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}
694 
695 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
696 					 struct arm_smmu_ll_queue *llq)
697 {
698 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
699 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
700 
701 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
702 }
703 
704 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
705 					u32 prod, int n)
706 {
707 	int i;
708 	struct arm_smmu_ll_queue llq = {
709 		.max_n_shift	= cmdq->q.llq.max_n_shift,
710 		.prod		= prod,
711 	};
712 
713 	for (i = 0; i < n; ++i) {
714 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
715 
716 		prod = queue_inc_prod_n(&llq, i);
717 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
718 	}
719 }
720 
721 /*
722  * This is the actual insertion function, and provides the following
723  * ordering guarantees to callers:
724  *
725  * - There is a dma_wmb() before publishing any commands to the queue.
726  *   This can be relied upon to order prior writes to data structures
727  *   in memory (such as a CD or an STE) before the command.
728  *
729  * - On completion of a CMD_SYNC, there is a control dependency.
730  *   This can be relied upon to order subsequent writes to memory (e.g.
731  *   freeing an IOVA) after completion of the CMD_SYNC.
732  *
733  * - Command insertion is totally ordered, so if two CPUs each race to
734  *   insert their own list of commands then all of the commands from one
735  *   CPU will appear before any of the commands from the other CPU.
736  */
/*
 * Insert @n commands (plus an optional CMD_SYNC) into the shared command
 * queue and, if @sync, wait for the sync to complete. See the comment
 * block above for the ordering guarantees and the overall algorithm.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
	/* llq: our allocation snapshot; head: the new state we install */
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	/* We own the queue head if the OWNED flag was clear when we claimed */
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}
856 
857 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
858 				     struct arm_smmu_cmdq_ent *ent,
859 				     bool sync)
860 {
861 	u64 cmd[CMDQ_ENT_DWORDS];
862 
863 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
864 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
865 			 ent->opcode);
866 		return -EINVAL;
867 	}
868 
869 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
870 }
871 
/* Issue a single command without waiting for completion */
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}
877 
/* Issue a single command followed by a CMD_SYNC, waiting for completion */
static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq_ent *ent)
{
	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
883 
884 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
885 				    struct arm_smmu_cmdq_batch *cmds,
886 				    struct arm_smmu_cmdq_ent *cmd)
887 {
888 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
889 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
890 		cmds->num = 0;
891 	}
892 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
893 	cmds->num++;
894 }
895 
/* Issue any batched commands followed by a CMD_SYNC */
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
901 
902 static int arm_smmu_page_response(struct device *dev,
903 				  struct iommu_fault_event *unused,
904 				  struct iommu_page_response *resp)
905 {
906 	struct arm_smmu_cmdq_ent cmd = {0};
907 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
908 	int sid = master->streams[0].id;
909 
910 	if (master->stall_enabled) {
911 		cmd.opcode		= CMDQ_OP_RESUME;
912 		cmd.resume.sid		= sid;
913 		cmd.resume.stag		= resp->grpid;
914 		switch (resp->code) {
915 		case IOMMU_PAGE_RESP_INVALID:
916 		case IOMMU_PAGE_RESP_FAILURE:
917 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
918 			break;
919 		case IOMMU_PAGE_RESP_SUCCESS:
920 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
921 			break;
922 		default:
923 			return -EINVAL;
924 		}
925 	} else {
926 		return -ENODEV;
927 	}
928 
929 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
930 	/*
931 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
932 	 * RESUME consumption guarantees that the stalled transaction will be
933 	 * terminated... at some point in the future. PRI_RESP is fire and
934 	 * forget.
935 	 */
936 
937 	return 0;
938 }
939 
940 /* Context descriptor manipulation functions */
/*
 * Invalidate all TLB entries for @asid and wait for completion. Uses the
 * EL2 variant of the command when the SMMU has the E2H feature.
 */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
951 
/*
 * Invalidate cached copies of the context descriptor at @ssid for every
 * master attached to the domain, batching one CFGI_CD per stream ID and
 * finishing with a sync.
 */
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	cmds.num = 0;

	/* Walk the attached masters under the devices lock */
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_streams; i++) {
			cmd.cfgi.sid = master->streams[i].id;
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
981 
982 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
983 					struct arm_smmu_l1_ctx_desc *l1_desc)
984 {
985 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
986 
987 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
988 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
989 	if (!l1_desc->l2ptr) {
990 		dev_warn(smmu->dev,
991 			 "failed to allocate context descriptor table\n");
992 		return -ENOMEM;
993 	}
994 	return 0;
995 }
996 
997 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
998 				      struct arm_smmu_l1_ctx_desc *l1_desc)
999 {
1000 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1001 		  CTXDESC_L1_DESC_V;
1002 
1003 	/* See comment in arm_smmu_write_ctx_desc() */
1004 	WRITE_ONCE(*dst, cpu_to_le64(val));
1005 }
1006 
/*
 * Return a pointer to the context descriptor slot for @ssid, lazily
 * allocating the L2 table when a two-level CD table is in use.
 * Returns NULL if the L2 allocation fails.
 */
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	/* Linear format: the table is indexed directly by SSID */
	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	/* Two-level format: upper SSID bits select the L1 descriptor */
	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	/* Lower SSID bits index into the L2 table */
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
1033 
/*
 * Install, update or remove the context descriptor for @ssid. The
 * write/sync ordering below is required because the STE may already be
 * live and the SMMU reads the CD dwords in no defined order.
 */
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Quiesce the context without clearing the valid bit. Disable
	 *     translation, and ignore any translation fault.
	 * (5) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;

	/* The SSID must fit within the configured CD table size */
	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (5) */
		val = 0;
	} else if (cd == &quiet_cd) { /* (4) */
		/* EPD0 disables TTB0 walks while keeping the CD valid */
		val |= CTXDESC_CD_0_TCR_EPD0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		/* Fill in dwords 1-3 before making the descriptor valid */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		if (smmu_domain->stall_enabled)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}
1112 
/*
 * Allocate the context-descriptor table(s) for a stage-1 domain. A
 * linear table is used when two-level tables are unsupported or the
 * number of contexts is small; otherwise only the L1 table is
 * allocated here and leaves are populated on demand.
 */
static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;

	max_contexts = 1 << cfg->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		/* Small enough (or forced) to use a single linear table */
		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cdcfg->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		/* Two-level: one L1 descriptor per L2 leaf table */
		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
					      sizeof(*cdcfg->l1_desc),
					      GFP_KERNEL);
		if (!cdcfg->l1_desc)
			return -ENOMEM;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
					   GFP_KERNEL);
	if (!cdcfg->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	/* Only allocated on the two-level path */
	if (cdcfg->l1_desc) {
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;
	}
	return ret;
}
1161 
/*
 * Free everything allocated by arm_smmu_alloc_cd_tables() and any L2
 * leaf tables allocated on demand since.
 */
static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (cdcfg->l1_desc) {
		/* Two-level: free each populated leaf, then the L1 array */
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cdcfg->num_l1_ents; i++) {
			if (!cdcfg->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cdcfg->l1_desc[i].l2ptr,
					   cdcfg->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		/* Linear: cdtab holds the CDs directly */
		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
	cdcfg->cdtab_dma = 0;
	cdcfg->cdtab = NULL;
}
1192 
1193 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1194 {
1195 	bool free;
1196 	struct arm_smmu_ctx_desc *old_cd;
1197 
1198 	if (!cd->asid)
1199 		return false;
1200 
1201 	free = refcount_dec_and_test(&cd->refs);
1202 	if (free) {
1203 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1204 		WARN_ON(old_cd != cd);
1205 	}
1206 	return free;
1207 }
1208 
1209 /* Stream table manipulation functions */
1210 static void
1211 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1212 {
1213 	u64 val = 0;
1214 
1215 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1216 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1217 
1218 	/* See comment in arm_smmu_write_ctx_desc() */
1219 	WRITE_ONCE(*dst, cpu_to_le64(val));
1220 }
1221 
1222 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1223 {
1224 	struct arm_smmu_cmdq_ent cmd = {
1225 		.opcode	= CMDQ_OP_CFGI_STE,
1226 		.cfgi	= {
1227 			.sid	= sid,
1228 			.leaf	= true,
1229 		},
1230 	};
1231 
1232 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1233 }
1234 
/*
 * (Re)write the stream table entry for @sid. When @master is NULL the
 * STE is put into the default bypass/abort state. The write/sync
 * sequence is ordering-critical; see the comment below.
 */
static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	/* Pick the stage-1 or stage-2 config from the attached domain */
	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	/* Work out whether the current STE is live (translating) */
	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {
		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;

		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, strw));

		/* S1STALLD disables stalling when the master doesn't use it */
		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		    !master->stall_enabled)
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	}

	if (s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	/* Sync dwords 1-3 before making dword 0 (with V=1) observable */
	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
1376 
1377 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1378 {
1379 	unsigned int i;
1380 
1381 	for (i = 0; i < nent; ++i) {
1382 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1383 		strtab += STRTAB_STE_DWORDS;
1384 	}
1385 }
1386 
1387 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1388 {
1389 	size_t size;
1390 	void *strtab;
1391 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1392 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1393 
1394 	if (desc->l2ptr)
1395 		return 0;
1396 
1397 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1398 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1399 
1400 	desc->span = STRTAB_SPLIT + 1;
1401 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1402 					  GFP_KERNEL);
1403 	if (!desc->l2ptr) {
1404 		dev_err(smmu->dev,
1405 			"failed to allocate l2 stream table for SID %u\n",
1406 			sid);
1407 		return -ENOMEM;
1408 	}
1409 
1410 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1411 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1412 	return 0;
1413 }
1414 
1415 static struct arm_smmu_master *
1416 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1417 {
1418 	struct rb_node *node;
1419 	struct arm_smmu_stream *stream;
1420 
1421 	lockdep_assert_held(&smmu->streams_mutex);
1422 
1423 	node = smmu->streams.rb_node;
1424 	while (node) {
1425 		stream = rb_entry(node, struct arm_smmu_stream, node);
1426 		if (stream->id < sid)
1427 			node = node->rb_right;
1428 		else if (stream->id > sid)
1429 			node = node->rb_left;
1430 		else
1431 			return stream->master;
1432 	}
1433 
1434 	return NULL;
1435 }
1436 
1437 /* IRQ and event handlers */
/*
 * Translate an event-queue entry into a generic IOMMU fault and report
 * it to the owning device driver. Stall events become recoverable page
 * requests; everything else is reported as unrecoverable.
 */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	int ret;
	u32 reason;
	u32 perm = 0;
	struct arm_smmu_master *master;
	bool ssid_valid = evt[0] & EVTQ_0_SSV;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	struct iommu_fault_event fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;

	/* Only fault-type events can be turned into iommu_fault reports */
	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
		reason = IOMMU_FAULT_REASON_PTE_FETCH;
		break;
	case EVT_ID_ADDR_SIZE_FAULT:
		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
		break;
	case EVT_ID_ACCESS_FAULT:
		reason = IOMMU_FAULT_REASON_ACCESS;
		break;
	case EVT_ID_PERMISSION_FAULT:
		reason = IOMMU_FAULT_REASON_PERMISSION;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Stage-2 is always pinned at the moment */
	if (evt[1] & EVTQ_1_S2)
		return -EFAULT;

	/* Decode the access permissions recorded in the event */
	if (evt[1] & EVTQ_1_RnW)
		perm |= IOMMU_FAULT_PERM_READ;
	else
		perm |= IOMMU_FAULT_PERM_WRITE;

	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;

	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	if (evt[1] & EVTQ_1_STALL) {
		/* Stalled transaction: recoverable, report as a page request */
		flt->type = IOMMU_FAULT_PAGE_REQ;
		flt->prm = (struct iommu_fault_page_request) {
			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	} else {
		flt->type = IOMMU_FAULT_DMA_UNRECOV;
		flt->event = (struct iommu_fault_unrecoverable) {
			.reason = reason,
			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
			.perm = perm,
			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
		};

		if (ssid_valid) {
			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
		}
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (!master) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = iommu_report_device_fault(master->dev, &fault_evt);
	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
		/* Nobody cared, abort the access */
		struct iommu_page_response resp = {
			.pasid		= flt->prm.pasid,
			.grpid		= flt->prm.grpid,
			.code		= IOMMU_PAGE_RESP_FAILURE,
		};
		arm_smmu_page_response(master->dev, &fault_evt, &resp);
	}

out_unlock:
	mutex_unlock(&smmu->streams_mutex);
	return ret;
}
1531 
/*
 * Threaded IRQ handler for the event queue: drain all pending events,
 * handing each one to arm_smmu_handle_evt() and (rate-limited) dumping
 * those that could not be handled.
 */
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->evtq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	u64 evt[EVTQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt)) {
			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);

			/* Dump the raw event only if unhandled and not throttled */
			ret = arm_smmu_handle_evt(smmu, evt);
			if (!ret || !__ratelimit(&rs))
				continue;

			dev_info(smmu->dev, "event 0x%02x received:\n", id);
			for (i = 0; i < ARRAY_SIZE(evt); ++i)
				dev_info(smmu->dev, "\t0x%016llx\n",
					 (unsigned long long)evt[i]);

		}

		/*
		 * Not much we can do on overflow, so scream and pretend we're
		 * trying harder.
		 */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
	return IRQ_HANDLED;
}
1570 
/*
 * Handle one PRI queue entry. PRI is not supported by this driver, so
 * the request is logged and, if it terminates a page request group,
 * answered with a DENY response.
 */
static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid, ssid;
	u16 grpid;
	bool ssv, last;

	/* Decode the PRIQ entry fields */
	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	if (last) {
		/* Deny the whole page request group */
		struct arm_smmu_cmdq_ent cmd = {
			.opcode			= CMDQ_OP_PRI_RESP,
			.substream_valid	= ssv,
			.pri			= {
				.sid	= sid,
				.ssid	= ssid,
				.grpid	= grpid,
				.resp	= PRI_RESP_DENY,
			},
		};

		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}
}
1608 
/*
 * Threaded IRQ handler for the PRI queue: drain all pending requests,
 * then publish our updated consumer pointer to the hardware.
 */
static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;
	struct arm_smmu_queue *q = &smmu->priq.q;
	struct arm_smmu_ll_queue *llq = &q->llq;
	u64 evt[PRIQ_ENT_DWORDS];

	do {
		while (!queue_remove_raw(q, evt))
			arm_smmu_handle_ppr(smmu, evt);

		/* Nothing to recover on overflow; just report it */
		if (queue_sync_prod_in(q) == -EOVERFLOW)
			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
	} while (!queue_empty(llq));

	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		      Q_IDX(llq, llq->cons);
	queue_sync_cons_out(q);
	return IRQ_HANDLED;
}
1630 
1631 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1632 
/*
 * Global error interrupt handler. The set of active errors is the XOR
 * of GERROR and GERRORN; acknowledging is done by writing the observed
 * GERROR value back to GERRORN at the end.
 */
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	/* Service Failure Mode is fatal: take the SMMU offline */
	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	/* A command queue error stalls the queue until we skip the bad entry */
	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	/* Acknowledge everything we saw */
	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}
1678 
1679 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1680 {
1681 	struct arm_smmu_device *smmu = dev;
1682 
1683 	arm_smmu_evtq_thread(irq, dev);
1684 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1685 		arm_smmu_priq_thread(irq, dev);
1686 
1687 	return IRQ_HANDLED;
1688 }
1689 
1690 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1691 {
1692 	arm_smmu_gerror_handler(irq, dev);
1693 	return IRQ_WAKE_THREAD;
1694 }
1695 
/*
 * Build a CMD_ATC_INV command covering [iova, iova + size). A @size of
 * zero requests invalidation of the entire ATC. The SID is filled in
 * later by the caller, once per target stream.
 */
static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
			struct arm_smmu_cmdq_ent *cmd)
{
	size_t log2_span;
	size_t span_mask;
	/* ATC invalidates are always on 4096-bytes pages */
	size_t inval_grain_shift = 12;
	unsigned long page_start, page_end;

	/*
	 * ATS and PASID:
	 *
	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
	 * prefix. In that case all ATC entries within the address range are
	 * invalidated, including those that were requested with a PASID! There
	 * is no way to invalidate only entries without PASID.
	 *
	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
	 * traffic), translation requests without PASID create ATC entries
	 * without PASID, which must be invalidated with substream_valid clear.
	 * This has the unpleasant side-effect of invalidating all PASID-tagged
	 * ATC entries within the address range.
	 */
	*cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_ATC_INV,
		.substream_valid	= !!ssid,
		.atc.ssid		= ssid,
	};

	if (!size) {
		cmd->atc.size = ATC_INV_SIZE_ALL;
		return;
	}

	page_start	= iova >> inval_grain_shift;
	page_end	= (iova + size - 1) >> inval_grain_shift;

	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
	log2_span	= fls_long(page_start ^ page_end);
	span_mask	= (1ULL << log2_span) - 1;

	/* Align the start down to the span so the range is naturally aligned */
	page_start	&= ~span_mask;

	cmd->atc.addr	= page_start << inval_grain_shift;
	cmd->atc.size	= log2_span;
}
1762 
1763 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1764 {
1765 	int i;
1766 	struct arm_smmu_cmdq_ent cmd;
1767 	struct arm_smmu_cmdq_batch cmds = {};
1768 
1769 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1770 
1771 	for (i = 0; i < master->num_streams; i++) {
1772 		cmd.atc.sid = master->streams[i].id;
1773 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1774 	}
1775 
1776 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1777 }
1778 
/*
 * Invalidate the ATC entries covering [iova, iova + size) for @ssid on
 * every ATS-enabled master attached to @smmu_domain. Returns 0 when
 * there is nothing to do or the batch was submitted successfully.
 */
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
			    unsigned long iova, size_t size)
{
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds;

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	cmds.num = 0;

	/* One ATC_INV per stream of every ATS-enabled master */
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_streams; i++) {
			cmd.atc.sid = master->streams[i].id;
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}
1826 
1827 /* IO_PGTABLE API */
/* io-pgtable tlb_flush_all callback: nuke the domain's whole TLB and ATC. */
static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd;

	/*
	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
	 * PTEs previously cleared by unmaps on the current CPU not yet visible
	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
	 * insertion to guarantee those are observed before the TLBI. Do be
	 * careful, 007.
	 */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		/* Stage 1: invalidate by ASID */
		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
	} else {
		/* Stage 2: invalidate everything tagged with the VMID */
		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}
	/* Endpoint ATCs may still hold stale translations */
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
}
1850 
/*
 * Emit TLBI commands covering [iova, iova + size) in steps of @granule,
 * or in larger encoded ranges when the SMMU supports range
 * invalidation (ARM_SMMU_FEAT_RANGE_INV). @cmd has the opcode and
 * ASID/VMID already filled in by the caller.
 */
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
				     unsigned long iova, size_t size,
				     size_t granule,
				     struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long end = iova + size, num_pages = 0, tg = 0;
	size_t inv_range = granule;
	struct arm_smmu_cmdq_batch cmds;

	if (!size)
		return;

	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
		/* Get the leaf page size */
		tg = __ffs(smmu_domain->domain.pgsize_bitmap);

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd->tlbi.tg = (tg - 10) / 2;

		/* Determine what level the granule is at */
		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

		num_pages = size >> tg;
	}

	cmds.num = 0;

	while (iova < end) {
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, num;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(num_pages);
			cmd->tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd->tlbi.num = num - 1;

			/* range is num * 2^scale * pgsize */
			inv_range = num << (scale + tg);

			/* Clear out the lower order bits for the next iteration */
			num_pages -= num << scale;
		}

		cmd->tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
		iova += inv_range;
	}
	/* Submit the batch and wait for all TLBIs to complete */
	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
1911 
1912 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1913 					  size_t granule, bool leaf,
1914 					  struct arm_smmu_domain *smmu_domain)
1915 {
1916 	struct arm_smmu_cmdq_ent cmd = {
1917 		.tlbi = {
1918 			.leaf	= leaf,
1919 		},
1920 	};
1921 
1922 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1923 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1924 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1925 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1926 	} else {
1927 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1928 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1929 	}
1930 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1931 
1932 	/*
1933 	 * Unfortunately, this can't be leaf-only since we may have
1934 	 * zapped an entire table.
1935 	 */
1936 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1937 }
1938 
1939 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1940 				 size_t granule, bool leaf,
1941 				 struct arm_smmu_domain *smmu_domain)
1942 {
1943 	struct arm_smmu_cmdq_ent cmd = {
1944 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1945 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1946 		.tlbi = {
1947 			.asid	= asid,
1948 			.leaf	= leaf,
1949 		},
1950 	};
1951 
1952 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1953 }
1954 
1955 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1956 					 unsigned long iova, size_t granule,
1957 					 void *cookie)
1958 {
1959 	struct arm_smmu_domain *smmu_domain = cookie;
1960 	struct iommu_domain *domain = &smmu_domain->domain;
1961 
1962 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1963 }
1964 
1965 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1966 				  size_t granule, void *cookie)
1967 {
1968 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1969 }
1970 
/* TLB maintenance callbacks handed to io-pgtable for this driver */
static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};
1976 
1977 /* IOMMU API */
1978 static bool arm_smmu_capable(enum iommu_cap cap)
1979 {
1980 	switch (cap) {
1981 	case IOMMU_CAP_CACHE_COHERENCY:
1982 		return true;
1983 	case IOMMU_CAP_NOEXEC:
1984 		return true;
1985 	default:
1986 		return false;
1987 	}
1988 }
1989 
1990 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1991 {
1992 	struct arm_smmu_domain *smmu_domain;
1993 
1994 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1995 	    type != IOMMU_DOMAIN_DMA &&
1996 	    type != IOMMU_DOMAIN_DMA_FQ &&
1997 	    type != IOMMU_DOMAIN_IDENTITY)
1998 		return NULL;
1999 
2000 	/*
2001 	 * Allocate the domain and initialise some of its data structures.
2002 	 * We can't really do anything meaningful until we've added a
2003 	 * master.
2004 	 */
2005 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2006 	if (!smmu_domain)
2007 		return NULL;
2008 
2009 	mutex_init(&smmu_domain->init_mutex);
2010 	INIT_LIST_HEAD(&smmu_domain->devices);
2011 	spin_lock_init(&smmu_domain->devices_lock);
2012 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2013 
2014 	return &smmu_domain->domain;
2015 }
2016 
2017 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2018 {
2019 	int idx, size = 1 << span;
2020 
2021 	do {
2022 		idx = find_first_zero_bit(map, size);
2023 		if (idx == size)
2024 			return -ENOSPC;
2025 	} while (test_and_set_bit(idx, map));
2026 
2027 	return idx;
2028 }
2029 
/* Release an ID previously handed out by arm_smmu_bitmap_alloc() */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
2034 
/* Tear down a domain: free its page tables, then its ASID or VMID */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

	/* Free the CD and ASID, if we allocated them */
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;

		/* Prevent SVA from touching the CD while we're freeing it */
		mutex_lock(&arm_smmu_asid_lock);
		if (cfg->cdcfg.cdtab)
			arm_smmu_free_cd_tables(smmu_domain);
		arm_smmu_free_asid(&cfg->cd);
		mutex_unlock(&arm_smmu_asid_lock);
	} else {
		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
		/* vmid 0 is treated as "never allocated" — NOTE(review):
		 * assumes VMID 0 is never handed out by the allocator */
		if (cfg->vmid)
			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
	}

	kfree(smmu_domain);
}
2060 
/*
 * Finalise the stage-1 configuration for @smmu_domain: allocate an ASID,
 * build CD tables sized for @master's SSID range, and install the context
 * descriptor for SSID 0. Unwinds fully on failure.
 */
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_master *master,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	int ret;
	u32 asid;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;

	refcount_set(&cfg->cd.refs, 1);

	/* Prevent SVA from modifying the ASID until it is written to the CD */
	mutex_lock(&arm_smmu_asid_lock);
	/* The XA_LIMIT lower bound of 1 keeps ASID 0 out of circulation */
	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
	if (ret)
		goto out_unlock;

	cfg->s1cdmax = master->ssid_bits;

	smmu_domain->stall_enabled = master->stall_enabled;

	ret = arm_smmu_alloc_cd_tables(smmu_domain);
	if (ret)
		goto out_free_asid;

	/* Pack the io-pgtable TCR fields into the CD's TCR layout */
	cfg->cd.asid	= (u16)asid;
	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;

	/*
	 * Note that this will end up calling arm_smmu_sync_cd() before
	 * the master has been added to the devices list for this domain.
	 * This isn't an issue because the STE hasn't been installed yet.
	 */
	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
	if (ret)
		goto out_free_cd_tables;

	mutex_unlock(&arm_smmu_asid_lock);
	return 0;

out_free_cd_tables:
	arm_smmu_free_cd_tables(smmu_domain);
out_free_asid:
	arm_smmu_free_asid(&cfg->cd);
out_unlock:
	mutex_unlock(&arm_smmu_asid_lock);
	return ret;
}
2119 
2120 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2121 				       struct arm_smmu_master *master,
2122 				       struct io_pgtable_cfg *pgtbl_cfg)
2123 {
2124 	int vmid;
2125 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2126 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2127 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2128 
2129 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2130 	if (vmid < 0)
2131 		return vmid;
2132 
2133 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2134 	cfg->vmid	= (u16)vmid;
2135 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2136 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2137 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2138 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2139 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2140 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2141 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2142 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2143 	return 0;
2144 }
2145 
/*
 * Pick and configure a translation regime for @domain and allocate its
 * io-pgtable. Identity domains are marked bypass and need no page tables.
 */
static int arm_smmu_domain_finalise(struct iommu_domain *domain,
				    struct arm_smmu_master *master)
{
	int ret;
	unsigned long ias, oas;
	enum io_pgtable_fmt fmt;
	struct io_pgtable_cfg pgtbl_cfg;
	struct io_pgtable_ops *pgtbl_ops;
	int (*finalise_stage_fn)(struct arm_smmu_domain *,
				 struct arm_smmu_master *,
				 struct io_pgtable_cfg *);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		return 0;
	}

	/* Restrict the stage to what we can actually support */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		/* 52-bit input only with VAX, and never beyond kernel VA_BITS */
		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
		ias = min_t(unsigned long, ias, VA_BITS);
		oas = smmu->ias;
		fmt = ARM_64_LPAE_S1;
		finalise_stage_fn = arm_smmu_domain_finalise_s1;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
	case ARM_SMMU_DOMAIN_S2:
		ias = smmu->ias;
		oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		finalise_stage_fn = arm_smmu_domain_finalise_s2;
		break;
	default:
		return -EINVAL;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	/* Publish the (possibly adjusted) values from the pgtable allocator */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
	domain->geometry.force_aperture = true;

	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
	if (ret < 0) {
		free_io_pgtable_ops(pgtbl_ops);
		return ret;
	}

	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;
}
2216 
2217 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2218 {
2219 	__le64 *step;
2220 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2221 
2222 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2223 		struct arm_smmu_strtab_l1_desc *l1_desc;
2224 		int idx;
2225 
2226 		/* Two-level walk */
2227 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2228 		l1_desc = &cfg->l1_desc[idx];
2229 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2230 		step = &l1_desc->l2ptr[idx];
2231 	} else {
2232 		/* Simple linear lookup */
2233 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2234 	}
2235 
2236 	return step;
2237 }
2238 
2239 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2240 {
2241 	int i, j;
2242 	struct arm_smmu_device *smmu = master->smmu;
2243 
2244 	for (i = 0; i < master->num_streams; ++i) {
2245 		u32 sid = master->streams[i].id;
2246 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2247 
2248 		/* Bridged PCI devices may end up with duplicated IDs */
2249 		for (j = 0; j < i; j++)
2250 			if (master->streams[j].id == sid)
2251 				break;
2252 		if (j < i)
2253 			continue;
2254 
2255 		arm_smmu_write_strtab_ent(master, sid, step);
2256 	}
2257 }
2258 
2259 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2260 {
2261 	struct device *dev = master->dev;
2262 	struct arm_smmu_device *smmu = master->smmu;
2263 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2264 
2265 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2266 		return false;
2267 
2268 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2269 		return false;
2270 
2271 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2272 }
2273 
/* Enable ATS at the endpoint once its STE has been set up for it */
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
	size_t stu;
	struct pci_dev *pdev;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_domain *smmu_domain = master->domain;

	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
	if (!master->ats_enabled)
		return;

	/* Smallest Translation Unit: log2 of the smallest supported granule */
	stu = __ffs(smmu->pgsize_bitmap);
	pdev = to_pci_dev(master->dev);

	/*
	 * Account the master and invalidate its ATC before the endpoint may
	 * start issuing translation requests.
	 */
	atomic_inc(&smmu_domain->nr_ats_masters);
	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
	if (pci_enable_ats(pdev, stu))
		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
2294 
/* Disable ATS at the endpoint and flush any stale ATC entries */
static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
	struct arm_smmu_domain *smmu_domain = master->domain;

	/* Nothing to do if ATS was never turned on for this master */
	if (!master->ats_enabled)
		return;

	pci_disable_ats(to_pci_dev(master->dev));
	/*
	 * Ensure ATS is disabled at the endpoint before we issue the
	 * ATC invalidation via the SMMU.
	 */
	wmb();
	arm_smmu_atc_inv_master(master);
	/* Only stop counting this master once its ATC has been flushed */
	atomic_dec(&smmu_domain->nr_ats_masters);
}
2311 
2312 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2313 {
2314 	int ret;
2315 	int features;
2316 	int num_pasids;
2317 	struct pci_dev *pdev;
2318 
2319 	if (!dev_is_pci(master->dev))
2320 		return -ENODEV;
2321 
2322 	pdev = to_pci_dev(master->dev);
2323 
2324 	features = pci_pasid_features(pdev);
2325 	if (features < 0)
2326 		return features;
2327 
2328 	num_pasids = pci_max_pasids(pdev);
2329 	if (num_pasids <= 0)
2330 		return num_pasids;
2331 
2332 	ret = pci_enable_pasid(pdev, features);
2333 	if (ret) {
2334 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2335 		return ret;
2336 	}
2337 
2338 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2339 				  master->smmu->ssid_bits);
2340 	return 0;
2341 }
2342 
2343 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2344 {
2345 	struct pci_dev *pdev;
2346 
2347 	if (!dev_is_pci(master->dev))
2348 		return;
2349 
2350 	pdev = to_pci_dev(master->dev);
2351 
2352 	if (!pdev->pasid_enabled)
2353 		return;
2354 
2355 	master->ssid_bits = 0;
2356 	pci_disable_pasid(pdev);
2357 }
2358 
/* Detach @master from its current domain, if any */
static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = master->domain;

	/* Not attached to anything: nothing to undo */
	if (!smmu_domain)
		return;

	/* Quiesce ATS before the STE is rewritten below */
	arm_smmu_disable_ats(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_del(&master->domain_head);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	master->domain = NULL;
	master->ats_enabled = false;
	/* Rewrite the STE(s) to reflect the now-domainless state */
	arm_smmu_install_ste_for_dev(master);
}
2377 
/* Attach @dev to @domain, finalising the domain on its first attach */
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret = 0;
	unsigned long flags;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master *master;

	if (!fwspec)
		return -ENOENT;

	master = dev_iommu_priv_get(dev);
	smmu = master->smmu;

	/*
	 * Checking that SVA is disabled ensures that this device isn't bound to
	 * any mm, and can be safely detached from its old domain. Bonds cannot
	 * be removed concurrently since we're holding the group mutex.
	 */
	if (arm_smmu_master_sva_enabled(master)) {
		dev_err(dev, "cannot attach - SVA enabled\n");
		return -EBUSY;
	}

	arm_smmu_detach_dev(master);

	mutex_lock(&smmu_domain->init_mutex);

	if (!smmu_domain->smmu) {
		/* First attach: bind the domain to this SMMU and finalise it */
		smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain, master);
		if (ret) {
			smmu_domain->smmu = NULL;
			goto out_unlock;
		}
	} else if (smmu_domain->smmu != smmu) {
		/* A finalised domain is tied to a single SMMU instance */
		dev_err(dev,
			"cannot attach to SMMU %s (upstream of %s)\n",
			dev_name(smmu_domain->smmu->dev),
			dev_name(smmu->dev));
		ret = -ENXIO;
		goto out_unlock;
	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
		/* The CD table was sized for a different SSID width */
		dev_err(dev,
			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
		ret = -EINVAL;
		goto out_unlock;
	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
		   smmu_domain->stall_enabled != master->stall_enabled) {
		dev_err(dev, "cannot attach to stall-%s domain\n",
			smmu_domain->stall_enabled ? "enabled" : "disabled");
		ret = -EINVAL;
		goto out_unlock;
	}

	master->domain = smmu_domain;

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
		master->ats_enabled = arm_smmu_ats_supported(master);

	/* Point the STE(s) at the new domain, then publish on the dev list */
	arm_smmu_install_ste_for_dev(master);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_add(&master->domain_head, &smmu_domain->devices);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_enable_ats(master);

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
2453 
2454 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2455 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2456 			      int prot, gfp_t gfp, size_t *mapped)
2457 {
2458 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2459 
2460 	if (!ops)
2461 		return -ENODEV;
2462 
2463 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2464 }
2465 
2466 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2467 				   size_t pgsize, size_t pgcount,
2468 				   struct iommu_iotlb_gather *gather)
2469 {
2470 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2471 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2472 
2473 	if (!ops)
2474 		return 0;
2475 
2476 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2477 }
2478 
2479 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2480 {
2481 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2482 
2483 	if (smmu_domain->smmu)
2484 		arm_smmu_tlb_inv_context(smmu_domain);
2485 }
2486 
2487 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2488 				struct iommu_iotlb_gather *gather)
2489 {
2490 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2491 
2492 	if (!gather->pgsize)
2493 		return;
2494 
2495 	arm_smmu_tlb_inv_range_domain(gather->start,
2496 				      gather->end - gather->start + 1,
2497 				      gather->pgsize, true, smmu_domain);
2498 }
2499 
2500 static phys_addr_t
2501 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2502 {
2503 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2504 
2505 	if (!ops)
2506 		return 0;
2507 
2508 	return ops->iova_to_phys(ops, iova);
2509 }
2510 
2511 static struct platform_driver arm_smmu_driver;
2512 
/* Look up the arm_smmu_device instance bound to @fwnode, if any */
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	/*
	 * driver_find_device_by_fwnode() took a reference that we don't keep.
	 * NOTE(review): using dev after put_device() assumes the SMMU device
	 * cannot be unbound here — confirm against the driver's unbind rules.
	 */
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
2521 
2522 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2523 {
2524 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2525 
2526 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2527 		limit *= 1UL << STRTAB_SPLIT;
2528 
2529 	return sid < limit;
2530 }
2531 
2532 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2533 				  struct arm_smmu_master *master)
2534 {
2535 	int i;
2536 	int ret = 0;
2537 	struct arm_smmu_stream *new_stream, *cur_stream;
2538 	struct rb_node **new_node, *parent_node = NULL;
2539 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2540 
2541 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2542 				  GFP_KERNEL);
2543 	if (!master->streams)
2544 		return -ENOMEM;
2545 	master->num_streams = fwspec->num_ids;
2546 
2547 	mutex_lock(&smmu->streams_mutex);
2548 	for (i = 0; i < fwspec->num_ids; i++) {
2549 		u32 sid = fwspec->ids[i];
2550 
2551 		new_stream = &master->streams[i];
2552 		new_stream->id = sid;
2553 		new_stream->master = master;
2554 
2555 		/*
2556 		 * Check the SIDs are in range of the SMMU and our stream table
2557 		 */
2558 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2559 			ret = -ERANGE;
2560 			break;
2561 		}
2562 
2563 		/* Ensure l2 strtab is initialised */
2564 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2565 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2566 			if (ret)
2567 				break;
2568 		}
2569 
2570 		/* Insert into SID tree */
2571 		new_node = &(smmu->streams.rb_node);
2572 		while (*new_node) {
2573 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2574 					      node);
2575 			parent_node = *new_node;
2576 			if (cur_stream->id > new_stream->id) {
2577 				new_node = &((*new_node)->rb_left);
2578 			} else if (cur_stream->id < new_stream->id) {
2579 				new_node = &((*new_node)->rb_right);
2580 			} else {
2581 				dev_warn(master->dev,
2582 					 "stream %u already in tree\n",
2583 					 cur_stream->id);
2584 				ret = -EINVAL;
2585 				break;
2586 			}
2587 		}
2588 		if (ret)
2589 			break;
2590 
2591 		rb_link_node(&new_stream->node, parent_node, new_node);
2592 		rb_insert_color(&new_stream->node, &smmu->streams);
2593 	}
2594 
2595 	if (ret) {
2596 		for (i--; i >= 0; i--)
2597 			rb_erase(&master->streams[i].node, &smmu->streams);
2598 		kfree(master->streams);
2599 	}
2600 	mutex_unlock(&smmu->streams_mutex);
2601 
2602 	return ret;
2603 }
2604 
2605 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2606 {
2607 	int i;
2608 	struct arm_smmu_device *smmu = master->smmu;
2609 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2610 
2611 	if (!smmu || !master->streams)
2612 		return;
2613 
2614 	mutex_lock(&smmu->streams_mutex);
2615 	for (i = 0; i < fwspec->num_ids; i++)
2616 		rb_erase(&master->streams[i].node, &smmu->streams);
2617 	mutex_unlock(&smmu->streams_mutex);
2618 
2619 	kfree(master->streams);
2620 }
2621 
2622 static struct iommu_ops arm_smmu_ops;
2623 
/* Set up per-device driver state when the IOMMU core discovers @dev */
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	int ret;
	struct arm_smmu_device *smmu;
	struct arm_smmu_master *master;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return ERR_PTR(-ENODEV);

	/* A master must not already be bound to this device */
	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
		return ERR_PTR(-EBUSY);

	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!smmu)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);

	master->dev = dev;
	master->smmu = smmu;
	INIT_LIST_HEAD(&master->bonds);
	dev_iommu_priv_set(dev, master);

	ret = arm_smmu_insert_master(smmu, master);
	if (ret)
		goto err_free_master;

	/* Firmware-described PASID width, clamped to what the SMMU supports */
	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);

	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 *   Behavior is undefined if this bit is Set and the value of the PASID
	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 *   are changed.
	 */
	arm_smmu_enable_pasid(master);

	/* A linear CD table further limits the usable SSID range */
	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
		master->ssid_bits = min_t(u8, master->ssid_bits,
					  CTXDESC_LINEAR_CDMAX);

	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
	     device_property_read_bool(dev, "dma-can-stall")) ||
	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
		master->stall_enabled = true;

	return &smmu->iommu;

err_free_master:
	kfree(master);
	dev_iommu_priv_set(dev, NULL);
	return ERR_PTR(ret);
}
2683 
/* Undo arm_smmu_probe_device() when the device goes away */
static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master *master;

	/* Only tear down devices that we actually probed */
	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	master = dev_iommu_priv_get(dev);
	/* SVA should already be disabled by this point */
	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
	arm_smmu_detach_dev(master);
	arm_smmu_disable_pasid(master);
	arm_smmu_remove_master(master);
	kfree(master);
	iommu_fwspec_free(dev);
}
2701 
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		return pci_device_group(dev);

	return generic_device_group(dev);
}
2718 
2719 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2720 {
2721 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2722 	int ret = 0;
2723 
2724 	mutex_lock(&smmu_domain->init_mutex);
2725 	if (smmu_domain->smmu)
2726 		ret = -EPERM;
2727 	else
2728 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2729 	mutex_unlock(&smmu_domain->init_mutex);
2730 
2731 	return ret;
2732 }
2733 
/* Translate one OF "iommus" specifier cell into a fwspec stream ID */
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}
2738 
/*
 * Report reserved IOVA regions for @dev: the software-mapped MSI window
 * plus whatever iommu_dma_get_resv_regions() adds.
 */
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}
2754 
2755 static bool arm_smmu_dev_has_feature(struct device *dev,
2756 				     enum iommu_dev_features feat)
2757 {
2758 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2759 
2760 	if (!master)
2761 		return false;
2762 
2763 	switch (feat) {
2764 	case IOMMU_DEV_FEAT_IOPF:
2765 		return arm_smmu_master_iopf_supported(master);
2766 	case IOMMU_DEV_FEAT_SVA:
2767 		return arm_smmu_master_sva_supported(master);
2768 	default:
2769 		return false;
2770 	}
2771 }
2772 
2773 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2774 					 enum iommu_dev_features feat)
2775 {
2776 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2777 
2778 	if (!master)
2779 		return false;
2780 
2781 	switch (feat) {
2782 	case IOMMU_DEV_FEAT_IOPF:
2783 		return master->iopf_enabled;
2784 	case IOMMU_DEV_FEAT_SVA:
2785 		return arm_smmu_master_sva_enabled(master);
2786 	default:
2787 		return false;
2788 	}
2789 }
2790 
2791 static int arm_smmu_dev_enable_feature(struct device *dev,
2792 				       enum iommu_dev_features feat)
2793 {
2794 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2795 
2796 	if (!arm_smmu_dev_has_feature(dev, feat))
2797 		return -ENODEV;
2798 
2799 	if (arm_smmu_dev_feature_enabled(dev, feat))
2800 		return -EBUSY;
2801 
2802 	switch (feat) {
2803 	case IOMMU_DEV_FEAT_IOPF:
2804 		master->iopf_enabled = true;
2805 		return 0;
2806 	case IOMMU_DEV_FEAT_SVA:
2807 		return arm_smmu_master_enable_sva(master);
2808 	default:
2809 		return -EINVAL;
2810 	}
2811 }
2812 
2813 static int arm_smmu_dev_disable_feature(struct device *dev,
2814 					enum iommu_dev_features feat)
2815 {
2816 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2817 
2818 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2819 		return -EINVAL;
2820 
2821 	switch (feat) {
2822 	case IOMMU_DEV_FEAT_IOPF:
2823 		if (master->sva_enabled)
2824 			return -EBUSY;
2825 		master->iopf_enabled = false;
2826 		return 0;
2827 	case IOMMU_DEV_FEAT_SVA:
2828 		return arm_smmu_master_disable_sva(master);
2829 	default:
2830 		return -EINVAL;
2831 	}
2832 }
2833 
/* The IOMMU core callbacks implemented by this driver */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map_pages		= arm_smmu_map_pages,
	.unmap_pages		= arm_smmu_unmap_pages,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.enable_nesting		= arm_smmu_enable_nesting,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.dev_has_feat		= arm_smmu_dev_has_feature,
	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
	.dev_enable_feat	= arm_smmu_dev_enable_feature,
	.dev_disable_feat	= arm_smmu_dev_disable_feature,
	.sva_bind		= arm_smmu_sva_bind,
	.sva_unbind		= arm_smmu_sva_unbind,
	.sva_get_pasid		= arm_smmu_sva_get_pasid,
	.page_response		= arm_smmu_page_response,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
};
2862 
2863 /* Probing and initialisation functions */
/*
 * Allocate the DMA ring for @q and record its registers and base address.
 * If the requested size can't be allocated, the queue is progressively
 * halved (down to a single page) before giving up.
 */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
				   struct arm_smmu_queue *q,
				   void __iomem *page,
				   unsigned long prod_off,
				   unsigned long cons_off,
				   size_t dwords, const char *name)
{
	size_t qsz;

	do {
		/* Queue size in bytes: entries * dwords-per-entry * 8 */
		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->llq.max_n_shift--;
	} while (1);

	if (!q->base) {
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	/* The base address is expected to be naturally aligned to the size */
	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->llq.max_n_shift, name);
	}

	q->prod_reg	= page + prod_off;
	q->cons_reg	= page + cons_off;
	q->ent_dwords	= dwords;

	/* Compose the Q_BASE register value: address + log2 size */
	q->q_base  = Q_BASE_RWA;
	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);

	q->llq.prod = q->llq.cons = 0;
	return 0;
}
2906 
/* devm action callback: release the cmdq valid-bit map */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free(data);
}
2912 
2913 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2914 {
2915 	int ret = 0;
2916 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2917 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2918 	atomic_long_t *bitmap;
2919 
2920 	atomic_set(&cmdq->owner_prod, 0);
2921 	atomic_set(&cmdq->lock, 0);
2922 
2923 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2924 	if (!bitmap) {
2925 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2926 		ret = -ENOMEM;
2927 	} else {
2928 		cmdq->valid_map = bitmap;
2929 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2930 	}
2931 
2932 	return ret;
2933 }
2934 
/* Allocate the command, event and (optional) PRI queues */
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
	int ret;

	/* cmdq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
				      CMDQ_ENT_DWORDS, "cmdq");
	if (ret)
		return ret;

	ret = arm_smmu_cmdq_init(smmu);
	if (ret)
		return ret;

	/* evtq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
				      EVTQ_ENT_DWORDS, "evtq");
	if (ret)
		return ret;

	/* SVA with stalls additionally needs an I/O page fault queue */
	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
		if (!smmu->evtq.iopf)
			return -ENOMEM;
	}

	/* priq */
	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
		return 0;

	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
				       PRIQ_ENT_DWORDS, "priq");
}
2972 
2973 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2974 {
2975 	unsigned int i;
2976 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2977 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2978 	void *strtab = smmu->strtab_cfg.strtab;
2979 
2980 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2981 	if (!cfg->l1_desc)
2982 		return -ENOMEM;
2983 
2984 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2985 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2986 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2987 	}
2988 
2989 	return 0;
2990 }
2991 
/*
 * Allocate a two-level stream table. Only the L1 descriptors are allocated
 * here; L2 tables are populated on demand via arm_smmu_init_l2_strtab().
 */
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size, l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

	/* Calculate the L1 size, capped to the SIDSIZE. */
	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
	cfg->num_l1_ents = 1 << size;

	/* "size" now counts the total SID bits covered: L1 index + L2 split */
	size += STRTAB_SPLIT;
	if (size < smmu->sid_bits)
		dev_warn(smmu->dev,
			 "2-level strtab only covers %u/%u bits of SID\n",
			 size, smmu->sid_bits);

	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
				     GFP_KERNEL);
	if (!strtab) {
		dev_err(smmu->dev,
			"failed to allocate l1 stream table (%u bytes)\n",
			l1size);
		return -ENOMEM;
	}
	cfg->strtab = strtab;

	/* Configure strtab_base_cfg for 2 levels */
	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
	cfg->strtab_base_cfg = reg;

	return arm_smmu_init_l1_strtab(smmu);
}
3029 
3030 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3031 {
3032 	void *strtab;
3033 	u64 reg;
3034 	u32 size;
3035 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3036 
3037 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3038 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3039 				     GFP_KERNEL);
3040 	if (!strtab) {
3041 		dev_err(smmu->dev,
3042 			"failed to allocate linear stream table (%u bytes)\n",
3043 			size);
3044 		return -ENOMEM;
3045 	}
3046 	cfg->strtab = strtab;
3047 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3048 
3049 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3050 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3051 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3052 	cfg->strtab_base_cfg = reg;
3053 
3054 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3055 	return 0;
3056 }
3057 
3058 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3059 {
3060 	u64 reg;
3061 	int ret;
3062 
3063 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3064 		ret = arm_smmu_init_strtab_2lvl(smmu);
3065 	else
3066 		ret = arm_smmu_init_strtab_linear(smmu);
3067 
3068 	if (ret)
3069 		return ret;
3070 
3071 	/* Set the strtab base address */
3072 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3073 	reg |= STRTAB_BASE_RA;
3074 	smmu->strtab_cfg.strtab_base = reg;
3075 
3076 	/* Allocate the first VMID for stage-2 bypass STEs */
3077 	set_bit(0, smmu->vmid_map);
3078 	return 0;
3079 }
3080 
3081 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3082 {
3083 	int ret;
3084 
3085 	mutex_init(&smmu->streams_mutex);
3086 	smmu->streams = RB_ROOT;
3087 
3088 	ret = arm_smmu_init_queues(smmu);
3089 	if (ret)
3090 		return ret;
3091 
3092 	return arm_smmu_init_strtab(smmu);
3093 }
3094 
/*
 * Write @val to the register at @reg_off and poll the paired acknowledge
 * register at @ack_off (e.g. CR0 -> CR0ACK) until it reflects the new
 * value. Returns 0 on success or -ETIMEDOUT if the SMMU does not
 * acknowledge within ARM_SMMU_POLL_TIMEOUT_US.
 */
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	u32 reg;

	writel_relaxed(val, smmu->base + reg_off);
	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}
3104 
/*
 * GBPA is "special": it has no separate acknowledge register. Software must
 * instead poll the UPDATE bit until the SMMU clears it, both before touching
 * the register and after writing the new field values.
 */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
	int ret;
	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;

	/* Wait for any in-flight update to complete */
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);
	if (ret)
		return ret;

	reg &= ~clr;
	reg |= set;
	/* Setting UPDATE tells the SMMU to latch the new field values */
	writel_relaxed(reg | GBPA_UPDATE, gbpa);
	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
					 1, ARM_SMMU_POLL_TIMEOUT_US);

	if (ret)
		dev_err(smmu->dev, "GBPA not responding to update\n");
	return ret;
}
3126 
/* devm action: release the platform MSIs allocated by arm_smmu_setup_msis() */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
3132 
/*
 * platform-MSI write callback: program the doorbell address, payload and
 * memory attributes for one of the SMMU's MSI-capable interrupt sources.
 */
static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	phys_addr_t doorbell;
	struct device *dev = msi_desc_to_dev(desc);
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	/* Per-source register triple: CFG0 (addr), CFG1 (data), CFG2 (attr) */
	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];

	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
	doorbell &= MSI_CFG0_ADDR_MASK;

	writeq_relaxed(doorbell, smmu->base + cfg[0]);
	writel_relaxed(msg->data, smmu->base + cfg[1]);
	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}
3147 
/*
 * Allocate MSIs for the evtq, gerror and (if present) priq interrupt
 * sources, recording the Linux IRQ numbers for later request_irq(). Falls
 * back silently to wired interrupts when MSIs are unsupported or the
 * allocation fails.
 */
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
	struct msi_desc *desc;
	int ret, nvec = ARM_SMMU_MAX_MSIS;
	struct device *dev = smmu->dev;

	/* Clear the MSI address regs */
	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);

	/* Only reserve a PRIQ vector when the PRI queue actually exists */
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
	else
		nvec--;

	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
		return;

	if (!dev->msi_domain) {
		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
		return;
	}

	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
	if (ret) {
		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
		return;
	}

	/* Stash the allocated IRQ numbers by MSI index */
	for_each_msi_entry(desc, dev) {
		switch (desc->platform.msi_index) {
		case EVTQ_MSI_INDEX:
			smmu->evtq.q.irq = desc->irq;
			break;
		case GERROR_MSI_INDEX:
			smmu->gerr_irq = desc->irq;
			break;
		case PRIQ_MSI_INDEX:
			smmu->priq.q.irq = desc->irq;
			break;
		default:	/* Unknown */
			continue;
		}
	}

	/* Add callback to free MSIs on teardown */
	devm_add_action(dev, arm_smmu_free_msis, dev);
}
3197 
/*
 * Request separate interrupt lines (MSI or wired) for the event queue,
 * global errors and the PRI queue. Failure to obtain any individual line is
 * reported but not fatal: the SMMU still translates, only the corresponding
 * notifications are lost.
 */
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}
3243 
/*
 * Disable interrupt generation, hook up either the single combined IRQ or
 * the per-source IRQs, then re-enable generation in IRQ_CTRL. The final
 * enable is best-effort: a timeout is only warned about and 0 is still
 * returned, since the SMMU remains functional without interrupts.
 */
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	int ret, irq;
	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

	/* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
					arm_smmu_combined_irq_handler,
					arm_smmu_combined_irq_thread,
					IRQF_ONESHOT,
					"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else
		arm_smmu_setup_unique_irqs(smmu);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
3284 
3285 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3286 {
3287 	int ret;
3288 
3289 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3290 	if (ret)
3291 		dev_err(smmu->dev, "failed to clear cr0\n");
3292 
3293 	return ret;
3294 }
3295 
/*
 * Bring the SMMU from an unknown state into full operation: quiesce it,
 * program memory attributes and the stream table / queue base registers,
 * invalidate cached configuration and TLBs, enable the queues one by one
 * (each gated on a CR0/CR0ACK handshake), set up interrupts and finally
 * either enable translation or configure global bypass, per @bypass.
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		/* Abort incoming transactions while we reconfigure */
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID;

	if (smmu->features & ARM_SMMU_FEAT_E2H)
		reg |= CR2_E2H;

	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* The cmdq must be running before we can issue any invalidations */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
		writel_relaxed(smmu->priq.q.llq.cons,
			       smmu->page1 + ARM_SMMU_PRIQ_CONS);

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/* Keep the queues quiet in a crash kernel */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3430 
/*
 * Read the SMMU ID registers and populate smmu->features, queue sizes,
 * SID/SSID/ASID/VMID widths and the supported address sizes. Returns
 * -ENXIO for hardware configurations this driver cannot support.
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI-based CMD_SYNC polling needs coherent MSI writes */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/* First probed instance seeds the shared ops; later ones merge in */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	smmu->ias = max(smmu->ias, smmu->oas);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
3643 
3644 #ifdef CONFIG_ACPI
3645 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3646 {
3647 	switch (model) {
3648 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3649 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3650 		break;
3651 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3652 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3653 		break;
3654 	}
3655 
3656 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3657 }
3658 
/*
 * Firmware probe via ACPI/IORT: pick up model quirks and the coherency
 * override from the IORT SMMUv3 node attached as platform data.
 */
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	/* IORT code stores a pointer to the node as the platform data */
	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	acpi_smmu_get_options(iort_smmu->model, smmu);

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
3678 #else
/* !CONFIG_ACPI stub: no ACPI firmware description can exist */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
3684 #endif
3685 
3686 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3687 				    struct arm_smmu_device *smmu)
3688 {
3689 	struct device *dev = &pdev->dev;
3690 	u32 cells;
3691 	int ret = -EINVAL;
3692 
3693 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3694 		dev_err(dev, "missing #iommu-cells property\n");
3695 	else if (cells != 1)
3696 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3697 	else
3698 		ret = 0;
3699 
3700 	parse_driver_options(smmu);
3701 
3702 	if (of_dma_is_coherent(dev->of_node))
3703 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3704 
3705 	return ret;
3706 }
3707 
3708 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3709 {
3710 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3711 		return SZ_64K;
3712 	else
3713 		return SZ_128K;
3714 }
3715 
/*
 * Install (@ops != NULL) or remove (@ops == NULL) the IOMMU ops on every bus
 * type this driver serves, unwinding previously-set buses on failure.
 */
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}
3752 
/*
 * Map a sub-range of the SMMU MMIO space via a synthesized resource, so
 * only the architected register pages are claimed (see the comment in
 * arm_smmu_device_probe() about the IMPLEMENTATION DEFINED/PMCG regions).
 */
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}
3760 
3761 static int arm_smmu_device_probe(struct platform_device *pdev)
3762 {
3763 	int irq, ret;
3764 	struct resource *res;
3765 	resource_size_t ioaddr;
3766 	struct arm_smmu_device *smmu;
3767 	struct device *dev = &pdev->dev;
3768 	bool bypass;
3769 
3770 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3771 	if (!smmu)
3772 		return -ENOMEM;
3773 	smmu->dev = dev;
3774 
3775 	if (dev->of_node) {
3776 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3777 	} else {
3778 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3779 		if (ret == -ENODEV)
3780 			return ret;
3781 	}
3782 
3783 	/* Set bypass mode according to firmware probing result */
3784 	bypass = !!ret;
3785 
3786 	/* Base address */
3787 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3788 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3789 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3790 		return -EINVAL;
3791 	}
3792 	ioaddr = res->start;
3793 
3794 	/*
3795 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3796 	 * the PMCG registers which are reserved by the PMU driver.
3797 	 */
3798 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3799 	if (IS_ERR(smmu->base))
3800 		return PTR_ERR(smmu->base);
3801 
3802 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3803 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3804 					       ARM_SMMU_REG_SZ);
3805 		if (IS_ERR(smmu->page1))
3806 			return PTR_ERR(smmu->page1);
3807 	} else {
3808 		smmu->page1 = smmu->base;
3809 	}
3810 
3811 	/* Interrupt lines */
3812 
3813 	irq = platform_get_irq_byname_optional(pdev, "combined");
3814 	if (irq > 0)
3815 		smmu->combined_irq = irq;
3816 	else {
3817 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3818 		if (irq > 0)
3819 			smmu->evtq.q.irq = irq;
3820 
3821 		irq = platform_get_irq_byname_optional(pdev, "priq");
3822 		if (irq > 0)
3823 			smmu->priq.q.irq = irq;
3824 
3825 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3826 		if (irq > 0)
3827 			smmu->gerr_irq = irq;
3828 	}
3829 	/* Probe the h/w */
3830 	ret = arm_smmu_device_hw_probe(smmu);
3831 	if (ret)
3832 		return ret;
3833 
3834 	/* Initialise in-memory data structures */
3835 	ret = arm_smmu_init_structures(smmu);
3836 	if (ret)
3837 		return ret;
3838 
3839 	/* Record our private device structure */
3840 	platform_set_drvdata(pdev, smmu);
3841 
3842 	/* Reset the device */
3843 	ret = arm_smmu_device_reset(smmu, bypass);
3844 	if (ret)
3845 		return ret;
3846 
3847 	/* And we're up. Go go go! */
3848 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3849 				     "smmu3.%pa", &ioaddr);
3850 	if (ret)
3851 		return ret;
3852 
3853 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3854 	if (ret) {
3855 		dev_err(dev, "Failed to register iommu\n");
3856 		goto err_sysfs_remove;
3857 	}
3858 
3859 	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3860 	if (ret)
3861 		goto err_unregister_device;
3862 
3863 	return 0;
3864 
3865 err_unregister_device:
3866 	iommu_device_unregister(&smmu->iommu);
3867 err_sysfs_remove:
3868 	iommu_device_sysfs_remove(&smmu->iommu);
3869 	return ret;
3870 }
3871 
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	/* Detach from the buses first so no new masters can be added */
	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	/* Quiesce the hardware, then release the I/O page fault queue
	 * (only allocated when SVA + stalls are supported). */
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);

	return 0;
}
3884 
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	/* Reuse the remove path so the SMMU is disabled across kexec/reboot */
	arm_smmu_device_remove(pdev);
}
3889 
/* Device tree match table: the generic SMMUv3 binding only */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3895 
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	/*
	 * Wait for pending SVA notifier teardown to finish before the module
	 * text is unloaded.
	 */
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}
3901 
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		/* No sysfs bind/unbind: masters depend on the IOMMU */
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
/* Custom unregister hook so unload can synchronize SVA notifiers */
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
3919