1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams so that incoming transactions from devices not attached to an IOMMU domain are aborted and reported back to the device rather than being allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
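/*
 * Quick worked example of the prod/cons encoding used by the helpers below
 * (illustrative only; see the Q_IDX/Q_WRP/Q_OVF macros in arm-smmu-v3.h):
 * the queue index lives in the low max_n_shift bits, the wrap bit sits just
 * above it and toggles on every pass over the ring, and bit 31 is the
 * overflow flag used to detect producer overflow on the event/PRI queues.
 * With max_n_shift == 8 (a 256-entry queue), prod == 0x180 means "index
 * 0x80, second pass":
 *
 *	Q_IDX(q, 0x180) == 0x80
 *	Q_WRP(q, 0x180) == 0x100
 *
 * The queue is full when the indexes match but the wrap bits differ, and
 * empty when both match.
 */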
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241 
242 	switch (ent->opcode) {
243 	case CMDQ_OP_TLBI_EL2_ALL:
244 	case CMDQ_OP_TLBI_NSNH_ALL:
245 		break;
246 	case CMDQ_OP_PREFETCH_CFG:
247 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248 		break;
249 	case CMDQ_OP_CFGI_CD:
250 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251 		fallthrough;
252 	case CMDQ_OP_CFGI_STE:
253 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255 		break;
256 	case CMDQ_OP_CFGI_CD_ALL:
257 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258 		break;
259 	case CMDQ_OP_CFGI_ALL:
260 		/* Cover the entire SID range */
261 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262 		break;
263 	case CMDQ_OP_TLBI_NH_VA:
264 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265 		fallthrough;
266 	case CMDQ_OP_TLBI_EL2_VA:
267 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274 		break;
275 	case CMDQ_OP_TLBI_S2_IPA:
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283 		break;
284 	case CMDQ_OP_TLBI_NH_ASID:
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286 		fallthrough;
287 	case CMDQ_OP_TLBI_S12_VMALL:
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		break;
290 	case CMDQ_OP_TLBI_EL2_ASID:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292 		break;
293 	case CMDQ_OP_ATC_INV:
294 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300 		break;
301 	case CMDQ_OP_PRI_RESP:
302 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 		switch (ent->pri.resp) {
307 		case PRI_RESP_DENY:
308 		case PRI_RESP_FAIL:
309 		case PRI_RESP_SUCC:
310 			break;
311 		default:
312 			return -EINVAL;
313 		}
314 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315 		break;
316 	case CMDQ_OP_RESUME:
317 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339 					 u32 prod)
340 {
341 	struct arm_smmu_queue *q = &smmu->cmdq.q;
342 	struct arm_smmu_cmdq_ent ent = {
343 		.opcode = CMDQ_OP_CMD_SYNC,
344 	};
345 
346 	/*
347 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 	 * payload, so the write will zero the entire command on that platform.
349 	 */
350 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352 				   q->ent_dwords * 8;
353 	}
354 
355 	arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357 
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360 	static const char * const cerror_str[] = {
361 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
362 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
363 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
364 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
365 	};
366 
367 	int i;
368 	u64 cmd[CMDQ_ENT_DWORDS];
369 	struct arm_smmu_queue *q = &smmu->cmdq.q;
370 	u32 cons = readl_relaxed(q->cons_reg);
371 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 	struct arm_smmu_cmdq_ent cmd_sync = {
373 		.opcode = CMDQ_OP_CMD_SYNC,
374 	};
375 
376 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378 
379 	switch (idx) {
380 	case CMDQ_ERR_CERROR_ABT_IDX:
381 		dev_err(smmu->dev, "retrying command fetch\n");
382 		return;
383 	case CMDQ_ERR_CERROR_NONE_IDX:
384 		return;
385 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
386 		/*
387 		 * ATC Invalidation Completion timeout. CONS is still pointing
388 		 * at the CMD_SYNC. Attempt to complete other pending commands
389 		 * by repeating the CMD_SYNC, though we might well end up back
390 		 * here since the ATC invalidation may still be pending.
391 		 */
392 		return;
393 	case CMDQ_ERR_CERROR_ILL_IDX:
394 	default:
395 		break;
396 	}
397 
398 	/*
399 	 * We may have concurrent producers, so we need to be careful
400 	 * not to touch any of the shadow cmdq state.
401 	 */
402 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
403 	dev_err(smmu->dev, "skipping command in error state:\n");
404 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
405 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
406 
407 	/* Convert the erroneous command into a CMD_SYNC */
408 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
409 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
410 		return;
411 	}
412 
413 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
414 }
415 
416 /*
417  * Command queue locking.
418  * This is a form of bastardised rwlock with the following major changes:
419  *
420  * - The only LOCK routines are exclusive_trylock() and shared_lock().
421  *   Neither have barrier semantics, and instead provide only a control
422  *   dependency.
423  *
424  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
425  *   fails if the caller appears to be the last lock holder (yes, this is
426  *   racy). All successful UNLOCK routines have RELEASE semantics.
427  */
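/*
 * Rough usage sketch (it mirrors arm_smmu_cmdq_issue_cmdlist() and
 * arm_smmu_cmdq_poll_until_not_full() below; illustrative only):
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... insert a CMD_SYNC and wait for it to complete ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// last holder out: safe to publish the snapshotted cons
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 *	// or, to refresh the shadow cons from the hardware:
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 */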
428 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
429 {
430 	int val;
431 
432 	/*
433 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
434 	 * lock counter. When held in exclusive state, the lock counter is set
435 	 * to INT_MIN so these increments won't hurt as the value will remain
436 	 * negative.
437 	 */
438 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
439 		return;
440 
441 	do {
442 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
443 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
444 }
445 
446 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
447 {
448 	(void)atomic_dec_return_release(&cmdq->lock);
449 }
450 
451 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
452 {
453 	if (atomic_read(&cmdq->lock) == 1)
454 		return false;
455 
456 	arm_smmu_cmdq_shared_unlock(cmdq);
457 	return true;
458 }
459 
460 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
461 ({									\
462 	bool __ret;							\
463 	local_irq_save(flags);						\
464 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
465 	if (!__ret)							\
466 		local_irq_restore(flags);				\
467 	__ret;								\
468 })
469 
470 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
471 ({									\
472 	atomic_set_release(&cmdq->lock, 0);				\
473 	local_irq_restore(flags);					\
474 })
475 
476 
477 /*
478  * Command queue insertion.
479  * This is made fiddly by our attempts to achieve some sort of scalability
480  * since there is one queue shared amongst all of the CPUs in the system.  If
481  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
482  * then you'll *love* this monstrosity.
483  *
484  * The basic idea is to split the queue up into ranges of commands that are
485  * owned by a given CPU; the owner may not have written all of the commands
486  * itself, but is responsible for advancing the hardware prod pointer when
487  * the time comes. The algorithm is roughly:
488  *
489  * 	1. Allocate some space in the queue. At this point we also discover
490  *	   whether the head of the queue is currently owned by another CPU,
491  *	   or whether we are the owner.
492  *
493  *	2. Write our commands into our allocated slots in the queue.
494  *
495  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
496  *
497  *	4. If we are an owner:
498  *		a. Wait for the previous owner to finish.
499  *		b. Mark the queue head as unowned, which tells us the range
500  *		   that we are responsible for publishing.
501  *		c. Wait for all commands in our owned range to become valid.
502  *		d. Advance the hardware prod pointer.
503  *		e. Tell the next owner we've finished.
504  *
505  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
506  *	   owner), then we need to stick around until it has completed:
507  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
508  *		   to clear the first 4 bytes.
509  *		b. Otherwise, we spin waiting for the hardware cons pointer to
510  *		   advance past our command.
511  *
512  * The devil is in the details, particularly the use of locking for handling
513  * SYNC completion and freeing up space in the queue before we think that it is
514  * full.
515  */
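/*
 * Illustrative example of step (1), assuming CMDQ_PROD_OWNED_FLAG is bit 31
 * of the prod word (it aliases Q_OVERFLOW_FLAG in arm-smmu-v3.h): the first
 * CPU whose cmpxchg() lands while the owned flag is clear becomes the owner
 * and republishes prod with the flag set; a CPU racing in behind it sees
 * the flag already set and only gathers work. With two commands per CPU,
 * no CMD_SYNC and no wrap (prod values only shown):
 *
 *	CPU0: llq.prod == 0x00000004 -> head.prod == 0x80000006, owner
 *	CPU1: llq.prod == 0x80000006 -> head.prod == 0x80000008, not owner
 */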
516 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
517 					       u32 sprod, u32 eprod, bool set)
518 {
519 	u32 swidx, sbidx, ewidx, ebidx;
520 	struct arm_smmu_ll_queue llq = {
521 		.max_n_shift	= cmdq->q.llq.max_n_shift,
522 		.prod		= sprod,
523 	};
524 
525 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
526 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
527 
528 	while (llq.prod != eprod) {
529 		unsigned long mask;
530 		atomic_long_t *ptr;
531 		u32 limit = BITS_PER_LONG;
532 
533 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
534 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
535 
536 		ptr = &cmdq->valid_map[swidx];
537 
538 		if ((swidx == ewidx) && (sbidx < ebidx))
539 			limit = ebidx;
540 
541 		mask = GENMASK(limit - 1, sbidx);
542 
543 		/*
544 		 * The valid bit is the inverse of the wrap bit. This means
545 		 * that a zero-initialised queue is invalid and, after marking
546 		 * all entries as valid, they become invalid again when we
547 		 * wrap.
548 		 */
549 		if (set) {
550 			atomic_long_xor(mask, ptr);
551 		} else { /* Poll */
552 			unsigned long valid;
553 
554 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
555 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
556 		}
557 
558 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
559 	}
560 }
561 
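/*
 * Worked example (illustrative): with BITS_PER_LONG == 64 and a 256-entry
 * queue, valid_map spans four longs. Marking the index range [60, 70) as
 * valid XORs bits 60-63 of word 0 on the first pass through the loop
 * (limit == 64) and bits 0-5 of word 1 on the second (limit == ebidx == 6).
 * Because the bits are toggled rather than set, a slot's "valid" value is
 * the inverse of the prod wrap bit: the polling branch above waits for the
 * masked bits to be all-ones before the first wrap and all-zeroes after it.
 */
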
562 /* Mark all entries in the range [sprod, eprod) as valid */
563 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
564 					u32 sprod, u32 eprod)
565 {
566 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
567 }
568 
569 /* Wait for all entries in the range [sprod, eprod) to become valid */
570 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
571 					 u32 sprod, u32 eprod)
572 {
573 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
574 }
575 
576 /* Wait for the command queue to become non-full */
577 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
578 					     struct arm_smmu_ll_queue *llq)
579 {
580 	unsigned long flags;
581 	struct arm_smmu_queue_poll qp;
582 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
583 	int ret = 0;
584 
585 	/*
586 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
587 	 * that fails, spin until somebody else updates it for us.
588 	 */
589 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
590 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
591 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
592 		llq->val = READ_ONCE(cmdq->q.llq.val);
593 		return 0;
594 	}
595 
596 	queue_poll_init(smmu, &qp);
597 	do {
598 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
599 		if (!queue_full(llq))
600 			break;
601 
602 		ret = queue_poll(&qp);
603 	} while (!ret);
604 
605 	return ret;
606 }
607 
608 /*
609  * Wait until the SMMU signals a CMD_SYNC completion MSI.
610  * Must be called with the cmdq lock held in some capacity.
611  */
612 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
613 					  struct arm_smmu_ll_queue *llq)
614 {
615 	int ret = 0;
616 	struct arm_smmu_queue_poll qp;
617 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
618 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
619 
620 	queue_poll_init(smmu, &qp);
621 
622 	/*
623 	 * The MSI won't generate an event, since it's being written back
624 	 * into the command queue.
625 	 */
626 	qp.wfe = false;
627 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
628 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
629 	return ret;
630 }
631 
632 /*
633  * Wait until the SMMU cons index passes llq->prod.
634  * Must be called with the cmdq lock held in some capacity.
635  */
636 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
637 					       struct arm_smmu_ll_queue *llq)
638 {
639 	struct arm_smmu_queue_poll qp;
640 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
641 	u32 prod = llq->prod;
642 	int ret = 0;
643 
644 	queue_poll_init(smmu, &qp);
645 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
646 	do {
647 		if (queue_consumed(llq, prod))
648 			break;
649 
650 		ret = queue_poll(&qp);
651 
652 		/*
653 		 * This needs to be a readl() so that our subsequent call
654 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
655 		 *
656 		 * Specifically, we need to ensure that we observe all
657 		 * shared_lock()s by other CMD_SYNCs that share our owner,
658 		 * so that a failing call to tryunlock() means that we're
659 		 * the last one out and therefore we can safely advance
660 		 * cmdq->q.llq.cons. Roughly speaking:
661 		 *
662 		 * CPU 0		CPU1			CPU2 (us)
663 		 *
664 		 * if (sync)
665 		 * 	shared_lock();
666 		 *
667 		 * dma_wmb();
668 		 * set_valid_map();
669 		 *
670 		 * 			if (owner) {
671 		 *				poll_valid_map();
672 		 *				<control dependency>
673 		 *				writel(prod_reg);
674 		 *
675 		 *						readl(cons_reg);
676 		 *						tryunlock();
677 		 *
678 		 * Requires us to see CPU 0's shared_lock() acquisition.
679 		 */
680 		llq->cons = readl(cmdq->q.cons_reg);
681 	} while (!ret);
682 
683 	return ret;
684 }
685 
686 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
687 					 struct arm_smmu_ll_queue *llq)
688 {
689 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
690 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
691 
692 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
693 }
694 
695 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
696 					u32 prod, int n)
697 {
698 	int i;
699 	struct arm_smmu_ll_queue llq = {
700 		.max_n_shift	= cmdq->q.llq.max_n_shift,
701 		.prod		= prod,
702 	};
703 
704 	for (i = 0; i < n; ++i) {
705 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
706 
707 		prod = queue_inc_prod_n(&llq, i);
708 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
709 	}
710 }
711 
712 /*
713  * This is the actual insertion function, and provides the following
714  * ordering guarantees to callers:
715  *
716  * - There is a dma_wmb() before publishing any commands to the queue.
717  *   This can be relied upon to order prior writes to data structures
718  *   in memory (such as a CD or an STE) before the command.
719  *
720  * - On completion of a CMD_SYNC, there is a control dependency.
721  *   This can be relied upon to order subsequent writes to memory (e.g.
722  *   freeing an IOVA) after completion of the CMD_SYNC.
723  *
724  * - Command insertion is totally ordered, so if two CPUs each race to
725  *   insert their own list of commands then all of the commands from one
726  *   CPU will appear before any of the commands from the other CPU.
727  */
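/*
 * Sketch of a caller relying on the first guarantee (roughly what
 * arm_smmu_write_ctx_desc() and arm_smmu_sync_cd() do; variable names are
 * illustrative):
 *
 *	WRITE_ONCE(cdptr[0], cpu_to_le64(val));		// update the CD in memory
 *	arm_smmu_cmdq_issue_cmd(smmu, &cfgi_cd_cmd);	// then invalidate cached copies
 *	arm_smmu_cmdq_issue_sync(smmu);
 *
 * The dma_wmb() below orders the CD write before the CFGI command becomes
 * valid, and CMD_SYNC completion orders any follow-up writes (e.g. freeing
 * the old table) after the invalidation.
 */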
728 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
729 				       u64 *cmds, int n, bool sync)
730 {
731 	u64 cmd_sync[CMDQ_ENT_DWORDS];
732 	u32 prod;
733 	unsigned long flags;
734 	bool owner;
735 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
736 	struct arm_smmu_ll_queue llq = {
737 		.max_n_shift = cmdq->q.llq.max_n_shift,
738 	}, head = llq;
739 	int ret = 0;
740 
741 	/* 1. Allocate some space in the queue */
742 	local_irq_save(flags);
743 	llq.val = READ_ONCE(cmdq->q.llq.val);
744 	do {
745 		u64 old;
746 
747 		while (!queue_has_space(&llq, n + sync)) {
748 			local_irq_restore(flags);
749 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
750 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
751 			local_irq_save(flags);
752 		}
753 
754 		head.cons = llq.cons;
755 		head.prod = queue_inc_prod_n(&llq, n + sync) |
756 					     CMDQ_PROD_OWNED_FLAG;
757 
758 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
759 		if (old == llq.val)
760 			break;
761 
762 		llq.val = old;
763 	} while (1);
764 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
765 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
767 
768 	/*
769 	 * 2. Write our commands into the queue
770 	 * Dependency ordering from the cmpxchg() loop above.
771 	 */
772 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773 	if (sync) {
774 		prod = queue_inc_prod_n(&llq, n);
775 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
776 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
777 
778 		/*
779 		 * In order to determine completion of our CMD_SYNC, we must
780 		 * ensure that the queue can't wrap twice without us noticing.
781 		 * We achieve that by taking the cmdq lock as shared before
782 		 * marking our slot as valid.
783 		 */
784 		arm_smmu_cmdq_shared_lock(cmdq);
785 	}
786 
787 	/* 3. Mark our slots as valid, ensuring commands are visible first */
788 	dma_wmb();
789 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
790 
791 	/* 4. If we are the owner, take control of the SMMU hardware */
792 	if (owner) {
793 		/* a. Wait for previous owner to finish */
794 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
795 
796 		/* b. Stop gathering work by clearing the owned flag */
797 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
798 						   &cmdq->q.llq.atomic.prod);
799 		prod &= ~CMDQ_PROD_OWNED_FLAG;
800 
801 		/*
802 		 * c. Wait for any gathered work to be written to the queue.
803 		 * Note that we read our own entries so that we have the control
804 		 * dependency required by (d).
805 		 */
806 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
807 
808 		/*
809 		 * d. Advance the hardware prod pointer
810 		 * Control dependency ordering from the entries becoming valid.
811 		 */
812 		writel_relaxed(prod, cmdq->q.prod_reg);
813 
814 		/*
815 		 * e. Tell the next owner we're done
816 		 * Make sure we've updated the hardware first, so that we don't
817 		 * race to update prod and potentially move it backwards.
818 		 */
819 		atomic_set_release(&cmdq->owner_prod, prod);
820 	}
821 
822 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
823 	if (sync) {
824 		llq.prod = queue_inc_prod_n(&llq, n);
825 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
826 		if (ret) {
827 			dev_err_ratelimited(smmu->dev,
828 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
829 					    llq.prod,
830 					    readl_relaxed(cmdq->q.prod_reg),
831 					    readl_relaxed(cmdq->q.cons_reg));
832 		}
833 
		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons.
		 */
838 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
839 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
840 			arm_smmu_cmdq_shared_unlock(cmdq);
841 		}
842 	}
843 
844 	local_irq_restore(flags);
845 	return ret;
846 }
847 
848 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
849 				   struct arm_smmu_cmdq_ent *ent)
850 {
851 	u64 cmd[CMDQ_ENT_DWORDS];
852 
853 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
854 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
855 			 ent->opcode);
856 		return -EINVAL;
857 	}
858 
859 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
860 }
861 
862 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
863 {
864 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
865 }
866 
867 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
868 				    struct arm_smmu_cmdq_batch *cmds,
869 				    struct arm_smmu_cmdq_ent *cmd)
870 {
871 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
872 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
873 		cmds->num = 0;
874 	}
875 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
876 	cmds->num++;
877 }
878 
879 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
880 				      struct arm_smmu_cmdq_batch *cmds)
881 {
882 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
883 }
884 
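/*
 * Typical batching pattern, as used by arm_smmu_sync_cd() and
 * arm_smmu_atc_inv_domain() below (sketch only; command setup omitted):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each stream ID to invalidate {
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically once CMDQ_BATCH_ENTRIES commands have
 * accumulated, and batch_submit() issues whatever is left plus a CMD_SYNC.
 */
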
885 static int arm_smmu_page_response(struct device *dev,
886 				  struct iommu_fault_event *unused,
887 				  struct iommu_page_response *resp)
888 {
889 	struct arm_smmu_cmdq_ent cmd = {0};
890 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
891 	int sid = master->streams[0].id;
892 
893 	if (master->stall_enabled) {
894 		cmd.opcode		= CMDQ_OP_RESUME;
895 		cmd.resume.sid		= sid;
896 		cmd.resume.stag		= resp->grpid;
897 		switch (resp->code) {
898 		case IOMMU_PAGE_RESP_INVALID:
899 		case IOMMU_PAGE_RESP_FAILURE:
900 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
901 			break;
902 		case IOMMU_PAGE_RESP_SUCCESS:
903 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
904 			break;
905 		default:
906 			return -EINVAL;
907 		}
908 	} else {
909 		return -ENODEV;
910 	}
911 
912 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC; it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */
919 
920 	return 0;
921 }
922 
923 /* Context descriptor manipulation functions */
924 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
925 {
926 	struct arm_smmu_cmdq_ent cmd = {
927 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
928 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
929 		.tlbi.asid = asid,
930 	};
931 
932 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
933 	arm_smmu_cmdq_issue_sync(smmu);
934 }
935 
936 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
937 			     int ssid, bool leaf)
938 {
939 	size_t i;
940 	unsigned long flags;
941 	struct arm_smmu_master *master;
942 	struct arm_smmu_cmdq_batch cmds = {};
943 	struct arm_smmu_device *smmu = smmu_domain->smmu;
944 	struct arm_smmu_cmdq_ent cmd = {
945 		.opcode	= CMDQ_OP_CFGI_CD,
946 		.cfgi	= {
947 			.ssid	= ssid,
948 			.leaf	= leaf,
949 		},
950 	};
951 
952 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
953 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
954 		for (i = 0; i < master->num_streams; i++) {
955 			cmd.cfgi.sid = master->streams[i].id;
956 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
957 		}
958 	}
959 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
960 
961 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
962 }
963 
964 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
965 					struct arm_smmu_l1_ctx_desc *l1_desc)
966 {
967 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
968 
969 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
970 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
971 	if (!l1_desc->l2ptr) {
972 		dev_warn(smmu->dev,
973 			 "failed to allocate context descriptor table\n");
974 		return -ENOMEM;
975 	}
976 	return 0;
977 }
978 
979 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
980 				      struct arm_smmu_l1_ctx_desc *l1_desc)
981 {
982 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
983 		  CTXDESC_L1_DESC_V;
984 
985 	/* See comment in arm_smmu_write_ctx_desc() */
986 	WRITE_ONCE(*dst, cpu_to_le64(val));
987 }
988 
989 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
990 				   u32 ssid)
991 {
992 	__le64 *l1ptr;
993 	unsigned int idx;
994 	struct arm_smmu_l1_ctx_desc *l1_desc;
995 	struct arm_smmu_device *smmu = smmu_domain->smmu;
996 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
997 
998 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
999 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1000 
1001 	idx = ssid >> CTXDESC_SPLIT;
1002 	l1_desc = &cdcfg->l1_desc[idx];
1003 	if (!l1_desc->l2ptr) {
1004 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1005 			return NULL;
1006 
1007 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1008 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1009 		/* An invalid L1CD can be cached */
1010 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1011 	}
1012 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1013 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1014 }
1015 
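/*
 * Worked example (illustrative), assuming the CTXDESC_SPLIT of 10 from
 * arm-smmu-v3.h: for ssid 0x1234 in a two-level table,
 *
 *	L1 index = 0x1234 >> 10    = 4
 *	L2 index = 0x1234 & 0x3ff  = 0x234
 *
 * so the CD lives at l1_desc[4].l2ptr + 0x234 * CTXDESC_CD_DWORDS, with the
 * 64-KiB leaf table allocated lazily on first use by the function above.
 */
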
1016 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1017 			    struct arm_smmu_ctx_desc *cd)
1018 {
1019 	/*
1020 	 * This function handles the following cases:
1021 	 *
1022 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1023 	 * (2) Install a secondary CD, for SID+SSID traffic.
1024 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1025 	 *     CD, then invalidate the old entry and mappings.
1026 	 * (4) Quiesce the context without clearing the valid bit. Disable
1027 	 *     translation, and ignore any translation fault.
1028 	 * (5) Remove a secondary CD.
1029 	 */
1030 	u64 val;
1031 	bool cd_live;
1032 	__le64 *cdptr;
1033 
1034 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1035 		return -E2BIG;
1036 
1037 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1038 	if (!cdptr)
1039 		return -ENOMEM;
1040 
1041 	val = le64_to_cpu(cdptr[0]);
1042 	cd_live = !!(val & CTXDESC_CD_0_V);
1043 
1044 	if (!cd) { /* (5) */
1045 		val = 0;
1046 	} else if (cd == &quiet_cd) { /* (4) */
1047 		val |= CTXDESC_CD_0_TCR_EPD0;
1048 	} else if (cd_live) { /* (3) */
1049 		val &= ~CTXDESC_CD_0_ASID;
1050 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1051 		/*
1052 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1053 		 * this substream's traffic
1054 		 */
1055 	} else { /* (1) and (2) */
1056 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1057 		cdptr[2] = 0;
1058 		cdptr[3] = cpu_to_le64(cd->mair);
1059 
1060 		/*
1061 		 * STE is live, and the SMMU might read dwords of this CD in any
1062 		 * order. Ensure that it observes valid values before reading
1063 		 * V=1.
1064 		 */
1065 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1066 
1067 		val = cd->tcr |
1068 #ifdef __BIG_ENDIAN
1069 			CTXDESC_CD_0_ENDI |
1070 #endif
1071 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1072 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1073 			CTXDESC_CD_0_AA64 |
1074 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1075 			CTXDESC_CD_0_V;
1076 
1077 		if (smmu_domain->stall_enabled)
1078 			val |= CTXDESC_CD_0_S;
1079 	}
1080 
1081 	/*
1082 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1083 	 * "Configuration structures and configuration invalidation completion"
1084 	 *
1085 	 *   The size of single-copy atomic reads made by the SMMU is
1086 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1087 	 *   field within an aligned 64-bit span of a structure can be altered
1088 	 *   without first making the structure invalid.
1089 	 */
1090 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1091 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1092 	return 0;
1093 }
1094 
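/*
 * Call patterns for the cases listed above (sketch; error handling omitted):
 *
 *	arm_smmu_write_ctx_desc(smmu_domain, ssid, cd);        // (1)/(2)/(3)
 *	arm_smmu_write_ctx_desc(smmu_domain, ssid, &quiet_cd); // (4) quiesce
 *	arm_smmu_write_ctx_desc(smmu_domain, ssid, NULL);      // (5) remove
 */
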
1095 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1096 {
1097 	int ret;
1098 	size_t l1size;
1099 	size_t max_contexts;
1100 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1101 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1102 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1103 
1104 	max_contexts = 1 << cfg->s1cdmax;
1105 
1106 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1107 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1108 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1109 		cdcfg->num_l1_ents = max_contexts;
1110 
1111 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1112 	} else {
1113 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1114 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1115 						  CTXDESC_L2_ENTRIES);
1116 
1117 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1118 					      sizeof(*cdcfg->l1_desc),
1119 					      GFP_KERNEL);
1120 		if (!cdcfg->l1_desc)
1121 			return -ENOMEM;
1122 
1123 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124 	}
1125 
1126 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1127 					   GFP_KERNEL);
1128 	if (!cdcfg->cdtab) {
1129 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1130 		ret = -ENOMEM;
1131 		goto err_free_l1;
1132 	}
1133 
1134 	return 0;
1135 
1136 err_free_l1:
1137 	if (cdcfg->l1_desc) {
1138 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1139 		cdcfg->l1_desc = NULL;
1140 	}
1141 	return ret;
1142 }
1143 
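/*
 * Sizing example (illustrative), assuming CTXDESC_SPLIT == 10 and 64-byte
 * CDs as in arm-smmu-v3.h: with s1cdmax == 16 the domain can hold 65536
 * contexts, so the two-level format is used:
 *
 *	num_l1_ents = 65536 / 1024     = 64
 *	L1 table    = 64 * 8 bytes     = 512 bytes
 *	each leaf   = 1024 * 64 bytes  = 64 KiB, allocated on demand
 *
 * A domain with s1cdmax <= 10 instead gets a single linear table of at
 * most 1024 * 64 bytes.
 */
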
1144 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1145 {
1146 	int i;
1147 	size_t size, l1size;
1148 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1149 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1150 
1151 	if (cdcfg->l1_desc) {
1152 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1153 
1154 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1155 			if (!cdcfg->l1_desc[i].l2ptr)
1156 				continue;
1157 
1158 			dmam_free_coherent(smmu->dev, size,
1159 					   cdcfg->l1_desc[i].l2ptr,
1160 					   cdcfg->l1_desc[i].l2ptr_dma);
1161 		}
1162 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1163 		cdcfg->l1_desc = NULL;
1164 
1165 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1166 	} else {
1167 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1168 	}
1169 
1170 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1171 	cdcfg->cdtab_dma = 0;
1172 	cdcfg->cdtab = NULL;
1173 }
1174 
1175 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1176 {
1177 	bool free;
1178 	struct arm_smmu_ctx_desc *old_cd;
1179 
1180 	if (!cd->asid)
1181 		return false;
1182 
1183 	free = refcount_dec_and_test(&cd->refs);
1184 	if (free) {
1185 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1186 		WARN_ON(old_cd != cd);
1187 	}
1188 	return free;
1189 }
1190 
1191 /* Stream table manipulation functions */
1192 static void
1193 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1194 {
1195 	u64 val = 0;
1196 
1197 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1198 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1199 
1200 	/* See comment in arm_smmu_write_ctx_desc() */
1201 	WRITE_ONCE(*dst, cpu_to_le64(val));
1202 }
1203 
1204 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1205 {
1206 	struct arm_smmu_cmdq_ent cmd = {
1207 		.opcode	= CMDQ_OP_CFGI_STE,
1208 		.cfgi	= {
1209 			.sid	= sid,
1210 			.leaf	= true,
1211 		},
1212 	};
1213 
1214 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1215 	arm_smmu_cmdq_issue_sync(smmu);
1216 }
1217 
1218 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1219 				      __le64 *dst)
1220 {
1221 	/*
1222 	 * This is hideously complicated, but we only really care about
1223 	 * three cases at the moment:
1224 	 *
1225 	 * 1. Invalid (all zero) -> bypass/fault (init)
1226 	 * 2. Bypass/fault -> translation/bypass (attach)
1227 	 * 3. Translation/bypass -> bypass/fault (detach)
1228 	 *
1229 	 * Given that we can't update the STE atomically and the SMMU
1230 	 * doesn't read the thing in a defined order, that leaves us
1231 	 * with the following maintenance requirements:
1232 	 *
1233 	 * 1. Update Config, return (init time STEs aren't live)
1234 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1235 	 * 3. Update Config, sync
1236 	 */
1237 	u64 val = le64_to_cpu(dst[0]);
1238 	bool ste_live = false;
1239 	struct arm_smmu_device *smmu = NULL;
1240 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1241 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1242 	struct arm_smmu_domain *smmu_domain = NULL;
1243 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1244 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1245 		.prefetch	= {
1246 			.sid	= sid,
1247 		},
1248 	};
1249 
1250 	if (master) {
1251 		smmu_domain = master->domain;
1252 		smmu = master->smmu;
1253 	}
1254 
1255 	if (smmu_domain) {
1256 		switch (smmu_domain->stage) {
1257 		case ARM_SMMU_DOMAIN_S1:
1258 			s1_cfg = &smmu_domain->s1_cfg;
1259 			break;
1260 		case ARM_SMMU_DOMAIN_S2:
1261 		case ARM_SMMU_DOMAIN_NESTED:
1262 			s2_cfg = &smmu_domain->s2_cfg;
1263 			break;
1264 		default:
1265 			break;
1266 		}
1267 	}
1268 
1269 	if (val & STRTAB_STE_0_V) {
1270 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1271 		case STRTAB_STE_0_CFG_BYPASS:
1272 			break;
1273 		case STRTAB_STE_0_CFG_S1_TRANS:
1274 		case STRTAB_STE_0_CFG_S2_TRANS:
1275 			ste_live = true;
1276 			break;
1277 		case STRTAB_STE_0_CFG_ABORT:
1278 			BUG_ON(!disable_bypass);
1279 			break;
1280 		default:
1281 			BUG(); /* STE corruption */
1282 		}
1283 	}
1284 
1285 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1286 	val = STRTAB_STE_0_V;
1287 
1288 	/* Bypass/fault */
1289 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1290 		if (!smmu_domain && disable_bypass)
1291 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1292 		else
1293 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1294 
1295 		dst[0] = cpu_to_le64(val);
1296 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1297 						STRTAB_STE_1_SHCFG_INCOMING));
1298 		dst[2] = 0; /* Nuke the VMID */
1299 		/*
1300 		 * The SMMU can perform negative caching, so we must sync
1301 		 * the STE regardless of whether the old value was live.
1302 		 */
1303 		if (smmu)
1304 			arm_smmu_sync_ste_for_sid(smmu, sid);
1305 		return;
1306 	}
1307 
1308 	if (s1_cfg) {
1309 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1310 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1311 
1312 		BUG_ON(ste_live);
1313 		dst[1] = cpu_to_le64(
1314 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1315 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1316 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1317 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1318 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1319 
1320 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1321 		    !master->stall_enabled)
1322 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1323 
1324 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1325 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1326 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1327 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1328 	}
1329 
1330 	if (s2_cfg) {
1331 		BUG_ON(ste_live);
1332 		dst[2] = cpu_to_le64(
1333 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1334 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1335 #ifdef __BIG_ENDIAN
1336 			 STRTAB_STE_2_S2ENDI |
1337 #endif
1338 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1339 			 STRTAB_STE_2_S2R);
1340 
1341 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1342 
1343 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1344 	}
1345 
1346 	if (master->ats_enabled)
1347 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1348 						 STRTAB_STE_1_EATS_TRANS));
1349 
1350 	arm_smmu_sync_ste_for_sid(smmu, sid);
1351 	/* See comment in arm_smmu_write_ctx_desc() */
1352 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1353 	arm_smmu_sync_ste_for_sid(smmu, sid);
1354 
1355 	/* It's likely that we'll want to use the new STE soon */
1356 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1357 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1358 }
1359 
1360 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1361 {
1362 	unsigned int i;
1363 
1364 	for (i = 0; i < nent; ++i) {
1365 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1366 		strtab += STRTAB_STE_DWORDS;
1367 	}
1368 }
1369 
1370 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1371 {
1372 	size_t size;
1373 	void *strtab;
1374 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1375 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1376 
1377 	if (desc->l2ptr)
1378 		return 0;
1379 
1380 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1381 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1382 
1383 	desc->span = STRTAB_SPLIT + 1;
1384 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1385 					  GFP_KERNEL);
1386 	if (!desc->l2ptr) {
1387 		dev_err(smmu->dev,
1388 			"failed to allocate l2 stream table for SID %u\n",
1389 			sid);
1390 		return -ENOMEM;
1391 	}
1392 
1393 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1394 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1395 	return 0;
1396 }
1397 
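/*
 * Sizing note (illustrative), assuming STRTAB_SPLIT == 8 and 64-byte STEs
 * as in arm-smmu-v3.h: each lazily-allocated L2 table covers 256 stream
 * IDs and is 1 << (8 + 3 + 3) == 16 KiB, and the matching L1 descriptor
 * advertises span == 9, i.e. 2^(span - 1) == 256 entries.
 */
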
1398 static struct arm_smmu_master *
1399 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1400 {
1401 	struct rb_node *node;
1402 	struct arm_smmu_stream *stream;
1403 
1404 	lockdep_assert_held(&smmu->streams_mutex);
1405 
1406 	node = smmu->streams.rb_node;
1407 	while (node) {
1408 		stream = rb_entry(node, struct arm_smmu_stream, node);
1409 		if (stream->id < sid)
1410 			node = node->rb_right;
1411 		else if (stream->id > sid)
1412 			node = node->rb_left;
1413 		else
1414 			return stream->master;
1415 	}
1416 
1417 	return NULL;
1418 }
1419 
1420 /* IRQ and event handlers */
1421 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1422 {
1423 	int ret;
1424 	u32 reason;
1425 	u32 perm = 0;
1426 	struct arm_smmu_master *master;
1427 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1428 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1429 	struct iommu_fault_event fault_evt = { };
1430 	struct iommu_fault *flt = &fault_evt.fault;
1431 
1432 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1433 	case EVT_ID_TRANSLATION_FAULT:
1434 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1435 		break;
1436 	case EVT_ID_ADDR_SIZE_FAULT:
1437 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1438 		break;
1439 	case EVT_ID_ACCESS_FAULT:
1440 		reason = IOMMU_FAULT_REASON_ACCESS;
1441 		break;
1442 	case EVT_ID_PERMISSION_FAULT:
1443 		reason = IOMMU_FAULT_REASON_PERMISSION;
1444 		break;
1445 	default:
1446 		return -EOPNOTSUPP;
1447 	}
1448 
1449 	/* Stage-2 is always pinned at the moment */
1450 	if (evt[1] & EVTQ_1_S2)
1451 		return -EFAULT;
1452 
1453 	if (evt[1] & EVTQ_1_RnW)
1454 		perm |= IOMMU_FAULT_PERM_READ;
1455 	else
1456 		perm |= IOMMU_FAULT_PERM_WRITE;
1457 
1458 	if (evt[1] & EVTQ_1_InD)
1459 		perm |= IOMMU_FAULT_PERM_EXEC;
1460 
1461 	if (evt[1] & EVTQ_1_PnU)
1462 		perm |= IOMMU_FAULT_PERM_PRIV;
1463 
1464 	if (evt[1] & EVTQ_1_STALL) {
1465 		flt->type = IOMMU_FAULT_PAGE_REQ;
1466 		flt->prm = (struct iommu_fault_page_request) {
1467 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1468 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1469 			.perm = perm,
1470 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1471 		};
1472 
1473 		if (ssid_valid) {
1474 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1475 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1476 		}
1477 	} else {
1478 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1479 		flt->event = (struct iommu_fault_unrecoverable) {
1480 			.reason = reason,
1481 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1482 			.perm = perm,
1483 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1484 		};
1485 
1486 		if (ssid_valid) {
1487 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1488 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1489 		}
1490 	}
1491 
1492 	mutex_lock(&smmu->streams_mutex);
1493 	master = arm_smmu_find_master(smmu, sid);
1494 	if (!master) {
1495 		ret = -EINVAL;
1496 		goto out_unlock;
1497 	}
1498 
1499 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1500 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1501 		/* Nobody cared, abort the access */
1502 		struct iommu_page_response resp = {
1503 			.pasid		= flt->prm.pasid,
1504 			.grpid		= flt->prm.grpid,
1505 			.code		= IOMMU_PAGE_RESP_FAILURE,
1506 		};
1507 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1508 	}
1509 
1510 out_unlock:
1511 	mutex_unlock(&smmu->streams_mutex);
1512 	return ret;
1513 }
1514 
1515 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1516 {
1517 	int i, ret;
1518 	struct arm_smmu_device *smmu = dev;
1519 	struct arm_smmu_queue *q = &smmu->evtq.q;
1520 	struct arm_smmu_ll_queue *llq = &q->llq;
1521 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1522 				      DEFAULT_RATELIMIT_BURST);
1523 	u64 evt[EVTQ_ENT_DWORDS];
1524 
1525 	do {
1526 		while (!queue_remove_raw(q, evt)) {
1527 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1528 
1529 			ret = arm_smmu_handle_evt(smmu, evt);
1530 			if (!ret || !__ratelimit(&rs))
1531 				continue;
1532 
1533 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1534 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1535 				dev_info(smmu->dev, "\t0x%016llx\n",
1536 					 (unsigned long long)evt[i]);
1537 
1538 		}
1539 
1540 		/*
1541 		 * Not much we can do on overflow, so scream and pretend we're
1542 		 * trying harder.
1543 		 */
1544 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1545 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1546 	} while (!queue_empty(llq));
1547 
1548 	/* Sync our overflow flag, as we believe we're up to speed */
1549 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1550 		    Q_IDX(llq, llq->cons);
1551 	return IRQ_HANDLED;
1552 }
1553 
1554 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1555 {
1556 	u32 sid, ssid;
1557 	u16 grpid;
1558 	bool ssv, last;
1559 
1560 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1561 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1562 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1563 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1564 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1565 
1566 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1567 	dev_info(smmu->dev,
1568 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1569 		 sid, ssid, grpid, last ? "L" : "",
1570 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1571 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1572 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1573 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1574 		 evt[1] & PRIQ_1_ADDR_MASK);
1575 
1576 	if (last) {
1577 		struct arm_smmu_cmdq_ent cmd = {
1578 			.opcode			= CMDQ_OP_PRI_RESP,
1579 			.substream_valid	= ssv,
1580 			.pri			= {
1581 				.sid	= sid,
1582 				.ssid	= ssid,
1583 				.grpid	= grpid,
1584 				.resp	= PRI_RESP_DENY,
1585 			},
1586 		};
1587 
1588 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1589 	}
1590 }
1591 
1592 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1593 {
1594 	struct arm_smmu_device *smmu = dev;
1595 	struct arm_smmu_queue *q = &smmu->priq.q;
1596 	struct arm_smmu_ll_queue *llq = &q->llq;
1597 	u64 evt[PRIQ_ENT_DWORDS];
1598 
1599 	do {
1600 		while (!queue_remove_raw(q, evt))
1601 			arm_smmu_handle_ppr(smmu, evt);
1602 
1603 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1604 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1605 	} while (!queue_empty(llq));
1606 
1607 	/* Sync our overflow flag, as we believe we're up to speed */
1608 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1609 		      Q_IDX(llq, llq->cons);
1610 	queue_sync_cons_out(q);
1611 	return IRQ_HANDLED;
1612 }
1613 
1614 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1615 
1616 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1617 {
1618 	u32 gerror, gerrorn, active;
1619 	struct arm_smmu_device *smmu = dev;
1620 
1621 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1622 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1623 
1624 	active = gerror ^ gerrorn;
1625 	if (!(active & GERROR_ERR_MASK))
1626 		return IRQ_NONE; /* No errors pending */
1627 
1628 	dev_warn(smmu->dev,
1629 		 "unexpected global error reported (0x%08x), this could be serious\n",
1630 		 active);
1631 
1632 	if (active & GERROR_SFM_ERR) {
1633 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1634 		arm_smmu_device_disable(smmu);
1635 	}
1636 
1637 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1638 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1639 
1640 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1641 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1642 
1643 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1644 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1645 
1646 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1647 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1648 
1649 	if (active & GERROR_PRIQ_ABT_ERR)
1650 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1651 
1652 	if (active & GERROR_EVTQ_ABT_ERR)
1653 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1654 
1655 	if (active & GERROR_CMDQ_ERR)
1656 		arm_smmu_cmdq_skip_err(smmu);
1657 
1658 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1659 	return IRQ_HANDLED;
1660 }
1661 
1662 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1663 {
1664 	struct arm_smmu_device *smmu = dev;
1665 
1666 	arm_smmu_evtq_thread(irq, dev);
1667 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1668 		arm_smmu_priq_thread(irq, dev);
1669 
1670 	return IRQ_HANDLED;
1671 }
1672 
1673 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1674 {
1675 	arm_smmu_gerror_handler(irq, dev);
1676 	return IRQ_WAKE_THREAD;
1677 }
1678 
1679 static void
1680 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1681 			struct arm_smmu_cmdq_ent *cmd)
1682 {
1683 	size_t log2_span;
1684 	size_t span_mask;
1685 	/* ATC invalidates are always on 4096-bytes pages */
1686 	size_t inval_grain_shift = 12;
1687 	unsigned long page_start, page_end;
1688 
1689 	/*
1690 	 * ATS and PASID:
1691 	 *
1692 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1693 	 * prefix. In that case all ATC entries within the address range are
1694 	 * invalidated, including those that were requested with a PASID! There
1695 	 * is no way to invalidate only entries without PASID.
1696 	 *
1697 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1698 	 * traffic), translation requests without PASID create ATC entries
1699 	 * without PASID, which must be invalidated with substream_valid clear.
1700 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1701 	 * ATC entries within the address range.
1702 	 */
1703 	*cmd = (struct arm_smmu_cmdq_ent) {
1704 		.opcode			= CMDQ_OP_ATC_INV,
1705 		.substream_valid	= !!ssid,
1706 		.atc.ssid		= ssid,
1707 	};
1708 
1709 	if (!size) {
1710 		cmd->atc.size = ATC_INV_SIZE_ALL;
1711 		return;
1712 	}
1713 
1714 	page_start	= iova >> inval_grain_shift;
1715 	page_end	= (iova + size - 1) >> inval_grain_shift;
1716 
1717 	/*
1718 	 * In an ATS Invalidate Request, the address must be aligned on the
1719 	 * range size, which must be a power of two number of page sizes. We
1720 	 * thus have to choose between grossly over-invalidating the region, or
1721 	 * splitting the invalidation into multiple commands. For simplicity
1722 	 * we'll go with the first solution, but should refine it in the future
1723 	 * if multiple commands are shown to be more efficient.
1724 	 *
1725 	 * Find the smallest power of two that covers the range. The most
1726 	 * significant differing bit between the start and end addresses,
1727 	 * fls(start ^ end), indicates the required span. For example:
1728 	 *
1729 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1730 	 *		x = 0b1000 ^ 0b1011 = 0b11
1731 	 *		span = 1 << fls(x) = 4
1732 	 *
1733 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1734 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1735 	 *		span = 1 << fls(x) = 16
1736 	 */
1737 	log2_span	= fls_long(page_start ^ page_end);
1738 	span_mask	= (1ULL << log2_span) - 1;
1739 
1740 	page_start	&= ~span_mask;
1741 
1742 	cmd->atc.addr	= page_start << inval_grain_shift;
1743 	cmd->atc.size	= log2_span;
1744 }
1745 
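/*
 * Worked example (illustrative), following on from the comment above: to
 * invalidate iova 0x7000 with size 0x4000 (pages [7; 10]),
 *
 *	page_start = 7, page_end = 10
 *	log2_span  = fls_long(7 ^ 10) = fls_long(0b1101) = 4
 *	page_start &= ~15             -> 0
 *
 * so the command ends up with atc.addr == 0 and atc.size == 4, i.e. the
 * whole of pages [0; 15] is invalidated, as described above.
 */
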
1746 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1747 {
1748 	int i;
1749 	struct arm_smmu_cmdq_ent cmd;
1750 
1751 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1752 
1753 	for (i = 0; i < master->num_streams; i++) {
1754 		cmd.atc.sid = master->streams[i].id;
1755 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1756 	}
1757 
1758 	return arm_smmu_cmdq_issue_sync(master->smmu);
1759 }
1760 
1761 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1762 			    unsigned long iova, size_t size)
1763 {
1764 	int i;
1765 	unsigned long flags;
1766 	struct arm_smmu_cmdq_ent cmd;
1767 	struct arm_smmu_master *master;
1768 	struct arm_smmu_cmdq_batch cmds = {};
1769 
1770 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1771 		return 0;
1772 
1773 	/*
1774 	 * Ensure that we've completed prior invalidation of the main TLBs
1775 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1776 	 * arm_smmu_enable_ats():
1777 	 *
1778 	 *	// unmap()			// arm_smmu_enable_ats()
1779 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1780 	 *	smp_mb();			[...]
1781 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1782 	 *
1783 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1784 	 * ATS was enabled at the PCI device before completion of the TLBI.
1785 	 */
1786 	smp_mb();
1787 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1788 		return 0;
1789 
1790 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1791 
1792 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1793 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1794 		if (!master->ats_enabled)
1795 			continue;
1796 
1797 		for (i = 0; i < master->num_streams; i++) {
1798 			cmd.atc.sid = master->streams[i].id;
1799 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1800 		}
1801 	}
1802 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1803 
1804 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1805 }
1806 
1807 /* IO_PGTABLE API */
1808 static void arm_smmu_tlb_inv_context(void *cookie)
1809 {
1810 	struct arm_smmu_domain *smmu_domain = cookie;
1811 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1812 	struct arm_smmu_cmdq_ent cmd;
1813 
1814 	/*
1815 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1816 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1817 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1818 	 * insertion to guarantee those are observed before the TLBI. Do be
1819 	 * careful, 007.
1820 	 */
1821 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1822 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1823 	} else {
1824 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1825 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1826 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1827 		arm_smmu_cmdq_issue_sync(smmu);
1828 	}
1829 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1830 }
1831 
1832 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1833 				     unsigned long iova, size_t size,
1834 				     size_t granule,
1835 				     struct arm_smmu_domain *smmu_domain)
1836 {
1837 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1838 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1839 	size_t inv_range = granule;
1840 	struct arm_smmu_cmdq_batch cmds = {};
1841 
1842 	if (!size)
1843 		return;
1844 
1845 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1846 		/* Get the leaf page size */
1847 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1848 
1849 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1850 		cmd->tlbi.tg = (tg - 10) / 2;
1851 
1852 		/* Determine what level the granule is at */
1853 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1854 
1855 		num_pages = size >> tg;
1856 	}
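	/*
	 * Worked example for a 4KiB leaf page size (tg = 12): invalidating
	 * 2MiB blocks gives tlbi.tg = (12 - 10) / 2 = 1 and
	 * tlbi.ttl = 4 - ((21 - 3) / (12 - 3)) = 2, i.e. level-2 entries.
	 */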
1857 
1858 	while (iova < end) {
1859 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1860 			/*
1861 			 * On each iteration of the loop, the range covers up to
1862 			 * 5 bits' worth of the aligned number of pages remaining.
1863 			 * The range in pages is:
1864 			 *
1865 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1866 			 */
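			/*
			 * Worked example (tg = 12): for num_pages = 48
			 * (0b110000), scale = 4 and num = 3, so a single
			 * command covers 3 << (4 + 12) bytes = 48 pages and
			 * num_pages drops to zero.
			 */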
1867 			unsigned long scale, num;
1868 
1869 			/* Determine the power of 2 multiple number of pages */
1870 			scale = __ffs(num_pages);
1871 			cmd->tlbi.scale = scale;
1872 
1873 			/* Determine how many chunks of 2^scale size we have */
1874 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1875 			cmd->tlbi.num = num - 1;
1876 
1877 			/* range is num * 2^scale * pgsize */
1878 			inv_range = num << (scale + tg);
1879 
1880 			/* Clear out the lower order bits for the next iteration */
1881 			num_pages -= num << scale;
1882 		}
1883 
1884 		cmd->tlbi.addr = iova;
1885 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1886 		iova += inv_range;
1887 	}
1888 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1889 }
1890 
1891 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1892 					  size_t granule, bool leaf,
1893 					  struct arm_smmu_domain *smmu_domain)
1894 {
1895 	struct arm_smmu_cmdq_ent cmd = {
1896 		.tlbi = {
1897 			.leaf	= leaf,
1898 		},
1899 	};
1900 
1901 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1902 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1903 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1904 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1905 	} else {
1906 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1907 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1908 	}
1909 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1910 
1911 	/*
1912 	 * Unfortunately, this can't be leaf-only since we may have
1913 	 * zapped an entire table.
1914 	 */
1915 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1916 }
1917 
1918 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1919 				 size_t granule, bool leaf,
1920 				 struct arm_smmu_domain *smmu_domain)
1921 {
1922 	struct arm_smmu_cmdq_ent cmd = {
1923 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1924 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1925 		.tlbi = {
1926 			.asid	= asid,
1927 			.leaf	= leaf,
1928 		},
1929 	};
1930 
1931 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1932 }
1933 
1934 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1935 					 unsigned long iova, size_t granule,
1936 					 void *cookie)
1937 {
1938 	struct arm_smmu_domain *smmu_domain = cookie;
1939 	struct iommu_domain *domain = &smmu_domain->domain;
1940 
1941 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1942 }
1943 
1944 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1945 				  size_t granule, void *cookie)
1946 {
1947 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1948 }
1949 
1950 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1951 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1952 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1953 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1954 };
1955 
1956 /* IOMMU API */
1957 static bool arm_smmu_capable(enum iommu_cap cap)
1958 {
1959 	switch (cap) {
1960 	case IOMMU_CAP_CACHE_COHERENCY:
1961 		return true;
1962 	case IOMMU_CAP_NOEXEC:
1963 		return true;
1964 	default:
1965 		return false;
1966 	}
1967 }
1968 
1969 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1970 {
1971 	struct arm_smmu_domain *smmu_domain;
1972 
1973 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1974 	    type != IOMMU_DOMAIN_DMA &&
1975 	    type != IOMMU_DOMAIN_IDENTITY)
1976 		return NULL;
1977 
1978 	/*
1979 	 * Allocate the domain and initialise some of its data structures.
1980 	 * We can't really do anything meaningful until we've added a
1981 	 * master.
1982 	 */
1983 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1984 	if (!smmu_domain)
1985 		return NULL;
1986 
1987 	if (type == IOMMU_DOMAIN_DMA &&
1988 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1989 		kfree(smmu_domain);
1990 		return NULL;
1991 	}
1992 
1993 	mutex_init(&smmu_domain->init_mutex);
1994 	INIT_LIST_HEAD(&smmu_domain->devices);
1995 	spin_lock_init(&smmu_domain->devices_lock);
1996 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1997 
1998 	return &smmu_domain->domain;
1999 }
2000 
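/*
 * Trivial ID allocator for the (small) VMID space: retry
 * find_first_zero_bit() until test_and_set_bit() wins any race for the slot.
 */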
2001 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2002 {
2003 	int idx, size = 1 << span;
2004 
2005 	do {
2006 		idx = find_first_zero_bit(map, size);
2007 		if (idx == size)
2008 			return -ENOSPC;
2009 	} while (test_and_set_bit(idx, map));
2010 
2011 	return idx;
2012 }
2013 
2014 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2015 {
2016 	clear_bit(idx, map);
2017 }
2018 
2019 static void arm_smmu_domain_free(struct iommu_domain *domain)
2020 {
2021 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2022 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2023 
2024 	iommu_put_dma_cookie(domain);
2025 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2026 
2027 	/* Free the CD and ASID, if we allocated them */
2028 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2029 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2030 
2031 		/* Prevent SVA from touching the CD while we're freeing it */
2032 		mutex_lock(&arm_smmu_asid_lock);
2033 		if (cfg->cdcfg.cdtab)
2034 			arm_smmu_free_cd_tables(smmu_domain);
2035 		arm_smmu_free_asid(&cfg->cd);
2036 		mutex_unlock(&arm_smmu_asid_lock);
2037 	} else {
2038 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2039 		if (cfg->vmid)
2040 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2041 	}
2042 
2043 	kfree(smmu_domain);
2044 }
2045 
2046 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2047 				       struct arm_smmu_master *master,
2048 				       struct io_pgtable_cfg *pgtbl_cfg)
2049 {
2050 	int ret;
2051 	u32 asid;
2052 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2053 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2054 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2055 
2056 	refcount_set(&cfg->cd.refs, 1);
2057 
2058 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2059 	mutex_lock(&arm_smmu_asid_lock);
2060 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2061 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2062 	if (ret)
2063 		goto out_unlock;
2064 
2065 	cfg->s1cdmax = master->ssid_bits;
2066 
2067 	smmu_domain->stall_enabled = master->stall_enabled;
2068 
2069 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2070 	if (ret)
2071 		goto out_free_asid;
2072 
2073 	cfg->cd.asid	= (u16)asid;
2074 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2075 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2076 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2077 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2078 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2079 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2080 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2081 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2082 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2083 
2084 	/*
2085 	 * Note that this will end up calling arm_smmu_sync_cd() before
2086 	 * the master has been added to the devices list for this domain.
2087 	 * This isn't an issue because the STE hasn't been installed yet.
2088 	 */
2089 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2090 	if (ret)
2091 		goto out_free_cd_tables;
2092 
2093 	mutex_unlock(&arm_smmu_asid_lock);
2094 	return 0;
2095 
2096 out_free_cd_tables:
2097 	arm_smmu_free_cd_tables(smmu_domain);
2098 out_free_asid:
2099 	arm_smmu_free_asid(&cfg->cd);
2100 out_unlock:
2101 	mutex_unlock(&arm_smmu_asid_lock);
2102 	return ret;
2103 }
2104 
2105 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2106 				       struct arm_smmu_master *master,
2107 				       struct io_pgtable_cfg *pgtbl_cfg)
2108 {
2109 	int vmid;
2110 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2111 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2112 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2113 
2114 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2115 	if (vmid < 0)
2116 		return vmid;
2117 
2118 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2119 	cfg->vmid	= (u16)vmid;
2120 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2121 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2122 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2123 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2124 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2125 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2126 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2127 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2128 	return 0;
2129 }
2130 
2131 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2132 				    struct arm_smmu_master *master)
2133 {
2134 	int ret;
2135 	unsigned long ias, oas;
2136 	enum io_pgtable_fmt fmt;
2137 	struct io_pgtable_cfg pgtbl_cfg;
2138 	struct io_pgtable_ops *pgtbl_ops;
2139 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2140 				 struct arm_smmu_master *,
2141 				 struct io_pgtable_cfg *);
2142 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2143 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2144 
2145 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2146 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2147 		return 0;
2148 	}
2149 
2150 	/* Restrict the stage to what we can actually support */
2151 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2152 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2153 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2154 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2155 
2156 	switch (smmu_domain->stage) {
2157 	case ARM_SMMU_DOMAIN_S1:
2158 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2159 		ias = min_t(unsigned long, ias, VA_BITS);
2160 		oas = smmu->ias;
2161 		fmt = ARM_64_LPAE_S1;
2162 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2163 		break;
2164 	case ARM_SMMU_DOMAIN_NESTED:
2165 	case ARM_SMMU_DOMAIN_S2:
2166 		ias = smmu->ias;
2167 		oas = smmu->oas;
2168 		fmt = ARM_64_LPAE_S2;
2169 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2170 		break;
2171 	default:
2172 		return -EINVAL;
2173 	}
2174 
2175 	pgtbl_cfg = (struct io_pgtable_cfg) {
2176 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2177 		.ias		= ias,
2178 		.oas		= oas,
2179 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2180 		.tlb		= &arm_smmu_flush_ops,
2181 		.iommu_dev	= smmu->dev,
2182 	};
2183 
2184 	if (!iommu_get_dma_strict(domain))
2185 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2186 
2187 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2188 	if (!pgtbl_ops)
2189 		return -ENOMEM;
2190 
2191 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2192 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2193 	domain->geometry.force_aperture = true;
2194 
2195 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2196 	if (ret < 0) {
2197 		free_io_pgtable_ops(pgtbl_ops);
2198 		return ret;
2199 	}
2200 
2201 	smmu_domain->pgtbl_ops = pgtbl_ops;
2202 	return 0;
2203 }
2204 
2205 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2206 {
2207 	__le64 *step;
2208 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2209 
2210 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2211 		struct arm_smmu_strtab_l1_desc *l1_desc;
2212 		int idx;
2213 
2214 		/* Two-level walk */
2215 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2216 		l1_desc = &cfg->l1_desc[idx];
2217 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2218 		step = &l1_desc->l2ptr[idx];
2219 	} else {
2220 		/* Simple linear lookup */
2221 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2222 	}
2223 
2224 	return step;
2225 }
2226 
2227 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2228 {
2229 	int i, j;
2230 	struct arm_smmu_device *smmu = master->smmu;
2231 
2232 	for (i = 0; i < master->num_streams; ++i) {
2233 		u32 sid = master->streams[i].id;
2234 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2235 
2236 		/* Bridged PCI devices may end up with duplicated IDs */
2237 		for (j = 0; j < i; j++)
2238 			if (master->streams[j].id == sid)
2239 				break;
2240 		if (j < i)
2241 			continue;
2242 
2243 		arm_smmu_write_strtab_ent(master, sid, step);
2244 	}
2245 }
2246 
2247 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2248 {
2249 	struct device *dev = master->dev;
2250 	struct arm_smmu_device *smmu = master->smmu;
2251 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2252 
2253 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2254 		return false;
2255 
2256 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2257 		return false;
2258 
2259 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2260 }
2261 
2262 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2263 {
2264 	size_t stu;
2265 	struct pci_dev *pdev;
2266 	struct arm_smmu_device *smmu = master->smmu;
2267 	struct arm_smmu_domain *smmu_domain = master->domain;
2268 
2269 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2270 	if (!master->ats_enabled)
2271 		return;
2272 
2273 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2274 	stu = __ffs(smmu->pgsize_bitmap);
2275 	pdev = to_pci_dev(master->dev);
2276 
2277 	atomic_inc(&smmu_domain->nr_ats_masters);
2278 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2279 	if (pci_enable_ats(pdev, stu))
2280 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2281 }
2282 
2283 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2284 {
2285 	struct arm_smmu_domain *smmu_domain = master->domain;
2286 
2287 	if (!master->ats_enabled)
2288 		return;
2289 
2290 	pci_disable_ats(to_pci_dev(master->dev));
2291 	/*
2292 	 * Ensure ATS is disabled at the endpoint before we issue the
2293 	 * ATC invalidation via the SMMU.
2294 	 */
2295 	wmb();
2296 	arm_smmu_atc_inv_master(master);
2297 	atomic_dec(&smmu_domain->nr_ats_masters);
2298 }
2299 
2300 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2301 {
2302 	int ret;
2303 	int features;
2304 	int num_pasids;
2305 	struct pci_dev *pdev;
2306 
2307 	if (!dev_is_pci(master->dev))
2308 		return -ENODEV;
2309 
2310 	pdev = to_pci_dev(master->dev);
2311 
2312 	features = pci_pasid_features(pdev);
2313 	if (features < 0)
2314 		return features;
2315 
2316 	num_pasids = pci_max_pasids(pdev);
2317 	if (num_pasids <= 0)
2318 		return num_pasids;
2319 
2320 	ret = pci_enable_pasid(pdev, features);
2321 	if (ret) {
2322 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2323 		return ret;
2324 	}
2325 
2326 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2327 				  master->smmu->ssid_bits);
2328 	return 0;
2329 }
2330 
2331 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2332 {
2333 	struct pci_dev *pdev;
2334 
2335 	if (!dev_is_pci(master->dev))
2336 		return;
2337 
2338 	pdev = to_pci_dev(master->dev);
2339 
2340 	if (!pdev->pasid_enabled)
2341 		return;
2342 
2343 	master->ssid_bits = 0;
2344 	pci_disable_pasid(pdev);
2345 }
2346 
2347 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2348 {
2349 	unsigned long flags;
2350 	struct arm_smmu_domain *smmu_domain = master->domain;
2351 
2352 	if (!smmu_domain)
2353 		return;
2354 
2355 	arm_smmu_disable_ats(master);
2356 
2357 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2358 	list_del(&master->domain_head);
2359 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2360 
2361 	master->domain = NULL;
2362 	master->ats_enabled = false;
2363 	arm_smmu_install_ste_for_dev(master);
2364 }
2365 
2366 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2367 {
2368 	int ret = 0;
2369 	unsigned long flags;
2370 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2371 	struct arm_smmu_device *smmu;
2372 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2373 	struct arm_smmu_master *master;
2374 
2375 	if (!fwspec)
2376 		return -ENOENT;
2377 
2378 	master = dev_iommu_priv_get(dev);
2379 	smmu = master->smmu;
2380 
2381 	/*
2382 	 * Checking that SVA is disabled ensures that this device isn't bound to
2383 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2384 	 * be removed concurrently since we're holding the group mutex.
2385 	 */
2386 	if (arm_smmu_master_sva_enabled(master)) {
2387 		dev_err(dev, "cannot attach - SVA enabled\n");
2388 		return -EBUSY;
2389 	}
2390 
2391 	arm_smmu_detach_dev(master);
2392 
2393 	mutex_lock(&smmu_domain->init_mutex);
2394 
2395 	if (!smmu_domain->smmu) {
2396 		smmu_domain->smmu = smmu;
2397 		ret = arm_smmu_domain_finalise(domain, master);
2398 		if (ret) {
2399 			smmu_domain->smmu = NULL;
2400 			goto out_unlock;
2401 		}
2402 	} else if (smmu_domain->smmu != smmu) {
2403 		dev_err(dev,
2404 			"cannot attach to SMMU %s (upstream of %s)\n",
2405 			dev_name(smmu_domain->smmu->dev),
2406 			dev_name(smmu->dev));
2407 		ret = -ENXIO;
2408 		goto out_unlock;
2409 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2410 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2411 		dev_err(dev,
2412 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2413 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2414 		ret = -EINVAL;
2415 		goto out_unlock;
2416 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2417 		   smmu_domain->stall_enabled != master->stall_enabled) {
2418 		dev_err(dev, "cannot attach to stall-%s domain\n",
2419 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2420 		ret = -EINVAL;
2421 		goto out_unlock;
2422 	}
2423 
2424 	master->domain = smmu_domain;
2425 
2426 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2427 		master->ats_enabled = arm_smmu_ats_supported(master);
2428 
2429 	arm_smmu_install_ste_for_dev(master);
2430 
2431 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2432 	list_add(&master->domain_head, &smmu_domain->devices);
2433 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2434 
2435 	arm_smmu_enable_ats(master);
2436 
2437 out_unlock:
2438 	mutex_unlock(&smmu_domain->init_mutex);
2439 	return ret;
2440 }
2441 
2442 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2443 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2444 {
2445 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2446 
2447 	if (!ops)
2448 		return -ENODEV;
2449 
2450 	return ops->map(ops, iova, paddr, size, prot, gfp);
2451 }
2452 
2453 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2454 			     size_t size, struct iommu_iotlb_gather *gather)
2455 {
2456 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2457 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2458 
2459 	if (!ops)
2460 		return 0;
2461 
2462 	return ops->unmap(ops, iova, size, gather);
2463 }
2464 
2465 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2466 {
2467 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2468 
2469 	if (smmu_domain->smmu)
2470 		arm_smmu_tlb_inv_context(smmu_domain);
2471 }
2472 
2473 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2474 				struct iommu_iotlb_gather *gather)
2475 {
2476 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2477 
2478 	if (!gather->pgsize)
2479 		return;
2480 
2481 	arm_smmu_tlb_inv_range_domain(gather->start,
2482 				      gather->end - gather->start + 1,
2483 				      gather->pgsize, true, smmu_domain);
2484 }
2485 
2486 static phys_addr_t
2487 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2488 {
2489 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2490 
2491 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2492 		return iova;
2493 
2494 	if (!ops)
2495 		return 0;
2496 
2497 	return ops->iova_to_phys(ops, iova);
2498 }
2499 
2500 static struct platform_driver arm_smmu_driver;
2501 
2502 static
2503 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2504 {
2505 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2506 							  fwnode);
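	/*
	 * driver_find_device_by_fwnode() takes a reference on the device;
	 * drop it immediately since we only need the driver data.
	 */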
2507 	put_device(dev);
2508 	return dev ? dev_get_drvdata(dev) : NULL;
2509 }
2510 
2511 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2512 {
2513 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2514 
2515 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2516 		limit *= 1UL << STRTAB_SPLIT;
2517 
2518 	return sid < limit;
2519 }
2520 
2521 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2522 				  struct arm_smmu_master *master)
2523 {
2524 	int i;
2525 	int ret = 0;
2526 	struct arm_smmu_stream *new_stream, *cur_stream;
2527 	struct rb_node **new_node, *parent_node = NULL;
2528 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2529 
2530 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2531 				  GFP_KERNEL);
2532 	if (!master->streams)
2533 		return -ENOMEM;
2534 	master->num_streams = fwspec->num_ids;
2535 
2536 	mutex_lock(&smmu->streams_mutex);
2537 	for (i = 0; i < fwspec->num_ids; i++) {
2538 		u32 sid = fwspec->ids[i];
2539 
2540 		new_stream = &master->streams[i];
2541 		new_stream->id = sid;
2542 		new_stream->master = master;
2543 
2544 		/*
2545 		 * Check the SIDs are in range of the SMMU and our stream table
2546 		 */
2547 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2548 			ret = -ERANGE;
2549 			break;
2550 		}
2551 
2552 		/* Ensure l2 strtab is initialised */
2553 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2554 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2555 			if (ret)
2556 				break;
2557 		}
2558 
2559 		/* Insert into SID tree */
2560 		new_node = &(smmu->streams.rb_node);
2561 		while (*new_node) {
2562 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2563 					      node);
2564 			parent_node = *new_node;
2565 			if (cur_stream->id > new_stream->id) {
2566 				new_node = &((*new_node)->rb_left);
2567 			} else if (cur_stream->id < new_stream->id) {
2568 				new_node = &((*new_node)->rb_right);
2569 			} else {
2570 				dev_warn(master->dev,
2571 					 "stream %u already in tree\n",
2572 					 cur_stream->id);
2573 				ret = -EINVAL;
2574 				break;
2575 			}
2576 		}
2577 		if (ret)
2578 			break;
2579 
2580 		rb_link_node(&new_stream->node, parent_node, new_node);
2581 		rb_insert_color(&new_stream->node, &smmu->streams);
2582 	}
2583 
2584 	if (ret) {
2585 		for (i--; i >= 0; i--)
2586 			rb_erase(&master->streams[i].node, &smmu->streams);
2587 		kfree(master->streams);
2588 	}
2589 	mutex_unlock(&smmu->streams_mutex);
2590 
2591 	return ret;
2592 }
2593 
2594 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2595 {
2596 	int i;
2597 	struct arm_smmu_device *smmu = master->smmu;
2598 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2599 
2600 	if (!smmu || !master->streams)
2601 		return;
2602 
2603 	mutex_lock(&smmu->streams_mutex);
2604 	for (i = 0; i < fwspec->num_ids; i++)
2605 		rb_erase(&master->streams[i].node, &smmu->streams);
2606 	mutex_unlock(&smmu->streams_mutex);
2607 
2608 	kfree(master->streams);
2609 }
2610 
2611 static struct iommu_ops arm_smmu_ops;
2612 
2613 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2614 {
2615 	int ret;
2616 	struct arm_smmu_device *smmu;
2617 	struct arm_smmu_master *master;
2618 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2619 
2620 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2621 		return ERR_PTR(-ENODEV);
2622 
2623 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2624 		return ERR_PTR(-EBUSY);
2625 
2626 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2627 	if (!smmu)
2628 		return ERR_PTR(-ENODEV);
2629 
2630 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2631 	if (!master)
2632 		return ERR_PTR(-ENOMEM);
2633 
2634 	master->dev = dev;
2635 	master->smmu = smmu;
2636 	INIT_LIST_HEAD(&master->bonds);
2637 	dev_iommu_priv_set(dev, master);
2638 
2639 	ret = arm_smmu_insert_master(smmu, master);
2640 	if (ret)
2641 		goto err_free_master;
2642 
2643 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2644 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2645 
2646 	/*
2647 	 * Note that PASID must be enabled before, and disabled after ATS:
2648 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2649 	 *
2650 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2651 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2652 	 *   are changed.
2653 	 */
2654 	arm_smmu_enable_pasid(master);
2655 
2656 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2657 		master->ssid_bits = min_t(u8, master->ssid_bits,
2658 					  CTXDESC_LINEAR_CDMAX);
2659 
2660 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2661 	     device_property_read_bool(dev, "dma-can-stall")) ||
2662 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2663 		master->stall_enabled = true;
2664 
2665 	return &smmu->iommu;
2666 
2667 err_free_master:
2668 	kfree(master);
2669 	dev_iommu_priv_set(dev, NULL);
2670 	return ERR_PTR(ret);
2671 }
2672 
2673 static void arm_smmu_release_device(struct device *dev)
2674 {
2675 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2676 	struct arm_smmu_master *master;
2677 
2678 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2679 		return;
2680 
2681 	master = dev_iommu_priv_get(dev);
2682 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2683 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2684 	arm_smmu_detach_dev(master);
2685 	arm_smmu_disable_pasid(master);
2686 	arm_smmu_remove_master(master);
2687 	kfree(master);
2688 	iommu_fwspec_free(dev);
2689 }
2690 
2691 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2692 {
2693 	struct iommu_group *group;
2694 
2695 	/*
2696 	 * We don't support devices sharing stream IDs other than PCI RID
2697 	 * aliases, since the necessary ID-to-device lookup becomes rather
2698 	 * impractical given a potentially sparse 32-bit stream ID space.
2699 	 */
2700 	if (dev_is_pci(dev))
2701 		group = pci_device_group(dev);
2702 	else
2703 		group = generic_device_group(dev);
2704 
2705 	return group;
2706 }
2707 
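/*
 * Nested translation can only be requested before the domain has been
 * finalised, i.e. before it is attached to a device and bound to an SMMU.
 */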
2708 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2709 {
2710 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2711 	int ret = 0;
2712 
2713 	mutex_lock(&smmu_domain->init_mutex);
2714 	if (smmu_domain->smmu)
2715 		ret = -EPERM;
2716 	else
2717 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2718 	mutex_unlock(&smmu_domain->init_mutex);
2719 
2720 	return ret;
2721 }
2722 
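/*
 * Masters reference the SMMU with a single-cell specifier carrying their
 * StreamID, which we add to the fwspec here. Illustrative DT fragment
 * (StreamID value made up):
 *
 *	iommus = <&smmu 0x100>;
 */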
2723 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2724 {
2725 	return iommu_fwspec_add_ids(dev, args->args, 1);
2726 }
2727 
2728 static void arm_smmu_get_resv_regions(struct device *dev,
2729 				      struct list_head *head)
2730 {
2731 	struct iommu_resv_region *region;
2732 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2733 
2734 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2735 					 prot, IOMMU_RESV_SW_MSI);
2736 	if (!region)
2737 		return;
2738 
2739 	list_add_tail(&region->list, head);
2740 
2741 	iommu_dma_get_resv_regions(dev, head);
2742 }
2743 
2744 static bool arm_smmu_dev_has_feature(struct device *dev,
2745 				     enum iommu_dev_features feat)
2746 {
2747 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2748 
2749 	if (!master)
2750 		return false;
2751 
2752 	switch (feat) {
2753 	case IOMMU_DEV_FEAT_IOPF:
2754 		return arm_smmu_master_iopf_supported(master);
2755 	case IOMMU_DEV_FEAT_SVA:
2756 		return arm_smmu_master_sva_supported(master);
2757 	default:
2758 		return false;
2759 	}
2760 }
2761 
2762 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2763 					 enum iommu_dev_features feat)
2764 {
2765 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2766 
2767 	if (!master)
2768 		return false;
2769 
2770 	switch (feat) {
2771 	case IOMMU_DEV_FEAT_IOPF:
2772 		return master->iopf_enabled;
2773 	case IOMMU_DEV_FEAT_SVA:
2774 		return arm_smmu_master_sva_enabled(master);
2775 	default:
2776 		return false;
2777 	}
2778 }
2779 
2780 static int arm_smmu_dev_enable_feature(struct device *dev,
2781 				       enum iommu_dev_features feat)
2782 {
2783 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2784 
2785 	if (!arm_smmu_dev_has_feature(dev, feat))
2786 		return -ENODEV;
2787 
2788 	if (arm_smmu_dev_feature_enabled(dev, feat))
2789 		return -EBUSY;
2790 
2791 	switch (feat) {
2792 	case IOMMU_DEV_FEAT_IOPF:
2793 		master->iopf_enabled = true;
2794 		return 0;
2795 	case IOMMU_DEV_FEAT_SVA:
2796 		return arm_smmu_master_enable_sva(master);
2797 	default:
2798 		return -EINVAL;
2799 	}
2800 }
2801 
2802 static int arm_smmu_dev_disable_feature(struct device *dev,
2803 					enum iommu_dev_features feat)
2804 {
2805 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2806 
2807 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2808 		return -EINVAL;
2809 
2810 	switch (feat) {
2811 	case IOMMU_DEV_FEAT_IOPF:
2812 		if (master->sva_enabled)
2813 			return -EBUSY;
2814 		master->iopf_enabled = false;
2815 		return 0;
2816 	case IOMMU_DEV_FEAT_SVA:
2817 		return arm_smmu_master_disable_sva(master);
2818 	default:
2819 		return -EINVAL;
2820 	}
2821 }
2822 
2823 static struct iommu_ops arm_smmu_ops = {
2824 	.capable		= arm_smmu_capable,
2825 	.domain_alloc		= arm_smmu_domain_alloc,
2826 	.domain_free		= arm_smmu_domain_free,
2827 	.attach_dev		= arm_smmu_attach_dev,
2828 	.map			= arm_smmu_map,
2829 	.unmap			= arm_smmu_unmap,
2830 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2831 	.iotlb_sync		= arm_smmu_iotlb_sync,
2832 	.iova_to_phys		= arm_smmu_iova_to_phys,
2833 	.probe_device		= arm_smmu_probe_device,
2834 	.release_device		= arm_smmu_release_device,
2835 	.device_group		= arm_smmu_device_group,
2836 	.enable_nesting		= arm_smmu_enable_nesting,
2837 	.of_xlate		= arm_smmu_of_xlate,
2838 	.get_resv_regions	= arm_smmu_get_resv_regions,
2839 	.put_resv_regions	= generic_iommu_put_resv_regions,
2840 	.dev_has_feat		= arm_smmu_dev_has_feature,
2841 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2842 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2843 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2844 	.sva_bind		= arm_smmu_sva_bind,
2845 	.sva_unbind		= arm_smmu_sva_unbind,
2846 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2847 	.page_response		= arm_smmu_page_response,
2848 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2849 	.owner			= THIS_MODULE,
2850 };
2851 
2852 /* Probing and initialisation functions */
2853 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2854 				   struct arm_smmu_queue *q,
2855 				   void __iomem *page,
2856 				   unsigned long prod_off,
2857 				   unsigned long cons_off,
2858 				   size_t dwords, const char *name)
2859 {
2860 	size_t qsz;
2861 
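	/*
	 * Try the requested queue size and, if the contiguous DMA
	 * allocation fails, progressively halve it, but never go below a
	 * single page.
	 */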
2862 	do {
2863 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2864 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2865 					      GFP_KERNEL);
2866 		if (q->base || qsz < PAGE_SIZE)
2867 			break;
2868 
2869 		q->llq.max_n_shift--;
2870 	} while (1);
2871 
2872 	if (!q->base) {
2873 		dev_err(smmu->dev,
2874 			"failed to allocate queue (0x%zx bytes) for %s\n",
2875 			qsz, name);
2876 		return -ENOMEM;
2877 	}
2878 
2879 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2880 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2881 			 1 << q->llq.max_n_shift, name);
2882 	}
2883 
2884 	q->prod_reg	= page + prod_off;
2885 	q->cons_reg	= page + cons_off;
2886 	q->ent_dwords	= dwords;
2887 
2888 	q->q_base  = Q_BASE_RWA;
2889 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2890 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2891 
2892 	q->llq.prod = q->llq.cons = 0;
2893 	return 0;
2894 }
2895 
2896 static void arm_smmu_cmdq_free_bitmap(void *data)
2897 {
2898 	unsigned long *bitmap = data;
2899 	bitmap_free(bitmap);
2900 }
2901 
2902 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2903 {
2904 	int ret = 0;
2905 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2906 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2907 	atomic_long_t *bitmap;
2908 
2909 	atomic_set(&cmdq->owner_prod, 0);
2910 	atomic_set(&cmdq->lock, 0);
2911 
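	/*
	 * One bit per command queue entry, used by the lock-free command
	 * insertion code when publishing and polling entry validity.
	 */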
2912 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2913 	if (!bitmap) {
2914 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2915 		ret = -ENOMEM;
2916 	} else {
2917 		cmdq->valid_map = bitmap;
2918 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2919 	}
2920 
2921 	return ret;
2922 }
2923 
2924 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2925 {
2926 	int ret;
2927 
2928 	/* cmdq */
2929 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2930 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2931 				      CMDQ_ENT_DWORDS, "cmdq");
2932 	if (ret)
2933 		return ret;
2934 
2935 	ret = arm_smmu_cmdq_init(smmu);
2936 	if (ret)
2937 		return ret;
2938 
2939 	/* evtq */
2940 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2941 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2942 				      EVTQ_ENT_DWORDS, "evtq");
2943 	if (ret)
2944 		return ret;
2945 
2946 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2947 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2948 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2949 		if (!smmu->evtq.iopf)
2950 			return -ENOMEM;
2951 	}
2952 
2953 	/* priq */
2954 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2955 		return 0;
2956 
2957 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2958 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2959 				       PRIQ_ENT_DWORDS, "priq");
2960 }
2961 
2962 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2963 {
2964 	unsigned int i;
2965 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2966 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2967 	void *strtab = smmu->strtab_cfg.strtab;
2968 
2969 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2970 	if (!cfg->l1_desc)
2971 		return -ENOMEM;
2972 
2973 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2974 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2975 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2976 	}
2977 
2978 	return 0;
2979 }
2980 
2981 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2982 {
2983 	void *strtab;
2984 	u64 reg;
2985 	u32 size, l1size;
2986 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2987 
2988 	/* Calculate the L1 size, capped to the SIDSIZE. */
2989 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2990 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2991 	cfg->num_l1_ents = 1 << size;
2992 
2993 	size += STRTAB_SPLIT;
2994 	if (size < smmu->sid_bits)
2995 		dev_warn(smmu->dev,
2996 			 "2-level strtab only covers %u/%u bits of SID\n",
2997 			 size, smmu->sid_bits);
2998 
2999 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3000 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3001 				     GFP_KERNEL);
3002 	if (!strtab) {
3003 		dev_err(smmu->dev,
3004 			"failed to allocate l1 stream table (%u bytes)\n",
3005 			l1size);
3006 		return -ENOMEM;
3007 	}
3008 	cfg->strtab = strtab;
3009 
3010 	/* Configure strtab_base_cfg for 2 levels */
3011 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3012 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3013 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3014 	cfg->strtab_base_cfg = reg;
3015 
3016 	return arm_smmu_init_l1_strtab(smmu);
3017 }
3018 
3019 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3020 {
3021 	void *strtab;
3022 	u64 reg;
3023 	u32 size;
3024 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3025 
3026 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3027 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3028 				     GFP_KERNEL);
3029 	if (!strtab) {
3030 		dev_err(smmu->dev,
3031 			"failed to allocate linear stream table (%u bytes)\n",
3032 			size);
3033 		return -ENOMEM;
3034 	}
3035 	cfg->strtab = strtab;
3036 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3037 
3038 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3039 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3040 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3041 	cfg->strtab_base_cfg = reg;
3042 
3043 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3044 	return 0;
3045 }
3046 
3047 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3048 {
3049 	u64 reg;
3050 	int ret;
3051 
3052 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3053 		ret = arm_smmu_init_strtab_2lvl(smmu);
3054 	else
3055 		ret = arm_smmu_init_strtab_linear(smmu);
3056 
3057 	if (ret)
3058 		return ret;
3059 
3060 	/* Set the strtab base address */
3061 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3062 	reg |= STRTAB_BASE_RA;
3063 	smmu->strtab_cfg.strtab_base = reg;
3064 
3065 	/* Allocate the first VMID for stage-2 bypass STEs */
3066 	set_bit(0, smmu->vmid_map);
3067 	return 0;
3068 }
3069 
3070 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3071 {
3072 	int ret;
3073 
3074 	mutex_init(&smmu->streams_mutex);
3075 	smmu->streams = RB_ROOT;
3076 
3077 	ret = arm_smmu_init_queues(smmu);
3078 	if (ret)
3079 		return ret;
3080 
3081 	return arm_smmu_init_strtab(smmu);
3082 }
3083 
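/* Write a register and wait for the corresponding ACK register to match */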
3084 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3085 				   unsigned int reg_off, unsigned int ack_off)
3086 {
3087 	u32 reg;
3088 
3089 	writel_relaxed(val, smmu->base + reg_off);
3090 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3091 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3092 }
3093 
3094 /* GBPA is "special": an update takes effect only once GBPA_UPDATE clears */
3095 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3096 {
3097 	int ret;
3098 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3099 
3100 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3101 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3102 	if (ret)
3103 		return ret;
3104 
3105 	reg &= ~clr;
3106 	reg |= set;
3107 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3108 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3109 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3110 
3111 	if (ret)
3112 		dev_err(smmu->dev, "GBPA not responding to update\n");
3113 	return ret;
3114 }
3115 
3116 static void arm_smmu_free_msis(void *data)
3117 {
3118 	struct device *dev = data;
3119 	platform_msi_domain_free_irqs(dev);
3120 }
3121 
3122 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3123 {
3124 	phys_addr_t doorbell;
3125 	struct device *dev = msi_desc_to_dev(desc);
3126 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3127 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3128 
3129 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3130 	doorbell &= MSI_CFG0_ADDR_MASK;
3131 
3132 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3133 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3134 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3135 }
3136 
3137 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3138 {
3139 	struct msi_desc *desc;
3140 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3141 	struct device *dev = smmu->dev;
3142 
3143 	/* Clear the MSI address regs */
3144 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3145 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3146 
3147 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3148 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3149 	else
3150 		nvec--;
3151 
3152 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3153 		return;
3154 
3155 	if (!dev->msi_domain) {
3156 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3157 		return;
3158 	}
3159 
3160 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3161 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3162 	if (ret) {
3163 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3164 		return;
3165 	}
3166 
3167 	for_each_msi_entry(desc, dev) {
3168 		switch (desc->platform.msi_index) {
3169 		case EVTQ_MSI_INDEX:
3170 			smmu->evtq.q.irq = desc->irq;
3171 			break;
3172 		case GERROR_MSI_INDEX:
3173 			smmu->gerr_irq = desc->irq;
3174 			break;
3175 		case PRIQ_MSI_INDEX:
3176 			smmu->priq.q.irq = desc->irq;
3177 			break;
3178 		default:	/* Unknown */
3179 			continue;
3180 		}
3181 	}
3182 
3183 	/* Add callback to free MSIs on teardown */
3184 	devm_add_action(dev, arm_smmu_free_msis, dev);
3185 }
3186 
3187 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3188 {
3189 	int irq, ret;
3190 
3191 	arm_smmu_setup_msis(smmu);
3192 
3193 	/* Request interrupt lines */
3194 	irq = smmu->evtq.q.irq;
3195 	if (irq) {
3196 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3197 						arm_smmu_evtq_thread,
3198 						IRQF_ONESHOT,
3199 						"arm-smmu-v3-evtq", smmu);
3200 		if (ret < 0)
3201 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3202 	} else {
3203 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3204 	}
3205 
3206 	irq = smmu->gerr_irq;
3207 	if (irq) {
3208 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3209 				       0, "arm-smmu-v3-gerror", smmu);
3210 		if (ret < 0)
3211 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3212 	} else {
3213 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3214 	}
3215 
3216 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3217 		irq = smmu->priq.q.irq;
3218 		if (irq) {
3219 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3220 							arm_smmu_priq_thread,
3221 							IRQF_ONESHOT,
3222 							"arm-smmu-v3-priq",
3223 							smmu);
3224 			if (ret < 0)
3225 				dev_warn(smmu->dev,
3226 					 "failed to enable priq irq\n");
3227 		} else {
3228 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3229 		}
3230 	}
3231 }
3232 
3233 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3234 {
3235 	int ret, irq;
3236 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3237 
3238 	/* Disable IRQs first */
3239 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3240 				      ARM_SMMU_IRQ_CTRLACK);
3241 	if (ret) {
3242 		dev_err(smmu->dev, "failed to disable irqs\n");
3243 		return ret;
3244 	}
3245 
3246 	irq = smmu->combined_irq;
3247 	if (irq) {
3248 		/*
3249 		 * Cavium ThunderX2 implementation doesn't support unique irq
3250 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3251 		 */
3252 		ret = devm_request_threaded_irq(smmu->dev, irq,
3253 					arm_smmu_combined_irq_handler,
3254 					arm_smmu_combined_irq_thread,
3255 					IRQF_ONESHOT,
3256 					"arm-smmu-v3-combined-irq", smmu);
3257 		if (ret < 0)
3258 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3259 	} else
3260 		arm_smmu_setup_unique_irqs(smmu);
3261 
3262 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3263 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3264 
3265 	/* Enable interrupt generation on the SMMU */
3266 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3267 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3268 	if (ret)
3269 		dev_warn(smmu->dev, "failed to enable irqs\n");
3270 
3271 	return 0;
3272 }
3273 
3274 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3275 {
3276 	int ret;
3277 
3278 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3279 	if (ret)
3280 		dev_err(smmu->dev, "failed to clear cr0\n");
3281 
3282 	return ret;
3283 }
3284 
3285 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3286 {
3287 	int ret;
3288 	u32 reg, enables;
3289 	struct arm_smmu_cmdq_ent cmd;
3290 
3291 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3292 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3293 	if (reg & CR0_SMMUEN) {
3294 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3295 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3296 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3297 	}
3298 
3299 	ret = arm_smmu_device_disable(smmu);
3300 	if (ret)
3301 		return ret;
3302 
3303 	/* CR1 (table and queue memory attributes) */
3304 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3305 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3306 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3307 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3308 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3309 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3310 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3311 
3312 	/* CR2 (private TLB maintenance, record C_BAD_STREAMID events, E2H) */
3313 	reg = CR2_PTM | CR2_RECINVSID;
3314 
3315 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3316 		reg |= CR2_E2H;
3317 
3318 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3319 
3320 	/* Stream table */
3321 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3322 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3323 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3324 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3325 
3326 	/* Command queue */
3327 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3328 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3329 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3330 
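	/*
	 * Bring the command queue up first: it is needed for the
	 * invalidation commands issued below, before the other queues and
	 * the translation interface are enabled. 'enables' accumulates the
	 * CR0 bits as each block comes up.
	 */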
3331 	enables = CR0_CMDQEN;
3332 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3333 				      ARM_SMMU_CR0ACK);
3334 	if (ret) {
3335 		dev_err(smmu->dev, "failed to enable command queue\n");
3336 		return ret;
3337 	}
3338 
3339 	/* Invalidate any cached configuration */
3340 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3341 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3342 	arm_smmu_cmdq_issue_sync(smmu);
3343 
3344 	/* Invalidate any stale TLB entries */
3345 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3346 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3347 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3348 	}
3349 
3350 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3351 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3352 	arm_smmu_cmdq_issue_sync(smmu);
3353 
3354 	/* Event queue */
3355 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3356 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3357 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3358 
3359 	enables |= CR0_EVTQEN;
3360 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3361 				      ARM_SMMU_CR0ACK);
3362 	if (ret) {
3363 		dev_err(smmu->dev, "failed to enable event queue\n");
3364 		return ret;
3365 	}
3366 
3367 	/* PRI queue */
3368 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3369 		writeq_relaxed(smmu->priq.q.q_base,
3370 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3371 		writel_relaxed(smmu->priq.q.llq.prod,
3372 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3373 		writel_relaxed(smmu->priq.q.llq.cons,
3374 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3375 
3376 		enables |= CR0_PRIQEN;
3377 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3378 					      ARM_SMMU_CR0ACK);
3379 		if (ret) {
3380 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3381 			return ret;
3382 		}
3383 	}
3384 
3385 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3386 		enables |= CR0_ATSCHK;
3387 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3388 					      ARM_SMMU_CR0ACK);
3389 		if (ret) {
3390 			dev_err(smmu->dev, "failed to enable ATS check\n");
3391 			return ret;
3392 		}
3393 	}
3394 
3395 	ret = arm_smmu_setup_irqs(smmu);
3396 	if (ret) {
3397 		dev_err(smmu->dev, "failed to setup irqs\n");
3398 		return ret;
3399 	}
3400 
3401 	if (is_kdump_kernel())
3402 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3403 
3404 	/* Enable the SMMU interface, or ensure bypass */
3405 	if (!bypass || disable_bypass) {
3406 		enables |= CR0_SMMUEN;
3407 	} else {
3408 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3409 		if (ret)
3410 			return ret;
3411 	}
3412 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3413 				      ARM_SMMU_CR0ACK);
3414 	if (ret) {
3415 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3416 		return ret;
3417 	}
3418 
3419 	return 0;
3420 }
3421 
3422 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3423 {
3424 	u32 reg;
3425 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3426 
3427 	/* IDR0 */
3428 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3429 
3430 	/* 2-level structures */
3431 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3432 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3433 
3434 	if (reg & IDR0_CD2L)
3435 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3436 
3437 	/*
3438 	 * Translation table endianness.
3439 	 * We currently require the same endianness as the CPU, but this
3440 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3441 	 */
3442 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3443 	case IDR0_TTENDIAN_MIXED:
3444 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3445 		break;
3446 #ifdef __BIG_ENDIAN
3447 	case IDR0_TTENDIAN_BE:
3448 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3449 		break;
3450 #else
3451 	case IDR0_TTENDIAN_LE:
3452 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3453 		break;
3454 #endif
3455 	default:
3456 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3457 		return -ENXIO;
3458 	}
3459 
3460 	/* Boolean feature flags */
3461 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3462 		smmu->features |= ARM_SMMU_FEAT_PRI;
3463 
3464 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3465 		smmu->features |= ARM_SMMU_FEAT_ATS;
3466 
3467 	if (reg & IDR0_SEV)
3468 		smmu->features |= ARM_SMMU_FEAT_SEV;
3469 
3470 	if (reg & IDR0_MSI) {
3471 		smmu->features |= ARM_SMMU_FEAT_MSI;
3472 		if (coherent && !disable_msipolling)
3473 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3474 	}
3475 
3476 	if (reg & IDR0_HYP) {
3477 		smmu->features |= ARM_SMMU_FEAT_HYP;
3478 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3479 			smmu->features |= ARM_SMMU_FEAT_E2H;
3480 	}
3481 
3482 	/*
3483 	 * The coherency feature as set by FW is used in preference to the ID
3484 	 * register, but warn on mismatch.
3485 	 */
3486 	if (!!(reg & IDR0_COHACC) != coherent)
3487 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3488 			 coherent ? "true" : "false");
3489 
3490 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3491 	case IDR0_STALL_MODEL_FORCE:
3492 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3493 		fallthrough;
3494 	case IDR0_STALL_MODEL_STALL:
3495 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3496 	}
3497 
3498 	if (reg & IDR0_S1P)
3499 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3500 
3501 	if (reg & IDR0_S2P)
3502 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3503 
3504 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3505 		dev_err(smmu->dev, "no translation support!\n");
3506 		return -ENXIO;
3507 	}
3508 
3509 	/* We only support the AArch64 table format at present */
3510 	switch (FIELD_GET(IDR0_TTF, reg)) {
3511 	case IDR0_TTF_AARCH32_64:
3512 		smmu->ias = 40;
3513 		fallthrough;
3514 	case IDR0_TTF_AARCH64:
3515 		break;
3516 	default:
3517 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3518 		return -ENXIO;
3519 	}
3520 
3521 	/* ASID/VMID sizes */
3522 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3523 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3524 
3525 	/* IDR1 */
3526 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3527 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3528 		dev_err(smmu->dev, "embedded implementation not supported\n");
3529 		return -ENXIO;
3530 	}
3531 
3532 	/* Queue sizes, capped to ensure natural alignment */
3533 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3534 					     FIELD_GET(IDR1_CMDQS, reg));
3535 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3536 		/*
3537 		 * We don't support splitting up batches, so one batch of
3538 		 * commands plus an extra sync needs to fit inside the command
3539 		 * queue. There's also no way we can handle the weird alignment
3540 		 * restrictions on the base pointer for a unit-length queue.
3541 		 */
3542 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3543 			CMDQ_BATCH_ENTRIES);
3544 		return -ENXIO;
3545 	}
3546 
3547 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3548 					     FIELD_GET(IDR1_EVTQS, reg));
3549 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3550 					     FIELD_GET(IDR1_PRIQS, reg));
3551 
3552 	/* SID/SSID sizes */
3553 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3554 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3555 
3556 	/*
3557 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3558 	 * table, use a linear table instead.
3559 	 */
3560 	if (smmu->sid_bits <= STRTAB_SPLIT)
3561 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
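	/*
	 * Range invalidation (RIL) allows a single TLBI command to cover an
	 * address range instead of requiring one invalidation per granule.
	 */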
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
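	/*
	 * The sizes paired with each granule above are the block mappings
	 * that granule provides (e.g. 2M and 1G blocks with 4K pages).
	 */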

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
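		/*
		 * 52-bit PAs also make 4TB level-1 block mappings possible
		 * with the 64K granule, hence the extra bit below.
		 */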
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
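	/*
	 * arm_smmu_ops is shared by every SMMUv3 instance, so the first
	 * device to probe seeds the global page-size bitmap and any later
	 * ones OR their sizes in.
	 */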

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

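	/*
	 * Stage-2 input addresses (IPAs) track the PA size, so make sure the
	 * reported IAS is at least as large as the OAS.
	 */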
	smmu->ias = max(smmu->ias, smmu->oas);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}

#ifdef CONFIG_ACPI
static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
{
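	/*
	 * Match known implementations by their IORT model code and apply the
	 * same workarounds that DT-based systems request via the vendor
	 * properties in arm_smmu_options[].
	 */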
	switch (model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	acpi_smmu_get_options(iort_smmu->model, smmu);

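	/*
	 * COHACC_OVERRIDE is the IORT way for firmware to declare the SMMU's
	 * accesses coherent, mirroring the dma-coherent property handled in
	 * arm_smmu_device_dt_probe().
	 */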
	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}

static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
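	/*
	 * SMMUv3 normally exposes two 64K register pages. Implementations
	 * with the Cavium page-1 erratum only provide page 0, so restrict
	 * the mapping accordingly.
	 */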
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}

static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}

static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;
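	/*
	 * When firmware probing fails, the device is still reset below, but
	 * incoming transactions are either allowed to bypass translation or
	 * aborted, depending on the disable_bypass parameter.
	 */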

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}
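	/*
	 * Registers that architecturally live in page 1 (e.g. the event and
	 * PRI queue producer/consumer pointers) are accessed via smmu->page1,
	 * so aliasing it to page 0 hides the erratum from the rest of the
	 * driver.
	 */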

	/* Interrupt lines */

	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0) {
		smmu->combined_irq = irq;
	} else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
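	/*
	 * All interrupt lines are optional: a "combined" IRQ muxes the event
	 * queue, PRI queue and global error sources onto a single handler,
	 * otherwise each source may have its own line. The handlers are
	 * requested later, from arm_smmu_device_reset().
	 */
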
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		goto err_sysfs_remove;
	}

	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
	if (ret)
		goto err_unregister_device;

	return 0;

err_unregister_device:
	iommu_device_unregister(&smmu->iommu);
err_sysfs_remove:
	iommu_device_sysfs_remove(&smmu->iommu);
	return ret;
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);

	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
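	/*
	 * Reuse the remove path so the SMMU is quiesced and disabled before
	 * a reboot or kexec hands the hardware over.
	 */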
	arm_smmu_device_remove(pdev);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
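	/*
	 * Wait for any outstanding SVA mmu_notifier work to finish before
	 * the module (and the callbacks it provides) can go away.
	 */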
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");