1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_platform.h>
27 #include <linux/pci.h>
28 #include <linux/pci-ats.h>
29 #include <linux/platform_device.h>
30 
31 #include <linux/amba/bus.h>
32 
33 #include "arm-smmu-v3.h"
34 #include "../../iommu-sva-lib.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
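/*
 * Per-interrupt register triplets used when a wired interrupt is instead
 * delivered as an MSI: the IRQ_CFG0 (doorbell address), IRQ_CFG1 (payload)
 * and IRQ_CFG2 (write attribute) registers for the event queue, global
 * errors and the PRI queue respectively.
 */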
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
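/*
 * The prod/cons values manipulated below pack a queue index in the low
 * max_n_shift bits, a wrap bit immediately above it and an overflow flag in
 * bit 31 (see the Q_IDX/Q_WRP/Q_OVF helpers in arm-smmu-v3.h). For example,
 * with max_n_shift == 8, a prod value of 0x105 is index 5 with the wrap bit
 * set. Comparing the wrap bits is what lets queue_full() and queue_empty()
 * tell a full queue from an empty one when the indices are equal.
 */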
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
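/*
 * Return true if the entry that was at 'prod' has been consumed, i.e. the
 * cons pointer has advanced past it, allowing for cons having wrapped since
 * 'prod' was snapshotted.
 */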
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
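/*
 * Bounded polling helpers: spin with cpu_relax() for up to
 * ARM_SMMU_POLL_SPIN_COUNT iterations and then back off exponentially with
 * udelay(), or simply wfe() when the SMMU can send wake-up events.
 * queue_poll() returns -ETIMEDOUT once ARM_SMMU_POLL_TIMEOUT_US has expired.
 */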
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
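/*
 * Dequeue the entry at cons into @ent, or return -EAGAIN if the queue is
 * empty. On success, the updated cons pointer is published to the hardware
 * via queue_sync_cons_out().
 */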
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
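/*
 * arm_smmu_cmdq_build_cmd() encodes an arm_smmu_cmdq_ent into the raw
 * CMDQ_ENT_DWORDS command layout. A typical entry, as built for instance by
 * arm_smmu_tlb_inv_asid() further down, looks like:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode    = CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid = asid,
 *	};
 *
 * Unknown opcodes are rejected with -ENOENT so that callers can warn and
 * drop the command instead of corrupting the queue.
 */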
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241 
242 	switch (ent->opcode) {
243 	case CMDQ_OP_TLBI_EL2_ALL:
244 	case CMDQ_OP_TLBI_NSNH_ALL:
245 		break;
246 	case CMDQ_OP_PREFETCH_CFG:
247 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248 		break;
249 	case CMDQ_OP_CFGI_CD:
250 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
251 		fallthrough;
252 	case CMDQ_OP_CFGI_STE:
253 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
254 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
255 		break;
256 	case CMDQ_OP_CFGI_CD_ALL:
257 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
258 		break;
259 	case CMDQ_OP_CFGI_ALL:
260 		/* Cover the entire SID range */
261 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
262 		break;
263 	case CMDQ_OP_TLBI_NH_VA:
264 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
265 		fallthrough;
266 	case CMDQ_OP_TLBI_EL2_VA:
267 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
268 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
269 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
270 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
271 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
272 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
273 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
274 		break;
275 	case CMDQ_OP_TLBI_S2_IPA:
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
283 		break;
284 	case CMDQ_OP_TLBI_NH_ASID:
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
286 		fallthrough;
287 	case CMDQ_OP_TLBI_S12_VMALL:
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		break;
290 	case CMDQ_OP_TLBI_EL2_ASID:
291 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
292 		break;
293 	case CMDQ_OP_ATC_INV:
294 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
295 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
296 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
297 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
298 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
299 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
300 		break;
301 	case CMDQ_OP_PRI_RESP:
302 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
303 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
304 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
305 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
306 		switch (ent->pri.resp) {
307 		case PRI_RESP_DENY:
308 		case PRI_RESP_FAIL:
309 		case PRI_RESP_SUCC:
310 			break;
311 		default:
312 			return -EINVAL;
313 		}
314 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
315 		break;
316 	case CMDQ_OP_RESUME:
317 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
318 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
319 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
337 
338 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
339 {
340 	return &smmu->cmdq;
341 }
342 
343 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
344 					 struct arm_smmu_queue *q, u32 prod)
345 {
346 	struct arm_smmu_cmdq_ent ent = {
347 		.opcode = CMDQ_OP_CMD_SYNC,
348 	};
349 
350 	/*
351 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
352 	 * payload, so the write will zero the entire command on that platform.
353 	 */
354 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
355 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
356 				   q->ent_dwords * 8;
357 	}
358 
359 	arm_smmu_cmdq_build_cmd(cmd, &ent);
360 }
361 
362 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
363 				     struct arm_smmu_queue *q)
364 {
365 	static const char * const cerror_str[] = {
366 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
367 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
368 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
369 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
370 	};
371 
372 	int i;
373 	u64 cmd[CMDQ_ENT_DWORDS];
374 	u32 cons = readl_relaxed(q->cons_reg);
375 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
376 	struct arm_smmu_cmdq_ent cmd_sync = {
377 		.opcode = CMDQ_OP_CMD_SYNC,
378 	};
379 
380 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
381 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
382 
383 	switch (idx) {
384 	case CMDQ_ERR_CERROR_ABT_IDX:
385 		dev_err(smmu->dev, "retrying command fetch\n");
386 		return;
387 	case CMDQ_ERR_CERROR_NONE_IDX:
388 		return;
389 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
390 		/*
391 		 * ATC Invalidation Completion timeout. CONS is still pointing
392 		 * at the CMD_SYNC. Attempt to complete other pending commands
393 		 * by repeating the CMD_SYNC, though we might well end up back
394 		 * here since the ATC invalidation may still be pending.
395 		 */
396 		return;
397 	case CMDQ_ERR_CERROR_ILL_IDX:
398 	default:
399 		break;
400 	}
401 
402 	/*
403 	 * We may have concurrent producers, so we need to be careful
404 	 * not to touch any of the shadow cmdq state.
405 	 */
406 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
407 	dev_err(smmu->dev, "skipping command in error state:\n");
408 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
409 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
410 
411 	/* Convert the erroneous command into a CMD_SYNC */
412 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
413 
414 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
415 }
416 
417 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
418 {
419 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
420 }
421 
422 /*
423  * Command queue locking.
424  * This is a form of bastardised rwlock with the following major changes:
425  *
426  * - The only LOCK routines are exclusive_trylock() and shared_lock().
427  *   Neither have barrier semantics, and instead provide only a control
428  *   dependency.
429  *
430  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
431  *   fails if the caller appears to be the last lock holder (yes, this is
432  *   racy). All successful UNLOCK routines have RELEASE semantics.
433  */
434 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
435 {
436 	int val;
437 
438 	/*
439 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
440 	 * lock counter. When held in exclusive state, the lock counter is set
441 	 * to INT_MIN so these increments won't hurt as the value will remain
442 	 * negative.
443 	 */
444 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
445 		return;
446 
447 	do {
448 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
449 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
450 }
451 
452 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
453 {
454 	(void)atomic_dec_return_release(&cmdq->lock);
455 }
456 
457 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
458 {
459 	if (atomic_read(&cmdq->lock) == 1)
460 		return false;
461 
462 	arm_smmu_cmdq_shared_unlock(cmdq);
463 	return true;
464 }
465 
466 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
467 ({									\
468 	bool __ret;							\
469 	local_irq_save(flags);						\
470 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
471 	if (!__ret)							\
472 		local_irq_restore(flags);				\
473 	__ret;								\
474 })
475 
476 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
477 ({									\
478 	atomic_set_release(&cmdq->lock, 0);				\
479 	local_irq_restore(flags);					\
480 })
481 
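/*
 * Within this file, the shared lock is only taken around CMD_SYNC insertion
 * in arm_smmu_cmdq_issue_cmdlist(), while the exclusive trylock is only used
 * by arm_smmu_cmdq_poll_until_not_full() to refresh the cached cons pointer
 * without racing against those readers.
 */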
482 
483 /*
484  * Command queue insertion.
485  * This is made fiddly by our attempts to achieve some sort of scalability
486  * since there is one queue shared amongst all of the CPUs in the system.  If
487  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
488  * then you'll *love* this monstrosity.
489  *
490  * The basic idea is to split the queue up into ranges of commands that are
491  * owned by a given CPU; the owner may not have written all of the commands
492  * itself, but is responsible for advancing the hardware prod pointer when
493  * the time comes. The algorithm is roughly:
494  *
495  * 	1. Allocate some space in the queue. At this point we also discover
496  *	   whether the head of the queue is currently owned by another CPU,
497  *	   or whether we are the owner.
498  *
499  *	2. Write our commands into our allocated slots in the queue.
500  *
501  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
502  *
503  *	4. If we are an owner:
504  *		a. Wait for the previous owner to finish.
505  *		b. Mark the queue head as unowned, which tells us the range
506  *		   that we are responsible for publishing.
507  *		c. Wait for all commands in our owned range to become valid.
508  *		d. Advance the hardware prod pointer.
509  *		e. Tell the next owner we've finished.
510  *
511  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
512  *	   owner), then we need to stick around until it has completed:
513  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
514  *		   to clear the first 4 bytes.
515  *		b. Otherwise, we spin waiting for the hardware cons pointer to
516  *		   advance past our command.
517  *
518  * The devil is in the details, particularly the use of locking for handling
519  * SYNC completion and freeing up space in the queue before we think that it is
520  * full.
521  */
522 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
523 					       u32 sprod, u32 eprod, bool set)
524 {
525 	u32 swidx, sbidx, ewidx, ebidx;
526 	struct arm_smmu_ll_queue llq = {
527 		.max_n_shift	= cmdq->q.llq.max_n_shift,
528 		.prod		= sprod,
529 	};
530 
531 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
532 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
533 
534 	while (llq.prod != eprod) {
535 		unsigned long mask;
536 		atomic_long_t *ptr;
537 		u32 limit = BITS_PER_LONG;
538 
539 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
540 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
541 
542 		ptr = &cmdq->valid_map[swidx];
543 
544 		if ((swidx == ewidx) && (sbidx < ebidx))
545 			limit = ebidx;
546 
547 		mask = GENMASK(limit - 1, sbidx);
548 
549 		/*
550 		 * The valid bit is the inverse of the wrap bit. This means
551 		 * that a zero-initialised queue is invalid and, after marking
552 		 * all entries as valid, they become invalid again when we
553 		 * wrap.
554 		 */
555 		if (set) {
556 			atomic_long_xor(mask, ptr);
557 		} else { /* Poll */
558 			unsigned long valid;
559 
560 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
561 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
562 		}
563 
564 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
565 	}
566 }
567 
568 /* Mark all entries in the range [sprod, eprod) as valid */
569 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
570 					u32 sprod, u32 eprod)
571 {
572 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
573 }
574 
575 /* Wait for all entries in the range [sprod, eprod) to become valid */
576 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
577 					 u32 sprod, u32 eprod)
578 {
579 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
580 }
581 
582 /* Wait for the command queue to become non-full */
583 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
584 					     struct arm_smmu_ll_queue *llq)
585 {
586 	unsigned long flags;
587 	struct arm_smmu_queue_poll qp;
588 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
589 	int ret = 0;
590 
591 	/*
592 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
593 	 * that fails, spin until somebody else updates it for us.
594 	 */
595 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
596 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
597 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
598 		llq->val = READ_ONCE(cmdq->q.llq.val);
599 		return 0;
600 	}
601 
602 	queue_poll_init(smmu, &qp);
603 	do {
604 		llq->val = READ_ONCE(cmdq->q.llq.val);
605 		if (!queue_full(llq))
606 			break;
607 
608 		ret = queue_poll(&qp);
609 	} while (!ret);
610 
611 	return ret;
612 }
613 
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 *
 * The MSI doorbell address programmed into the CMD_SYNC points back at the
 * command itself (see arm_smmu_cmdq_build_sync_cmd()), so completion is
 * observed as the first 32-bit word of the entry being overwritten with the
 * (zero) MSI payload.
 */
618 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
619 					  struct arm_smmu_ll_queue *llq)
620 {
621 	int ret = 0;
622 	struct arm_smmu_queue_poll qp;
623 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
624 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
625 
626 	queue_poll_init(smmu, &qp);
627 
628 	/*
629 	 * The MSI won't generate an event, since it's being written back
630 	 * into the command queue.
631 	 */
632 	qp.wfe = false;
633 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
634 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
635 	return ret;
636 }
637 
638 /*
639  * Wait until the SMMU cons index passes llq->prod.
640  * Must be called with the cmdq lock held in some capacity.
641  */
642 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
643 					       struct arm_smmu_ll_queue *llq)
644 {
645 	struct arm_smmu_queue_poll qp;
646 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
647 	u32 prod = llq->prod;
648 	int ret = 0;
649 
650 	queue_poll_init(smmu, &qp);
651 	llq->val = READ_ONCE(cmdq->q.llq.val);
652 	do {
653 		if (queue_consumed(llq, prod))
654 			break;
655 
656 		ret = queue_poll(&qp);
657 
658 		/*
659 		 * This needs to be a readl() so that our subsequent call
660 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
661 		 *
662 		 * Specifically, we need to ensure that we observe all
663 		 * shared_lock()s by other CMD_SYNCs that share our owner,
664 		 * so that a failing call to tryunlock() means that we're
665 		 * the last one out and therefore we can safely advance
666 		 * cmdq->q.llq.cons. Roughly speaking:
667 		 *
668 		 * CPU 0		CPU1			CPU2 (us)
669 		 *
670 		 * if (sync)
671 		 * 	shared_lock();
672 		 *
673 		 * dma_wmb();
674 		 * set_valid_map();
675 		 *
676 		 * 			if (owner) {
677 		 *				poll_valid_map();
678 		 *				<control dependency>
679 		 *				writel(prod_reg);
680 		 *
681 		 *						readl(cons_reg);
682 		 *						tryunlock();
683 		 *
684 		 * Requires us to see CPU 0's shared_lock() acquisition.
685 		 */
686 		llq->cons = readl(cmdq->q.cons_reg);
687 	} while (!ret);
688 
689 	return ret;
690 }
691 
692 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
693 					 struct arm_smmu_ll_queue *llq)
694 {
695 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
696 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
697 
698 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
699 }
700 
701 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
702 					u32 prod, int n)
703 {
704 	int i;
705 	struct arm_smmu_ll_queue llq = {
706 		.max_n_shift	= cmdq->q.llq.max_n_shift,
707 		.prod		= prod,
708 	};
709 
710 	for (i = 0; i < n; ++i) {
711 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
712 
713 		prod = queue_inc_prod_n(&llq, i);
714 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
715 	}
716 }
717 
718 /*
719  * This is the actual insertion function, and provides the following
720  * ordering guarantees to callers:
721  *
722  * - There is a dma_wmb() before publishing any commands to the queue.
723  *   This can be relied upon to order prior writes to data structures
724  *   in memory (such as a CD or an STE) before the command.
725  *
726  * - On completion of a CMD_SYNC, there is a control dependency.
727  *   This can be relied upon to order subsequent writes to memory (e.g.
728  *   freeing an IOVA) after completion of the CMD_SYNC.
729  *
730  * - Command insertion is totally ordered, so if two CPUs each race to
731  *   insert their own list of commands then all of the commands from one
732  *   CPU will appear before any of the commands from the other CPU.
733  */
734 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
735 				       u64 *cmds, int n, bool sync)
736 {
737 	u64 cmd_sync[CMDQ_ENT_DWORDS];
738 	u32 prod;
739 	unsigned long flags;
740 	bool owner;
741 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
742 	struct arm_smmu_ll_queue llq, head;
743 	int ret = 0;
744 
745 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
746 
747 	/* 1. Allocate some space in the queue */
748 	local_irq_save(flags);
749 	llq.val = READ_ONCE(cmdq->q.llq.val);
750 	do {
751 		u64 old;
752 
753 		while (!queue_has_space(&llq, n + sync)) {
754 			local_irq_restore(flags);
755 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
756 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
757 			local_irq_save(flags);
758 		}
759 
760 		head.cons = llq.cons;
761 		head.prod = queue_inc_prod_n(&llq, n + sync) |
762 					     CMDQ_PROD_OWNED_FLAG;
763 
764 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
765 		if (old == llq.val)
766 			break;
767 
768 		llq.val = old;
769 	} while (1);
770 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
771 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
772 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
773 
774 	/*
775 	 * 2. Write our commands into the queue
776 	 * Dependency ordering from the cmpxchg() loop above.
777 	 */
778 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
779 	if (sync) {
780 		prod = queue_inc_prod_n(&llq, n);
781 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
782 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
783 
784 		/*
785 		 * In order to determine completion of our CMD_SYNC, we must
786 		 * ensure that the queue can't wrap twice without us noticing.
787 		 * We achieve that by taking the cmdq lock as shared before
788 		 * marking our slot as valid.
789 		 */
790 		arm_smmu_cmdq_shared_lock(cmdq);
791 	}
792 
793 	/* 3. Mark our slots as valid, ensuring commands are visible first */
794 	dma_wmb();
795 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
796 
797 	/* 4. If we are the owner, take control of the SMMU hardware */
798 	if (owner) {
799 		/* a. Wait for previous owner to finish */
800 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
801 
802 		/* b. Stop gathering work by clearing the owned flag */
803 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
804 						   &cmdq->q.llq.atomic.prod);
805 		prod &= ~CMDQ_PROD_OWNED_FLAG;
806 
807 		/*
808 		 * c. Wait for any gathered work to be written to the queue.
809 		 * Note that we read our own entries so that we have the control
810 		 * dependency required by (d).
811 		 */
812 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
813 
814 		/*
815 		 * d. Advance the hardware prod pointer
816 		 * Control dependency ordering from the entries becoming valid.
817 		 */
818 		writel_relaxed(prod, cmdq->q.prod_reg);
819 
820 		/*
821 		 * e. Tell the next owner we're done
822 		 * Make sure we've updated the hardware first, so that we don't
823 		 * race to update prod and potentially move it backwards.
824 		 */
825 		atomic_set_release(&cmdq->owner_prod, prod);
826 	}
827 
828 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
829 	if (sync) {
830 		llq.prod = queue_inc_prod_n(&llq, n);
831 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
832 		if (ret) {
833 			dev_err_ratelimited(smmu->dev,
834 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
835 					    llq.prod,
836 					    readl_relaxed(cmdq->q.prod_reg),
837 					    readl_relaxed(cmdq->q.cons_reg));
838 		}
839 
840 		/*
841 		 * Try to unlock the cmdq lock. This will fail if we're the last
842 		 * reader, in which case we can safely update cmdq->q.llq.cons
843 		 */
844 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
845 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
846 			arm_smmu_cmdq_shared_unlock(cmdq);
847 		}
848 	}
849 
850 	local_irq_restore(flags);
851 	return ret;
852 }
853 
854 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
855 				     struct arm_smmu_cmdq_ent *ent,
856 				     bool sync)
857 {
858 	u64 cmd[CMDQ_ENT_DWORDS];
859 
860 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
861 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
862 			 ent->opcode);
863 		return -EINVAL;
864 	}
865 
866 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
867 }
868 
869 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870 				   struct arm_smmu_cmdq_ent *ent)
871 {
872 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
873 }
874 
875 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
876 					     struct arm_smmu_cmdq_ent *ent)
877 {
878 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
879 }
880 
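/*
 * Command batching: callers accumulate up to CMDQ_BATCH_ENTRIES commands and
 * hand them to the queue in as few cmdlist insertions as possible:
 *
 *	cmds.num = 0;
 *	for each target {
 *		...fill in cmd...
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes the batch (without a sync) whenever it fills up;
 * batch_submit() issues whatever remains followed by a CMD_SYNC.
 */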
881 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
882 				    struct arm_smmu_cmdq_batch *cmds,
883 				    struct arm_smmu_cmdq_ent *cmd)
884 {
885 	int index;
886 
887 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
888 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
889 		cmds->num = 0;
890 	}
891 
892 	index = cmds->num * CMDQ_ENT_DWORDS;
893 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
894 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
895 			 cmd->opcode);
896 		return;
897 	}
898 
899 	cmds->num++;
900 }
901 
902 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
903 				      struct arm_smmu_cmdq_batch *cmds)
904 {
905 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
906 }
907 
908 static int arm_smmu_page_response(struct device *dev,
909 				  struct iommu_fault_event *unused,
910 				  struct iommu_page_response *resp)
911 {
912 	struct arm_smmu_cmdq_ent cmd = {0};
913 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
914 	int sid = master->streams[0].id;
915 
916 	if (master->stall_enabled) {
917 		cmd.opcode		= CMDQ_OP_RESUME;
918 		cmd.resume.sid		= sid;
919 		cmd.resume.stag		= resp->grpid;
920 		switch (resp->code) {
921 		case IOMMU_PAGE_RESP_INVALID:
922 		case IOMMU_PAGE_RESP_FAILURE:
923 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
924 			break;
925 		case IOMMU_PAGE_RESP_SUCCESS:
926 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
927 			break;
928 		default:
929 			return -EINVAL;
930 		}
931 	} else {
932 		return -ENODEV;
933 	}
934 
935 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
936 	/*
937 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
938 	 * RESUME consumption guarantees that the stalled transaction will be
939 	 * terminated... at some point in the future. PRI_RESP is fire and
940 	 * forget.
941 	 */
942 
943 	return 0;
944 }
945 
946 /* Context descriptor manipulation functions */
947 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
948 {
949 	struct arm_smmu_cmdq_ent cmd = {
950 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
951 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
952 		.tlbi.asid = asid,
953 	};
954 
955 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
956 }
957 
958 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
959 			     int ssid, bool leaf)
960 {
961 	size_t i;
962 	unsigned long flags;
963 	struct arm_smmu_master *master;
964 	struct arm_smmu_cmdq_batch cmds;
965 	struct arm_smmu_device *smmu = smmu_domain->smmu;
966 	struct arm_smmu_cmdq_ent cmd = {
967 		.opcode	= CMDQ_OP_CFGI_CD,
968 		.cfgi	= {
969 			.ssid	= ssid,
970 			.leaf	= leaf,
971 		},
972 	};
973 
974 	cmds.num = 0;
975 
976 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
977 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
978 		for (i = 0; i < master->num_streams; i++) {
979 			cmd.cfgi.sid = master->streams[i].id;
980 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
981 		}
982 	}
983 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
984 
985 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
986 }
987 
988 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
989 					struct arm_smmu_l1_ctx_desc *l1_desc)
990 {
991 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
992 
993 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
994 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
995 	if (!l1_desc->l2ptr) {
996 		dev_warn(smmu->dev,
997 			 "failed to allocate context descriptor table\n");
998 		return -ENOMEM;
999 	}
1000 	return 0;
1001 }
1002 
1003 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1004 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1005 {
1006 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1007 		  CTXDESC_L1_DESC_V;
1008 
1009 	/* See comment in arm_smmu_write_ctx_desc() */
1010 	WRITE_ONCE(*dst, cpu_to_le64(val));
1011 }
1012 
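/*
 * Return a pointer to the CD for @ssid, allocating (and syncing) a leaf
 * table on demand when the two-level format is in use. With two levels, the
 * SSID bits above CTXDESC_SPLIT index the L1 table and the low bits index
 * the leaf table.
 */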
1013 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1014 				   u32 ssid)
1015 {
1016 	__le64 *l1ptr;
1017 	unsigned int idx;
1018 	struct arm_smmu_l1_ctx_desc *l1_desc;
1019 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1020 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1021 
1022 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1023 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1024 
1025 	idx = ssid >> CTXDESC_SPLIT;
1026 	l1_desc = &cdcfg->l1_desc[idx];
1027 	if (!l1_desc->l2ptr) {
1028 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1029 			return NULL;
1030 
1031 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1032 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1033 		/* An invalid L1CD can be cached */
1034 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1035 	}
1036 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1037 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1038 }
1039 
1040 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1041 			    struct arm_smmu_ctx_desc *cd)
1042 {
1043 	/*
1044 	 * This function handles the following cases:
1045 	 *
1046 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1047 	 * (2) Install a secondary CD, for SID+SSID traffic.
1048 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1049 	 *     CD, then invalidate the old entry and mappings.
1050 	 * (4) Quiesce the context without clearing the valid bit. Disable
1051 	 *     translation, and ignore any translation fault.
1052 	 * (5) Remove a secondary CD.
1053 	 */
1054 	u64 val;
1055 	bool cd_live;
1056 	__le64 *cdptr;
1057 
1058 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1059 		return -E2BIG;
1060 
1061 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1062 	if (!cdptr)
1063 		return -ENOMEM;
1064 
1065 	val = le64_to_cpu(cdptr[0]);
1066 	cd_live = !!(val & CTXDESC_CD_0_V);
1067 
1068 	if (!cd) { /* (5) */
1069 		val = 0;
1070 	} else if (cd == &quiet_cd) { /* (4) */
1071 		val |= CTXDESC_CD_0_TCR_EPD0;
1072 	} else if (cd_live) { /* (3) */
1073 		val &= ~CTXDESC_CD_0_ASID;
1074 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1075 		/*
1076 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1077 		 * this substream's traffic
1078 		 */
1079 	} else { /* (1) and (2) */
1080 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1081 		cdptr[2] = 0;
1082 		cdptr[3] = cpu_to_le64(cd->mair);
1083 
1084 		/*
1085 		 * STE is live, and the SMMU might read dwords of this CD in any
1086 		 * order. Ensure that it observes valid values before reading
1087 		 * V=1.
1088 		 */
1089 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1090 
1091 		val = cd->tcr |
1092 #ifdef __BIG_ENDIAN
1093 			CTXDESC_CD_0_ENDI |
1094 #endif
1095 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1096 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1097 			CTXDESC_CD_0_AA64 |
1098 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1099 			CTXDESC_CD_0_V;
1100 
1101 		if (smmu_domain->stall_enabled)
1102 			val |= CTXDESC_CD_0_S;
1103 	}
1104 
1105 	/*
1106 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1107 	 * "Configuration structures and configuration invalidation completion"
1108 	 *
1109 	 *   The size of single-copy atomic reads made by the SMMU is
1110 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1111 	 *   field within an aligned 64-bit span of a structure can be altered
1112 	 *   without first making the structure invalid.
1113 	 */
1114 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1115 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1116 	return 0;
1117 }
1118 
1119 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1120 {
1121 	int ret;
1122 	size_t l1size;
1123 	size_t max_contexts;
1124 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1125 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1126 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1127 
1128 	max_contexts = 1 << cfg->s1cdmax;
1129 
1130 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1131 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1132 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1133 		cdcfg->num_l1_ents = max_contexts;
1134 
1135 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1136 	} else {
1137 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1138 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1139 						  CTXDESC_L2_ENTRIES);
1140 
1141 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1142 					      sizeof(*cdcfg->l1_desc),
1143 					      GFP_KERNEL);
1144 		if (!cdcfg->l1_desc)
1145 			return -ENOMEM;
1146 
1147 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1148 	}
1149 
1150 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1151 					   GFP_KERNEL);
1152 	if (!cdcfg->cdtab) {
1153 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1154 		ret = -ENOMEM;
1155 		goto err_free_l1;
1156 	}
1157 
1158 	return 0;
1159 
1160 err_free_l1:
1161 	if (cdcfg->l1_desc) {
1162 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1163 		cdcfg->l1_desc = NULL;
1164 	}
1165 	return ret;
1166 }
1167 
1168 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1169 {
1170 	int i;
1171 	size_t size, l1size;
1172 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1173 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1174 
1175 	if (cdcfg->l1_desc) {
1176 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1177 
1178 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1179 			if (!cdcfg->l1_desc[i].l2ptr)
1180 				continue;
1181 
1182 			dmam_free_coherent(smmu->dev, size,
1183 					   cdcfg->l1_desc[i].l2ptr,
1184 					   cdcfg->l1_desc[i].l2ptr_dma);
1185 		}
1186 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1187 		cdcfg->l1_desc = NULL;
1188 
1189 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1190 	} else {
1191 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1192 	}
1193 
1194 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1195 	cdcfg->cdtab_dma = 0;
1196 	cdcfg->cdtab = NULL;
1197 }
1198 
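/*
 * Drop a reference to @cd's ASID and, when the last reference goes away,
 * remove it from the global arm_smmu_asid_xa. Returns true if the ASID was
 * freed.
 */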
1199 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1200 {
1201 	bool free;
1202 	struct arm_smmu_ctx_desc *old_cd;
1203 
1204 	if (!cd->asid)
1205 		return false;
1206 
1207 	free = refcount_dec_and_test(&cd->refs);
1208 	if (free) {
1209 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1210 		WARN_ON(old_cd != cd);
1211 	}
1212 	return free;
1213 }
1214 
1215 /* Stream table manipulation functions */
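/*
 * Write a level-1 stream table descriptor: the DMA address of the level-2
 * table of STEs plus a span field encoding how many STEs it covers
 * (2^(span - 1)).
 */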
1216 static void
1217 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1218 {
1219 	u64 val = 0;
1220 
1221 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1222 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1223 
1224 	/* See comment in arm_smmu_write_ctx_desc() */
1225 	WRITE_ONCE(*dst, cpu_to_le64(val));
1226 }
1227 
1228 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1229 {
1230 	struct arm_smmu_cmdq_ent cmd = {
1231 		.opcode	= CMDQ_OP_CFGI_STE,
1232 		.cfgi	= {
1233 			.sid	= sid,
1234 			.leaf	= true,
1235 		},
1236 	};
1237 
1238 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1239 }
1240 
1241 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1242 				      __le64 *dst)
1243 {
1244 	/*
1245 	 * This is hideously complicated, but we only really care about
1246 	 * three cases at the moment:
1247 	 *
1248 	 * 1. Invalid (all zero) -> bypass/fault (init)
1249 	 * 2. Bypass/fault -> translation/bypass (attach)
1250 	 * 3. Translation/bypass -> bypass/fault (detach)
1251 	 *
1252 	 * Given that we can't update the STE atomically and the SMMU
1253 	 * doesn't read the thing in a defined order, that leaves us
1254 	 * with the following maintenance requirements:
1255 	 *
1256 	 * 1. Update Config, return (init time STEs aren't live)
1257 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1258 	 * 3. Update Config, sync
1259 	 */
1260 	u64 val = le64_to_cpu(dst[0]);
1261 	bool ste_live = false;
1262 	struct arm_smmu_device *smmu = NULL;
1263 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1264 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1265 	struct arm_smmu_domain *smmu_domain = NULL;
1266 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1267 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1268 		.prefetch	= {
1269 			.sid	= sid,
1270 		},
1271 	};
1272 
1273 	if (master) {
1274 		smmu_domain = master->domain;
1275 		smmu = master->smmu;
1276 	}
1277 
1278 	if (smmu_domain) {
1279 		switch (smmu_domain->stage) {
1280 		case ARM_SMMU_DOMAIN_S1:
1281 			s1_cfg = &smmu_domain->s1_cfg;
1282 			break;
1283 		case ARM_SMMU_DOMAIN_S2:
1284 		case ARM_SMMU_DOMAIN_NESTED:
1285 			s2_cfg = &smmu_domain->s2_cfg;
1286 			break;
1287 		default:
1288 			break;
1289 		}
1290 	}
1291 
1292 	if (val & STRTAB_STE_0_V) {
1293 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1294 		case STRTAB_STE_0_CFG_BYPASS:
1295 			break;
1296 		case STRTAB_STE_0_CFG_S1_TRANS:
1297 		case STRTAB_STE_0_CFG_S2_TRANS:
1298 			ste_live = true;
1299 			break;
1300 		case STRTAB_STE_0_CFG_ABORT:
1301 			BUG_ON(!disable_bypass);
1302 			break;
1303 		default:
1304 			BUG(); /* STE corruption */
1305 		}
1306 	}
1307 
1308 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1309 	val = STRTAB_STE_0_V;
1310 
1311 	/* Bypass/fault */
1312 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1313 		if (!smmu_domain && disable_bypass)
1314 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1315 		else
1316 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1317 
1318 		dst[0] = cpu_to_le64(val);
1319 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1320 						STRTAB_STE_1_SHCFG_INCOMING));
1321 		dst[2] = 0; /* Nuke the VMID */
1322 		/*
1323 		 * The SMMU can perform negative caching, so we must sync
1324 		 * the STE regardless of whether the old value was live.
1325 		 */
1326 		if (smmu)
1327 			arm_smmu_sync_ste_for_sid(smmu, sid);
1328 		return;
1329 	}
1330 
1331 	if (s1_cfg) {
1332 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1333 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1334 
1335 		BUG_ON(ste_live);
1336 		dst[1] = cpu_to_le64(
1337 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1338 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1339 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1340 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1341 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1342 
1343 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1344 		    !master->stall_enabled)
1345 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1346 
1347 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1348 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1349 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1350 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1351 	}
1352 
1353 	if (s2_cfg) {
1354 		BUG_ON(ste_live);
1355 		dst[2] = cpu_to_le64(
1356 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1357 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1358 #ifdef __BIG_ENDIAN
1359 			 STRTAB_STE_2_S2ENDI |
1360 #endif
1361 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1362 			 STRTAB_STE_2_S2R);
1363 
1364 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1365 
1366 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1367 	}
1368 
1369 	if (master->ats_enabled)
1370 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1371 						 STRTAB_STE_1_EATS_TRANS));
1372 
1373 	arm_smmu_sync_ste_for_sid(smmu, sid);
1374 	/* See comment in arm_smmu_write_ctx_desc() */
1375 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1376 	arm_smmu_sync_ste_for_sid(smmu, sid);
1377 
1378 	/* It's likely that we'll want to use the new STE soon */
1379 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1380 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1381 }
1382 
1383 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1384 {
1385 	unsigned int i;
1386 
1387 	for (i = 0; i < nent; ++i) {
1388 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1389 		strtab += STRTAB_STE_DWORDS;
1390 	}
1391 }
1392 
1393 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1394 {
1395 	size_t size;
1396 	void *strtab;
1397 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1398 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1399 
1400 	if (desc->l2ptr)
1401 		return 0;
1402 
1403 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1404 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1405 
1406 	desc->span = STRTAB_SPLIT + 1;
1407 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1408 					  GFP_KERNEL);
1409 	if (!desc->l2ptr) {
1410 		dev_err(smmu->dev,
1411 			"failed to allocate l2 stream table for SID %u\n",
1412 			sid);
1413 		return -ENOMEM;
1414 	}
1415 
1416 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1417 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1418 	return 0;
1419 }
1420 
1421 static struct arm_smmu_master *
1422 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1423 {
1424 	struct rb_node *node;
1425 	struct arm_smmu_stream *stream;
1426 
1427 	lockdep_assert_held(&smmu->streams_mutex);
1428 
1429 	node = smmu->streams.rb_node;
1430 	while (node) {
1431 		stream = rb_entry(node, struct arm_smmu_stream, node);
1432 		if (stream->id < sid)
1433 			node = node->rb_right;
1434 		else if (stream->id > sid)
1435 			node = node->rb_left;
1436 		else
1437 			return stream->master;
1438 	}
1439 
1440 	return NULL;
1441 }
1442 
1443 /* IRQ and event handlers */
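/*
 * Translate a raw event queue record into an iommu_fault and report it to
 * the device driver: stall events become recoverable page requests (which
 * arm_smmu_page_response() later resumes or aborts), anything else is
 * reported as an unrecoverable DMA fault.
 */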
1444 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1445 {
1446 	int ret;
1447 	u32 reason;
1448 	u32 perm = 0;
1449 	struct arm_smmu_master *master;
1450 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1451 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1452 	struct iommu_fault_event fault_evt = { };
1453 	struct iommu_fault *flt = &fault_evt.fault;
1454 
1455 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1456 	case EVT_ID_TRANSLATION_FAULT:
1457 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1458 		break;
1459 	case EVT_ID_ADDR_SIZE_FAULT:
1460 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1461 		break;
1462 	case EVT_ID_ACCESS_FAULT:
1463 		reason = IOMMU_FAULT_REASON_ACCESS;
1464 		break;
1465 	case EVT_ID_PERMISSION_FAULT:
1466 		reason = IOMMU_FAULT_REASON_PERMISSION;
1467 		break;
1468 	default:
1469 		return -EOPNOTSUPP;
1470 	}
1471 
1472 	/* Stage-2 is always pinned at the moment */
1473 	if (evt[1] & EVTQ_1_S2)
1474 		return -EFAULT;
1475 
1476 	if (evt[1] & EVTQ_1_RnW)
1477 		perm |= IOMMU_FAULT_PERM_READ;
1478 	else
1479 		perm |= IOMMU_FAULT_PERM_WRITE;
1480 
1481 	if (evt[1] & EVTQ_1_InD)
1482 		perm |= IOMMU_FAULT_PERM_EXEC;
1483 
1484 	if (evt[1] & EVTQ_1_PnU)
1485 		perm |= IOMMU_FAULT_PERM_PRIV;
1486 
1487 	if (evt[1] & EVTQ_1_STALL) {
1488 		flt->type = IOMMU_FAULT_PAGE_REQ;
1489 		flt->prm = (struct iommu_fault_page_request) {
1490 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1491 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1492 			.perm = perm,
1493 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1494 		};
1495 
1496 		if (ssid_valid) {
1497 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1498 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1499 		}
1500 	} else {
1501 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1502 		flt->event = (struct iommu_fault_unrecoverable) {
1503 			.reason = reason,
1504 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1505 			.perm = perm,
1506 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1507 		};
1508 
1509 		if (ssid_valid) {
1510 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1511 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1512 		}
1513 	}
1514 
1515 	mutex_lock(&smmu->streams_mutex);
1516 	master = arm_smmu_find_master(smmu, sid);
1517 	if (!master) {
1518 		ret = -EINVAL;
1519 		goto out_unlock;
1520 	}
1521 
1522 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1523 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1524 		/* Nobody cared, abort the access */
1525 		struct iommu_page_response resp = {
1526 			.pasid		= flt->prm.pasid,
1527 			.grpid		= flt->prm.grpid,
1528 			.code		= IOMMU_PAGE_RESP_FAILURE,
1529 		};
1530 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1531 	}
1532 
1533 out_unlock:
1534 	mutex_unlock(&smmu->streams_mutex);
1535 	return ret;
1536 }
1537 
1538 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1539 {
1540 	int i, ret;
1541 	struct arm_smmu_device *smmu = dev;
1542 	struct arm_smmu_queue *q = &smmu->evtq.q;
1543 	struct arm_smmu_ll_queue *llq = &q->llq;
1544 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1545 				      DEFAULT_RATELIMIT_BURST);
1546 	u64 evt[EVTQ_ENT_DWORDS];
1547 
1548 	do {
1549 		while (!queue_remove_raw(q, evt)) {
1550 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1551 
1552 			ret = arm_smmu_handle_evt(smmu, evt);
1553 			if (!ret || !__ratelimit(&rs))
1554 				continue;
1555 
1556 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1557 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1558 				dev_info(smmu->dev, "\t0x%016llx\n",
1559 					 (unsigned long long)evt[i]);
1560 
1561 		}
1562 
1563 		/*
1564 		 * Not much we can do on overflow, so scream and pretend we're
1565 		 * trying harder.
1566 		 */
1567 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1568 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1569 	} while (!queue_empty(llq));
1570 
1571 	/* Sync our overflow flag, as we believe we're up to speed */
1572 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1573 		    Q_IDX(llq, llq->cons);
1574 	return IRQ_HANDLED;
1575 }
1576 
1577 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1578 {
1579 	u32 sid, ssid;
1580 	u16 grpid;
1581 	bool ssv, last;
1582 
1583 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1584 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1585 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1586 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1587 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1588 
1589 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1590 	dev_info(smmu->dev,
1591 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1592 		 sid, ssid, grpid, last ? "L" : "",
1593 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1594 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1595 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1596 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1597 		 evt[1] & PRIQ_1_ADDR_MASK);
1598 
1599 	if (last) {
1600 		struct arm_smmu_cmdq_ent cmd = {
1601 			.opcode			= CMDQ_OP_PRI_RESP,
1602 			.substream_valid	= ssv,
1603 			.pri			= {
1604 				.sid	= sid,
1605 				.ssid	= ssid,
1606 				.grpid	= grpid,
1607 				.resp	= PRI_RESP_DENY,
1608 			},
1609 		};
1610 
1611 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1612 	}
1613 }
1614 
1615 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1616 {
1617 	struct arm_smmu_device *smmu = dev;
1618 	struct arm_smmu_queue *q = &smmu->priq.q;
1619 	struct arm_smmu_ll_queue *llq = &q->llq;
1620 	u64 evt[PRIQ_ENT_DWORDS];
1621 
1622 	do {
1623 		while (!queue_remove_raw(q, evt))
1624 			arm_smmu_handle_ppr(smmu, evt);
1625 
1626 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1627 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1628 	} while (!queue_empty(llq));
1629 
1630 	/* Sync our overflow flag, as we believe we're up to speed */
	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
		    Q_IDX(llq, llq->cons);
1633 	queue_sync_cons_out(q);
1634 	return IRQ_HANDLED;
1635 }
1636 
1637 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1638 
1639 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1640 {
1641 	u32 gerror, gerrorn, active;
1642 	struct arm_smmu_device *smmu = dev;
1643 
1644 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1645 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1646 
1647 	active = gerror ^ gerrorn;
1648 	if (!(active & GERROR_ERR_MASK))
1649 		return IRQ_NONE; /* No errors pending */
1650 
1651 	dev_warn(smmu->dev,
1652 		 "unexpected global error reported (0x%08x), this could be serious\n",
1653 		 active);
1654 
1655 	if (active & GERROR_SFM_ERR) {
1656 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1657 		arm_smmu_device_disable(smmu);
1658 	}
1659 
1660 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1661 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1662 
1663 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1664 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1665 
1666 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1667 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1668 
1669 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1670 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1671 
1672 	if (active & GERROR_PRIQ_ABT_ERR)
1673 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1674 
1675 	if (active & GERROR_EVTQ_ABT_ERR)
1676 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1677 
1678 	if (active & GERROR_CMDQ_ERR)
1679 		arm_smmu_cmdq_skip_err(smmu);
1680 
1681 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1682 	return IRQ_HANDLED;
1683 }
1684 
1685 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1686 {
1687 	struct arm_smmu_device *smmu = dev;
1688 
1689 	arm_smmu_evtq_thread(irq, dev);
1690 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1691 		arm_smmu_priq_thread(irq, dev);
1692 
1693 	return IRQ_HANDLED;
1694 }
1695 
1696 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1697 {
1698 	arm_smmu_gerror_handler(irq, dev);
1699 	return IRQ_WAKE_THREAD;
1700 }
1701 
1702 static void
1703 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1704 			struct arm_smmu_cmdq_ent *cmd)
1705 {
1706 	size_t log2_span;
1707 	size_t span_mask;
1708 	/* ATC invalidates are always on 4096-bytes pages */
1709 	size_t inval_grain_shift = 12;
1710 	unsigned long page_start, page_end;
1711 
1712 	/*
1713 	 * ATS and PASID:
1714 	 *
1715 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1716 	 * prefix. In that case all ATC entries within the address range are
1717 	 * invalidated, including those that were requested with a PASID! There
1718 	 * is no way to invalidate only entries without PASID.
1719 	 *
1720 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1721 	 * traffic), translation requests without PASID create ATC entries
1722 	 * without PASID, which must be invalidated with substream_valid clear.
1723 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1724 	 * ATC entries within the address range.
1725 	 */
1726 	*cmd = (struct arm_smmu_cmdq_ent) {
1727 		.opcode			= CMDQ_OP_ATC_INV,
1728 		.substream_valid	= !!ssid,
1729 		.atc.ssid		= ssid,
1730 	};
1731 
1732 	if (!size) {
1733 		cmd->atc.size = ATC_INV_SIZE_ALL;
1734 		return;
1735 	}
1736 
1737 	page_start	= iova >> inval_grain_shift;
1738 	page_end	= (iova + size - 1) >> inval_grain_shift;
1739 
1740 	/*
1741 	 * In an ATS Invalidate Request, the address must be aligned on the
1742 	 * range size, which must be a power of two number of page sizes. We
1743 	 * thus have to choose between grossly over-invalidating the region, or
1744 	 * splitting the invalidation into multiple commands. For simplicity
1745 	 * we'll go with the first solution, but should refine it in the future
1746 	 * if multiple commands are shown to be more efficient.
1747 	 *
1748 	 * Find the smallest power of two that covers the range. The most
1749 	 * significant differing bit between the start and end addresses,
1750 	 * fls(start ^ end), indicates the required span. For example:
1751 	 *
1752 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1753 	 *		x = 0b1000 ^ 0b1011 = 0b11
1754 	 *		span = 1 << fls(x) = 4
1755 	 *
1756 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1757 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1758 	 *		span = 1 << fls(x) = 16
1759 	 */
1760 	log2_span	= fls_long(page_start ^ page_end);
1761 	span_mask	= (1ULL << log2_span) - 1;
1762 
1763 	page_start	&= ~span_mask;
1764 
1765 	cmd->atc.addr	= page_start << inval_grain_shift;
1766 	cmd->atc.size	= log2_span;
1767 }
1768 
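/*
 * Invalidate the whole ATC for every stream ID owned by this master. Passing
 * a size of zero to arm_smmu_atc_inv_to_cmd() selects ATC_INV_SIZE_ALL.
 */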
1769 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1770 {
1771 	int i;
1772 	struct arm_smmu_cmdq_ent cmd;
1773 	struct arm_smmu_cmdq_batch cmds;
1774 
1775 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1776 
1777 	cmds.num = 0;
1778 	for (i = 0; i < master->num_streams; i++) {
1779 		cmd.atc.sid = master->streams[i].id;
1780 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1781 	}
1782 
1783 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1784 }
1785 
1786 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1787 			    unsigned long iova, size_t size)
1788 {
1789 	int i;
1790 	unsigned long flags;
1791 	struct arm_smmu_cmdq_ent cmd;
1792 	struct arm_smmu_master *master;
1793 	struct arm_smmu_cmdq_batch cmds;
1794 
1795 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1796 		return 0;
1797 
1798 	/*
1799 	 * Ensure that we've completed prior invalidation of the main TLBs
1800 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1801 	 * arm_smmu_enable_ats():
1802 	 *
1803 	 *	// unmap()			// arm_smmu_enable_ats()
1804 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1805 	 *	smp_mb();			[...]
1806 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1807 	 *
1808 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1809 	 * ATS was enabled at the PCI device before completion of the TLBI.
1810 	 */
1811 	smp_mb();
1812 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1813 		return 0;
1814 
1815 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1816 
1817 	cmds.num = 0;
1818 
1819 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1820 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1821 		if (!master->ats_enabled)
1822 			continue;
1823 
1824 		for (i = 0; i < master->num_streams; i++) {
1825 			cmd.atc.sid = master->streams[i].id;
1826 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1827 		}
1828 	}
1829 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1830 
1831 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1832 }
1833 
1834 /* IO_PGTABLE API */
1835 static void arm_smmu_tlb_inv_context(void *cookie)
1836 {
1837 	struct arm_smmu_domain *smmu_domain = cookie;
1838 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1839 	struct arm_smmu_cmdq_ent cmd;
1840 
1841 	/*
1842 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1843 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1844 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1845 	 * insertion to guarantee those are observed before the TLBI. Do be
1846 	 * careful, 007.
1847 	 */
1848 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1849 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1850 	} else {
1851 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1852 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1853 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1854 	}
1855 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1856 }
1857 
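/*
 * Issue the TLBI commands covering [iova, iova + size). With
 * ARM_SMMU_FEAT_RANGE_INV a single command can cover NUM * 2^SCALE
 * leaf-granule pages; without it, one command is emitted per 'granule'
 * of the range.
 */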
1858 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1859 				     unsigned long iova, size_t size,
1860 				     size_t granule,
1861 				     struct arm_smmu_domain *smmu_domain)
1862 {
1863 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1864 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1865 	size_t inv_range = granule;
1866 	struct arm_smmu_cmdq_batch cmds;
1867 
1868 	if (!size)
1869 		return;
1870 
1871 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1872 		/* Get the leaf page size */
1873 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1874 
1875 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1876 		cmd->tlbi.tg = (tg - 10) / 2;
1877 
1878 		/* Determine what level the granule is at */
1879 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1880 
1881 		num_pages = size >> tg;
1882 	}
1883 
1884 	cmds.num = 0;
1885 
1886 	while (iova < end) {
1887 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1888 			/*
1889 			 * On each iteration of the loop, the range is 5 bits
1890 			 * worth of the aligned size remaining.
1891 			 * The range in pages is:
1892 			 *
1893 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1894 			 */
1895 			unsigned long scale, num;
1896 
1897 			/* Determine the power of 2 multiple number of pages */
1898 			scale = __ffs(num_pages);
1899 			cmd->tlbi.scale = scale;
1900 
1901 			/* Determine how many chunks of 2^scale size we have */
1902 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1903 			cmd->tlbi.num = num - 1;
1904 
1905 			/* range is num * 2^scale * pgsize */
1906 			inv_range = num << (scale + tg);
1907 
1908 			/* Clear out the lower order bits for the next iteration */
1909 			num_pages -= num << scale;
1910 		}
1911 
1912 		cmd->tlbi.addr = iova;
1913 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1914 		iova += inv_range;
1915 	}
1916 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1917 }
1918 
1919 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1920 					  size_t granule, bool leaf,
1921 					  struct arm_smmu_domain *smmu_domain)
1922 {
1923 	struct arm_smmu_cmdq_ent cmd = {
1924 		.tlbi = {
1925 			.leaf	= leaf,
1926 		},
1927 	};
1928 
1929 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1930 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1931 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1932 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1933 	} else {
1934 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1935 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1936 	}
1937 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1938 
1939 	/*
1940 	 * Unfortunately, this can't be leaf-only since we may have
1941 	 * zapped an entire table.
1942 	 */
1943 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1944 }
1945 
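/*
 * Invalidate a VA range for an explicitly supplied ASID rather than the one
 * held in the domain's stage-1 configuration; callers such as the SVA code
 * provide the ASID directly.
 */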
1946 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1947 				 size_t granule, bool leaf,
1948 				 struct arm_smmu_domain *smmu_domain)
1949 {
1950 	struct arm_smmu_cmdq_ent cmd = {
1951 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1952 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1953 		.tlbi = {
1954 			.asid	= asid,
1955 			.leaf	= leaf,
1956 		},
1957 	};
1958 
1959 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1960 }
1961 
1962 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1963 					 unsigned long iova, size_t granule,
1964 					 void *cookie)
1965 {
1966 	struct arm_smmu_domain *smmu_domain = cookie;
1967 	struct iommu_domain *domain = &smmu_domain->domain;
1968 
1969 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1970 }
1971 
1972 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1973 				  size_t granule, void *cookie)
1974 {
1975 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1976 }
1977 
1978 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1979 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1980 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1981 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1982 };
1983 
1984 /* IOMMU API */
1985 static bool arm_smmu_capable(enum iommu_cap cap)
1986 {
1987 	switch (cap) {
1988 	case IOMMU_CAP_CACHE_COHERENCY:
1989 		return true;
1990 	case IOMMU_CAP_NOEXEC:
1991 		return true;
1992 	default:
1993 		return false;
1994 	}
1995 }
1996 
1997 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1998 {
1999 	struct arm_smmu_domain *smmu_domain;
2000 
2001 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2002 	    type != IOMMU_DOMAIN_DMA &&
2003 	    type != IOMMU_DOMAIN_DMA_FQ &&
2004 	    type != IOMMU_DOMAIN_IDENTITY)
2005 		return NULL;
2006 
2007 	/*
2008 	 * Allocate the domain and initialise some of its data structures.
2009 	 * We can't really do anything meaningful until we've added a
2010 	 * master.
2011 	 */
2012 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2013 	if (!smmu_domain)
2014 		return NULL;
2015 
2016 	mutex_init(&smmu_domain->init_mutex);
2017 	INIT_LIST_HEAD(&smmu_domain->devices);
2018 	spin_lock_init(&smmu_domain->devices_lock);
2019 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2020 
2021 	return &smmu_domain->domain;
2022 }
2023 
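/*
 * Allocate a free index from a bitmap of 2^span bits. test_and_set_bit()
 * makes the loop retry if another caller claims the same index first.
 */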
2024 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2025 {
2026 	int idx, size = 1 << span;
2027 
2028 	do {
2029 		idx = find_first_zero_bit(map, size);
2030 		if (idx == size)
2031 			return -ENOSPC;
2032 	} while (test_and_set_bit(idx, map));
2033 
2034 	return idx;
2035 }
2036 
2037 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2038 {
2039 	clear_bit(idx, map);
2040 }
2041 
2042 static void arm_smmu_domain_free(struct iommu_domain *domain)
2043 {
2044 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2045 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2046 
2047 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2048 
2049 	/* Free the CD and ASID, if we allocated them */
2050 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2051 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2052 
2053 		/* Prevent SVA from touching the CD while we're freeing it */
2054 		mutex_lock(&arm_smmu_asid_lock);
2055 		if (cfg->cdcfg.cdtab)
2056 			arm_smmu_free_cd_tables(smmu_domain);
2057 		arm_smmu_free_asid(&cfg->cd);
2058 		mutex_unlock(&arm_smmu_asid_lock);
2059 	} else {
2060 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2061 		if (cfg->vmid)
2062 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2063 	}
2064 
2065 	kfree(smmu_domain);
2066 }
2067 
2068 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2069 				       struct arm_smmu_master *master,
2070 				       struct io_pgtable_cfg *pgtbl_cfg)
2071 {
2072 	int ret;
2073 	u32 asid;
2074 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2075 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2076 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2077 
2078 	refcount_set(&cfg->cd.refs, 1);
2079 
2080 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2081 	mutex_lock(&arm_smmu_asid_lock);
2082 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2083 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2084 	if (ret)
2085 		goto out_unlock;
2086 
2087 	cfg->s1cdmax = master->ssid_bits;
2088 
2089 	smmu_domain->stall_enabled = master->stall_enabled;
2090 
2091 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2092 	if (ret)
2093 		goto out_free_asid;
2094 
2095 	cfg->cd.asid	= (u16)asid;
2096 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2097 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2098 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2099 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2100 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2101 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2102 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2103 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2104 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2105 
2106 	/*
2107 	 * Note that this will end up calling arm_smmu_sync_cd() before
2108 	 * the master has been added to the devices list for this domain.
2109 	 * This isn't an issue because the STE hasn't been installed yet.
2110 	 */
2111 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2112 	if (ret)
2113 		goto out_free_cd_tables;
2114 
2115 	mutex_unlock(&arm_smmu_asid_lock);
2116 	return 0;
2117 
2118 out_free_cd_tables:
2119 	arm_smmu_free_cd_tables(smmu_domain);
2120 out_free_asid:
2121 	arm_smmu_free_asid(&cfg->cd);
2122 out_unlock:
2123 	mutex_unlock(&arm_smmu_asid_lock);
2124 	return ret;
2125 }
2126 
2127 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2128 				       struct arm_smmu_master *master,
2129 				       struct io_pgtable_cfg *pgtbl_cfg)
2130 {
2131 	int vmid;
2132 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2133 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2134 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2135 
2136 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2137 	if (vmid < 0)
2138 		return vmid;
2139 
2140 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2141 	cfg->vmid	= (u16)vmid;
2142 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2143 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2144 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2145 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2146 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2147 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2148 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2149 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2150 	return 0;
2151 }
2152 
2153 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2154 				    struct arm_smmu_master *master)
2155 {
2156 	int ret;
2157 	unsigned long ias, oas;
2158 	enum io_pgtable_fmt fmt;
2159 	struct io_pgtable_cfg pgtbl_cfg;
2160 	struct io_pgtable_ops *pgtbl_ops;
2161 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2162 				 struct arm_smmu_master *,
2163 				 struct io_pgtable_cfg *);
2164 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2166 
2167 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2168 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2169 		return 0;
2170 	}
2171 
2172 	/* Restrict the stage to what we can actually support */
2173 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2174 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2175 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2176 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2177 
2178 	switch (smmu_domain->stage) {
2179 	case ARM_SMMU_DOMAIN_S1:
2180 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2181 		ias = min_t(unsigned long, ias, VA_BITS);
2182 		oas = smmu->ias;
2183 		fmt = ARM_64_LPAE_S1;
2184 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2185 		break;
2186 	case ARM_SMMU_DOMAIN_NESTED:
2187 	case ARM_SMMU_DOMAIN_S2:
2188 		ias = smmu->ias;
2189 		oas = smmu->oas;
2190 		fmt = ARM_64_LPAE_S2;
2191 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2192 		break;
2193 	default:
2194 		return -EINVAL;
2195 	}
2196 
2197 	pgtbl_cfg = (struct io_pgtable_cfg) {
2198 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2199 		.ias		= ias,
2200 		.oas		= oas,
2201 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2202 		.tlb		= &arm_smmu_flush_ops,
2203 		.iommu_dev	= smmu->dev,
2204 	};
2205 
2206 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2207 	if (!pgtbl_ops)
2208 		return -ENOMEM;
2209 
2210 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2211 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2212 	domain->geometry.force_aperture = true;
2213 
2214 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2215 	if (ret < 0) {
2216 		free_io_pgtable_ops(pgtbl_ops);
2217 		return ret;
2218 	}
2219 
2220 	smmu_domain->pgtbl_ops = pgtbl_ops;
2221 	return 0;
2222 }
2223 
2224 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2225 {
2226 	__le64 *step;
2227 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2228 
2229 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2230 		struct arm_smmu_strtab_l1_desc *l1_desc;
2231 		int idx;
2232 
2233 		/* Two-level walk */
2234 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2235 		l1_desc = &cfg->l1_desc[idx];
2236 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2237 		step = &l1_desc->l2ptr[idx];
2238 	} else {
2239 		/* Simple linear lookup */
2240 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2241 	}
2242 
2243 	return step;
2244 }
2245 
2246 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2247 {
2248 	int i, j;
2249 	struct arm_smmu_device *smmu = master->smmu;
2250 
2251 	for (i = 0; i < master->num_streams; ++i) {
2252 		u32 sid = master->streams[i].id;
2253 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2254 
2255 		/* Bridged PCI devices may end up with duplicated IDs */
2256 		for (j = 0; j < i; j++)
2257 			if (master->streams[j].id == sid)
2258 				break;
2259 		if (j < i)
2260 			continue;
2261 
2262 		arm_smmu_write_strtab_ent(master, sid, step);
2263 	}
2264 }
2265 
2266 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2267 {
2268 	struct device *dev = master->dev;
2269 	struct arm_smmu_device *smmu = master->smmu;
2270 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2271 
2272 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2273 		return false;
2274 
2275 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2276 		return false;
2277 
2278 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2279 }
2280 
2281 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2282 {
2283 	size_t stu;
2284 	struct pci_dev *pdev;
2285 	struct arm_smmu_device *smmu = master->smmu;
2286 	struct arm_smmu_domain *smmu_domain = master->domain;
2287 
2288 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2289 	if (!master->ats_enabled)
2290 		return;
2291 
2292 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2293 	stu = __ffs(smmu->pgsize_bitmap);
2294 	pdev = to_pci_dev(master->dev);
2295 
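	/*
	 * Account for this master in nr_ats_masters and clean out the ATC
	 * before the endpoint is allowed to send translation requests.
	 */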
2296 	atomic_inc(&smmu_domain->nr_ats_masters);
2297 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2298 	if (pci_enable_ats(pdev, stu))
2299 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2300 }
2301 
2302 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2303 {
2304 	struct arm_smmu_domain *smmu_domain = master->domain;
2305 
2306 	if (!master->ats_enabled)
2307 		return;
2308 
2309 	pci_disable_ats(to_pci_dev(master->dev));
2310 	/*
2311 	 * Ensure ATS is disabled at the endpoint before we issue the
2312 	 * ATC invalidation via the SMMU.
2313 	 */
2314 	wmb();
2315 	arm_smmu_atc_inv_master(master);
2316 	atomic_dec(&smmu_domain->nr_ats_masters);
2317 }
2318 
2319 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2320 {
2321 	int ret;
2322 	int features;
2323 	int num_pasids;
2324 	struct pci_dev *pdev;
2325 
2326 	if (!dev_is_pci(master->dev))
2327 		return -ENODEV;
2328 
2329 	pdev = to_pci_dev(master->dev);
2330 
2331 	features = pci_pasid_features(pdev);
2332 	if (features < 0)
2333 		return features;
2334 
2335 	num_pasids = pci_max_pasids(pdev);
2336 	if (num_pasids <= 0)
2337 		return num_pasids;
2338 
2339 	ret = pci_enable_pasid(pdev, features);
2340 	if (ret) {
2341 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2342 		return ret;
2343 	}
2344 
2345 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2346 				  master->smmu->ssid_bits);
2347 	return 0;
2348 }
2349 
2350 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2351 {
2352 	struct pci_dev *pdev;
2353 
2354 	if (!dev_is_pci(master->dev))
2355 		return;
2356 
2357 	pdev = to_pci_dev(master->dev);
2358 
2359 	if (!pdev->pasid_enabled)
2360 		return;
2361 
2362 	master->ssid_bits = 0;
2363 	pci_disable_pasid(pdev);
2364 }
2365 
2366 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2367 {
2368 	unsigned long flags;
2369 	struct arm_smmu_domain *smmu_domain = master->domain;
2370 
2371 	if (!smmu_domain)
2372 		return;
2373 
2374 	arm_smmu_disable_ats(master);
2375 
2376 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2377 	list_del(&master->domain_head);
2378 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2379 
2380 	master->domain = NULL;
2381 	master->ats_enabled = false;
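	/*
	 * With master->domain cleared, this rewrites the STE as bypass or
	 * abort, depending on disable_bypass.
	 */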
2382 	arm_smmu_install_ste_for_dev(master);
2383 }
2384 
2385 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2386 {
2387 	int ret = 0;
2388 	unsigned long flags;
2389 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2390 	struct arm_smmu_device *smmu;
2391 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2392 	struct arm_smmu_master *master;
2393 
2394 	if (!fwspec)
2395 		return -ENOENT;
2396 
2397 	master = dev_iommu_priv_get(dev);
2398 	smmu = master->smmu;
2399 
2400 	/*
2401 	 * Checking that SVA is disabled ensures that this device isn't bound to
2402 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2403 	 * be removed concurrently since we're holding the group mutex.
2404 	 */
2405 	if (arm_smmu_master_sva_enabled(master)) {
2406 		dev_err(dev, "cannot attach - SVA enabled\n");
2407 		return -EBUSY;
2408 	}
2409 
2410 	arm_smmu_detach_dev(master);
2411 
2412 	mutex_lock(&smmu_domain->init_mutex);
2413 
2414 	if (!smmu_domain->smmu) {
2415 		smmu_domain->smmu = smmu;
2416 		ret = arm_smmu_domain_finalise(domain, master);
2417 		if (ret) {
2418 			smmu_domain->smmu = NULL;
2419 			goto out_unlock;
2420 		}
2421 	} else if (smmu_domain->smmu != smmu) {
2422 		dev_err(dev,
2423 			"cannot attach to SMMU %s (upstream of %s)\n",
2424 			dev_name(smmu_domain->smmu->dev),
2425 			dev_name(smmu->dev));
2426 		ret = -ENXIO;
2427 		goto out_unlock;
2428 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2429 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2430 		dev_err(dev,
2431 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2432 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2433 		ret = -EINVAL;
2434 		goto out_unlock;
2435 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2436 		   smmu_domain->stall_enabled != master->stall_enabled) {
2437 		dev_err(dev, "cannot attach to stall-%s domain\n",
2438 			smmu_domain->stall_enabled ? "enabled" : "disabled");
2439 		ret = -EINVAL;
2440 		goto out_unlock;
2441 	}
2442 
2443 	master->domain = smmu_domain;
2444 
2445 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2446 		master->ats_enabled = arm_smmu_ats_supported(master);
2447 
2448 	arm_smmu_install_ste_for_dev(master);
2449 
2450 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2451 	list_add(&master->domain_head, &smmu_domain->devices);
2452 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2453 
2454 	arm_smmu_enable_ats(master);
2455 
2456 out_unlock:
2457 	mutex_unlock(&smmu_domain->init_mutex);
2458 	return ret;
2459 }
2460 
2461 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2462 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2463 			      int prot, gfp_t gfp, size_t *mapped)
2464 {
2465 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2466 
2467 	if (!ops)
2468 		return -ENODEV;
2469 
2470 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2471 }
2472 
2473 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2474 				   size_t pgsize, size_t pgcount,
2475 				   struct iommu_iotlb_gather *gather)
2476 {
2477 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2478 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2479 
2480 	if (!ops)
2481 		return 0;
2482 
2483 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2484 }
2485 
2486 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2487 {
2488 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2489 
2490 	if (smmu_domain->smmu)
2491 		arm_smmu_tlb_inv_context(smmu_domain);
2492 }
2493 
2494 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2495 				struct iommu_iotlb_gather *gather)
2496 {
2497 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2498 
2499 	if (!gather->pgsize)
2500 		return;
2501 
2502 	arm_smmu_tlb_inv_range_domain(gather->start,
2503 				      gather->end - gather->start + 1,
2504 				      gather->pgsize, true, smmu_domain);
2505 }
2506 
2507 static phys_addr_t
2508 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2509 {
2510 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2511 
2512 	if (!ops)
2513 		return 0;
2514 
2515 	return ops->iova_to_phys(ops, iova);
2516 }
2517 
2518 static struct platform_driver arm_smmu_driver;
2519 
2520 static
2521 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2522 {
2523 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2524 							  fwnode);
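	/* Only the drvdata is needed; drop the reference taken by the lookup */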
2525 	put_device(dev);
2526 	return dev ? dev_get_drvdata(dev) : NULL;
2527 }
2528 
2529 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2530 {
2531 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2532 
2533 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2534 		limit *= 1UL << STRTAB_SPLIT;
2535 
2536 	return sid < limit;
2537 }
2538 
2539 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2540 				  struct arm_smmu_master *master)
2541 {
2542 	int i;
2543 	int ret = 0;
2544 	struct arm_smmu_stream *new_stream, *cur_stream;
2545 	struct rb_node **new_node, *parent_node = NULL;
2546 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2547 
2548 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2549 				  GFP_KERNEL);
2550 	if (!master->streams)
2551 		return -ENOMEM;
2552 	master->num_streams = fwspec->num_ids;
2553 
2554 	mutex_lock(&smmu->streams_mutex);
2555 	for (i = 0; i < fwspec->num_ids; i++) {
2556 		u32 sid = fwspec->ids[i];
2557 
2558 		new_stream = &master->streams[i];
2559 		new_stream->id = sid;
2560 		new_stream->master = master;
2561 
2562 		/*
2563 		 * Check the SIDs are in range of the SMMU and our stream table
2564 		 */
2565 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2566 			ret = -ERANGE;
2567 			break;
2568 		}
2569 
2570 		/* Ensure l2 strtab is initialised */
2571 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2572 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2573 			if (ret)
2574 				break;
2575 		}
2576 
2577 		/* Insert into SID tree */
2578 		new_node = &(smmu->streams.rb_node);
2579 		while (*new_node) {
2580 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2581 					      node);
2582 			parent_node = *new_node;
2583 			if (cur_stream->id > new_stream->id) {
2584 				new_node = &((*new_node)->rb_left);
2585 			} else if (cur_stream->id < new_stream->id) {
2586 				new_node = &((*new_node)->rb_right);
2587 			} else {
2588 				dev_warn(master->dev,
2589 					 "stream %u already in tree\n",
2590 					 cur_stream->id);
2591 				ret = -EINVAL;
2592 				break;
2593 			}
2594 		}
2595 		if (ret)
2596 			break;
2597 
2598 		rb_link_node(&new_stream->node, parent_node, new_node);
2599 		rb_insert_color(&new_stream->node, &smmu->streams);
2600 	}
2601 
2602 	if (ret) {
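		/* Unwind the streams already inserted into the SID tree */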
2603 		for (i--; i >= 0; i--)
2604 			rb_erase(&master->streams[i].node, &smmu->streams);
2605 		kfree(master->streams);
2606 	}
2607 	mutex_unlock(&smmu->streams_mutex);
2608 
2609 	return ret;
2610 }
2611 
2612 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2613 {
2614 	int i;
2615 	struct arm_smmu_device *smmu = master->smmu;
2616 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2617 
2618 	if (!smmu || !master->streams)
2619 		return;
2620 
2621 	mutex_lock(&smmu->streams_mutex);
2622 	for (i = 0; i < fwspec->num_ids; i++)
2623 		rb_erase(&master->streams[i].node, &smmu->streams);
2624 	mutex_unlock(&smmu->streams_mutex);
2625 
2626 	kfree(master->streams);
2627 }
2628 
2629 static struct iommu_ops arm_smmu_ops;
2630 
2631 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2632 {
2633 	int ret;
2634 	struct arm_smmu_device *smmu;
2635 	struct arm_smmu_master *master;
2636 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2637 
2638 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2639 		return ERR_PTR(-ENODEV);
2640 
2641 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2642 		return ERR_PTR(-EBUSY);
2643 
2644 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2645 	if (!smmu)
2646 		return ERR_PTR(-ENODEV);
2647 
2648 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2649 	if (!master)
2650 		return ERR_PTR(-ENOMEM);
2651 
2652 	master->dev = dev;
2653 	master->smmu = smmu;
2654 	INIT_LIST_HEAD(&master->bonds);
2655 	dev_iommu_priv_set(dev, master);
2656 
2657 	ret = arm_smmu_insert_master(smmu, master);
2658 	if (ret)
2659 		goto err_free_master;
2660 
2661 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2662 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2663 
2664 	/*
2665 	 * Note that PASID must be enabled before, and disabled after ATS:
2666 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2667 	 *
2668 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2669 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2670 	 *   are changed.
2671 	 */
2672 	arm_smmu_enable_pasid(master);
2673 
2674 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2675 		master->ssid_bits = min_t(u8, master->ssid_bits,
2676 					  CTXDESC_LINEAR_CDMAX);
2677 
2678 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2679 	     device_property_read_bool(dev, "dma-can-stall")) ||
2680 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2681 		master->stall_enabled = true;
2682 
2683 	return &smmu->iommu;
2684 
2685 err_free_master:
2686 	kfree(master);
2687 	dev_iommu_priv_set(dev, NULL);
2688 	return ERR_PTR(ret);
2689 }
2690 
2691 static void arm_smmu_release_device(struct device *dev)
2692 {
2693 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2694 	struct arm_smmu_master *master;
2695 
2696 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2697 		return;
2698 
2699 	master = dev_iommu_priv_get(dev);
2700 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2701 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2702 	arm_smmu_detach_dev(master);
2703 	arm_smmu_disable_pasid(master);
2704 	arm_smmu_remove_master(master);
2705 	kfree(master);
2706 	iommu_fwspec_free(dev);
2707 }
2708 
2709 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2710 {
2711 	struct iommu_group *group;
2712 
2713 	/*
2714 	 * We don't support devices sharing stream IDs other than PCI RID
2715 	 * aliases, since the necessary ID-to-device lookup becomes rather
2716 	 * impractical given a potential sparse 32-bit stream ID space.
2717 	 */
2718 	if (dev_is_pci(dev))
2719 		group = pci_device_group(dev);
2720 	else
2721 		group = generic_device_group(dev);
2722 
2723 	return group;
2724 }
2725 
2726 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2727 {
2728 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2729 	int ret = 0;
2730 
2731 	mutex_lock(&smmu_domain->init_mutex);
2732 	if (smmu_domain->smmu)
2733 		ret = -EPERM;
2734 	else
2735 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2736 	mutex_unlock(&smmu_domain->init_mutex);
2737 
2738 	return ret;
2739 }
2740 
2741 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2742 {
2743 	return iommu_fwspec_add_ids(dev, args->args, 1);
2744 }
2745 
2746 static void arm_smmu_get_resv_regions(struct device *dev,
2747 				      struct list_head *head)
2748 {
2749 	struct iommu_resv_region *region;
2750 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2751 
2752 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2753 					 prot, IOMMU_RESV_SW_MSI);
2754 	if (!region)
2755 		return;
2756 
2757 	list_add_tail(&region->list, head);
2758 
2759 	iommu_dma_get_resv_regions(dev, head);
2760 }
2761 
2762 static bool arm_smmu_dev_has_feature(struct device *dev,
2763 				     enum iommu_dev_features feat)
2764 {
2765 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2766 
2767 	if (!master)
2768 		return false;
2769 
2770 	switch (feat) {
2771 	case IOMMU_DEV_FEAT_IOPF:
2772 		return arm_smmu_master_iopf_supported(master);
2773 	case IOMMU_DEV_FEAT_SVA:
2774 		return arm_smmu_master_sva_supported(master);
2775 	default:
2776 		return false;
2777 	}
2778 }
2779 
2780 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2781 					 enum iommu_dev_features feat)
2782 {
2783 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2784 
2785 	if (!master)
2786 		return false;
2787 
2788 	switch (feat) {
2789 	case IOMMU_DEV_FEAT_IOPF:
2790 		return master->iopf_enabled;
2791 	case IOMMU_DEV_FEAT_SVA:
2792 		return arm_smmu_master_sva_enabled(master);
2793 	default:
2794 		return false;
2795 	}
2796 }
2797 
2798 static int arm_smmu_dev_enable_feature(struct device *dev,
2799 				       enum iommu_dev_features feat)
2800 {
2801 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2802 
2803 	if (!arm_smmu_dev_has_feature(dev, feat))
2804 		return -ENODEV;
2805 
2806 	if (arm_smmu_dev_feature_enabled(dev, feat))
2807 		return -EBUSY;
2808 
2809 	switch (feat) {
2810 	case IOMMU_DEV_FEAT_IOPF:
2811 		master->iopf_enabled = true;
2812 		return 0;
2813 	case IOMMU_DEV_FEAT_SVA:
2814 		return arm_smmu_master_enable_sva(master);
2815 	default:
2816 		return -EINVAL;
2817 	}
2818 }
2819 
2820 static int arm_smmu_dev_disable_feature(struct device *dev,
2821 					enum iommu_dev_features feat)
2822 {
2823 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2824 
2825 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2826 		return -EINVAL;
2827 
2828 	switch (feat) {
2829 	case IOMMU_DEV_FEAT_IOPF:
2830 		if (master->sva_enabled)
2831 			return -EBUSY;
2832 		master->iopf_enabled = false;
2833 		return 0;
2834 	case IOMMU_DEV_FEAT_SVA:
2835 		return arm_smmu_master_disable_sva(master);
2836 	default:
2837 		return -EINVAL;
2838 	}
2839 }
2840 
2841 static struct iommu_ops arm_smmu_ops = {
2842 	.capable		= arm_smmu_capable,
2843 	.domain_alloc		= arm_smmu_domain_alloc,
2844 	.domain_free		= arm_smmu_domain_free,
2845 	.attach_dev		= arm_smmu_attach_dev,
2846 	.map_pages		= arm_smmu_map_pages,
2847 	.unmap_pages		= arm_smmu_unmap_pages,
2848 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2849 	.iotlb_sync		= arm_smmu_iotlb_sync,
2850 	.iova_to_phys		= arm_smmu_iova_to_phys,
2851 	.probe_device		= arm_smmu_probe_device,
2852 	.release_device		= arm_smmu_release_device,
2853 	.device_group		= arm_smmu_device_group,
2854 	.enable_nesting		= arm_smmu_enable_nesting,
2855 	.of_xlate		= arm_smmu_of_xlate,
2856 	.get_resv_regions	= arm_smmu_get_resv_regions,
2857 	.put_resv_regions	= generic_iommu_put_resv_regions,
2858 	.dev_has_feat		= arm_smmu_dev_has_feature,
2859 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2860 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2861 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2862 	.sva_bind		= arm_smmu_sva_bind,
2863 	.sva_unbind		= arm_smmu_sva_unbind,
2864 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2865 	.page_response		= arm_smmu_page_response,
2866 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2867 	.owner			= THIS_MODULE,
2868 };
2869 
2870 /* Probing and initialisation functions */
2871 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2872 				   struct arm_smmu_queue *q,
2873 				   void __iomem *page,
2874 				   unsigned long prod_off,
2875 				   unsigned long cons_off,
2876 				   size_t dwords, const char *name)
2877 {
2878 	size_t qsz;
2879 
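	/*
	 * Start at the requested queue size and halve it until the DMA
	 * allocation succeeds or the queue fits within a single page.
	 */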
2880 	do {
2881 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2882 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2883 					      GFP_KERNEL);
2884 		if (q->base || qsz < PAGE_SIZE)
2885 			break;
2886 
2887 		q->llq.max_n_shift--;
2888 	} while (1);
2889 
2890 	if (!q->base) {
2891 		dev_err(smmu->dev,
2892 			"failed to allocate queue (0x%zx bytes) for %s\n",
2893 			qsz, name);
2894 		return -ENOMEM;
2895 	}
2896 
2897 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2898 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2899 			 1 << q->llq.max_n_shift, name);
2900 	}
2901 
2902 	q->prod_reg	= page + prod_off;
2903 	q->cons_reg	= page + cons_off;
2904 	q->ent_dwords	= dwords;
2905 
2906 	q->q_base  = Q_BASE_RWA;
2907 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2908 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2909 
2910 	q->llq.prod = q->llq.cons = 0;
2911 	return 0;
2912 }
2913 
2914 static void arm_smmu_cmdq_free_bitmap(void *data)
2915 {
2916 	unsigned long *bitmap = data;
2917 	bitmap_free(bitmap);
2918 }
2919 
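/*
 * valid_map holds one bit per command-queue slot and lets concurrent
 * producers publish when their commands have been fully written to memory.
 */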
2920 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2921 {
2922 	int ret = 0;
2923 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2924 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2925 	atomic_long_t *bitmap;
2926 
2927 	atomic_set(&cmdq->owner_prod, 0);
2928 	atomic_set(&cmdq->lock, 0);
2929 
2930 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2931 	if (!bitmap) {
2932 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2933 		ret = -ENOMEM;
2934 	} else {
2935 		cmdq->valid_map = bitmap;
2936 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2937 	}
2938 
2939 	return ret;
2940 }
2941 
2942 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2943 {
2944 	int ret;
2945 
2946 	/* cmdq */
2947 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2948 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2949 				      CMDQ_ENT_DWORDS, "cmdq");
2950 	if (ret)
2951 		return ret;
2952 
2953 	ret = arm_smmu_cmdq_init(smmu);
2954 	if (ret)
2955 		return ret;
2956 
2957 	/* evtq */
2958 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2959 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2960 				      EVTQ_ENT_DWORDS, "evtq");
2961 	if (ret)
2962 		return ret;
2963 
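	/*
	 * Stall-based SVA reports I/O page faults via the event queue, so it
	 * needs an IOPF queue to hand them to.
	 */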
2964 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2965 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2966 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2967 		if (!smmu->evtq.iopf)
2968 			return -ENOMEM;
2969 	}
2970 
2971 	/* priq */
2972 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2973 		return 0;
2974 
2975 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2976 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2977 				       PRIQ_ENT_DWORDS, "priq");
2978 }
2979 
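/*
 * Allocate the array of L1 descriptors and write every L1 entry as invalid:
 * the L2 tables are only allocated once a stream ID in their range shows up.
 */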
2980 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2981 {
2982 	unsigned int i;
2983 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2984 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2985 	void *strtab = smmu->strtab_cfg.strtab;
2986 
2987 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2988 	if (!cfg->l1_desc)
2989 		return -ENOMEM;
2990 
2991 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2992 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2993 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2994 	}
2995 
2996 	return 0;
2997 }
2998 
2999 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3000 {
3001 	void *strtab;
3002 	u64 reg;
3003 	u32 size, l1size;
3004 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3005 
3006 	/* Calculate the L1 size, capped to the SIDSIZE. */
3007 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3008 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3009 	cfg->num_l1_ents = 1 << size;
3010 
3011 	size += STRTAB_SPLIT;
3012 	if (size < smmu->sid_bits)
3013 		dev_warn(smmu->dev,
3014 			 "2-level strtab only covers %u/%u bits of SID\n",
3015 			 size, smmu->sid_bits);
3016 
3017 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3018 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3019 				     GFP_KERNEL);
3020 	if (!strtab) {
3021 		dev_err(smmu->dev,
3022 			"failed to allocate l1 stream table (%u bytes)\n",
3023 			l1size);
3024 		return -ENOMEM;
3025 	}
3026 	cfg->strtab = strtab;
3027 
3028 	/* Configure strtab_base_cfg for 2 levels */
3029 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3030 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3031 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3032 	cfg->strtab_base_cfg = reg;
3033 
3034 	return arm_smmu_init_l1_strtab(smmu);
3035 }
3036 
3037 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3038 {
3039 	void *strtab;
3040 	u64 reg;
3041 	u32 size;
3042 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3043 
3044 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3045 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3046 				     GFP_KERNEL);
3047 	if (!strtab) {
3048 		dev_err(smmu->dev,
3049 			"failed to allocate linear stream table (%u bytes)\n",
3050 			size);
3051 		return -ENOMEM;
3052 	}
3053 	cfg->strtab = strtab;
3054 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3055 
3056 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3057 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3058 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3059 	cfg->strtab_base_cfg = reg;
3060 
3061 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3062 	return 0;
3063 }
3064 
3065 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3066 {
3067 	u64 reg;
3068 	int ret;
3069 
3070 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3071 		ret = arm_smmu_init_strtab_2lvl(smmu);
3072 	else
3073 		ret = arm_smmu_init_strtab_linear(smmu);
3074 
3075 	if (ret)
3076 		return ret;
3077 
3078 	/* Set the strtab base address */
3079 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3080 	reg |= STRTAB_BASE_RA;
3081 	smmu->strtab_cfg.strtab_base = reg;
3082 
3083 	/* Allocate the first VMID for stage-2 bypass STEs */
3084 	set_bit(0, smmu->vmid_map);
3085 	return 0;
3086 }
3087 
3088 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3089 {
3090 	int ret;
3091 
3092 	mutex_init(&smmu->streams_mutex);
3093 	smmu->streams = RB_ROOT;
3094 
3095 	ret = arm_smmu_init_queues(smmu);
3096 	if (ret)
3097 		return ret;
3098 
3099 	return arm_smmu_init_strtab(smmu);
3100 }
3101 
3102 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3103 				   unsigned int reg_off, unsigned int ack_off)
3104 {
3105 	u32 reg;
3106 
3107 	writel_relaxed(val, smmu->base + reg_off);
3108 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3109 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3110 }
3111 
3112 /* GBPA is "special": writes must set GBPA.UPDATE and poll for the SMMU to clear it */
3113 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3114 {
3115 	int ret;
3116 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3117 
3118 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3119 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3120 	if (ret)
3121 		return ret;
3122 
3123 	reg &= ~clr;
3124 	reg |= set;
3125 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3126 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3127 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3128 
3129 	if (ret)
3130 		dev_err(smmu->dev, "GBPA not responding to update\n");
3131 	return ret;
3132 }
3133 
3134 static void arm_smmu_free_msis(void *data)
3135 {
3136 	struct device *dev = data;
3137 	platform_msi_domain_free_irqs(dev);
3138 }
3139 
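/*
 * Write one MSI's doorbell address, payload and memory attributes into the
 * corresponding triplet of IRQ_CFG registers.
 */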
3140 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3141 {
3142 	phys_addr_t doorbell;
3143 	struct device *dev = msi_desc_to_dev(desc);
3144 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3145 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3146 
3147 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3148 	doorbell &= MSI_CFG0_ADDR_MASK;
3149 
3150 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3151 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3152 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3153 }
3154 
3155 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3156 {
3157 	struct msi_desc *desc;
3158 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3159 	struct device *dev = smmu->dev;
3160 
3161 	/* Clear the MSI address regs */
3162 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3163 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3164 
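	/* PRIQ uses the highest MSI index, so allocate one vector fewer without PRI */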
3165 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3166 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3167 	else
3168 		nvec--;
3169 
3170 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3171 		return;
3172 
3173 	if (!dev->msi_domain) {
3174 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3175 		return;
3176 	}
3177 
3178 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3179 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3180 	if (ret) {
3181 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3182 		return;
3183 	}
3184 
3185 	for_each_msi_entry(desc, dev) {
3186 		switch (desc->platform.msi_index) {
3187 		case EVTQ_MSI_INDEX:
3188 			smmu->evtq.q.irq = desc->irq;
3189 			break;
3190 		case GERROR_MSI_INDEX:
3191 			smmu->gerr_irq = desc->irq;
3192 			break;
3193 		case PRIQ_MSI_INDEX:
3194 			smmu->priq.q.irq = desc->irq;
3195 			break;
3196 		default:	/* Unknown */
3197 			continue;
3198 		}
3199 	}
3200 
3201 	/* Add callback to free MSIs on teardown */
3202 	devm_add_action(dev, arm_smmu_free_msis, dev);
3203 }
3204 
3205 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3206 {
3207 	int irq, ret;
3208 
3209 	arm_smmu_setup_msis(smmu);
3210 
3211 	/* Request interrupt lines */
3212 	irq = smmu->evtq.q.irq;
3213 	if (irq) {
3214 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3215 						arm_smmu_evtq_thread,
3216 						IRQF_ONESHOT,
3217 						"arm-smmu-v3-evtq", smmu);
3218 		if (ret < 0)
3219 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3220 	} else {
3221 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3222 	}
3223 
3224 	irq = smmu->gerr_irq;
3225 	if (irq) {
3226 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3227 				       0, "arm-smmu-v3-gerror", smmu);
3228 		if (ret < 0)
3229 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3230 	} else {
3231 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3232 	}
3233 
3234 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3235 		irq = smmu->priq.q.irq;
3236 		if (irq) {
3237 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3238 							arm_smmu_priq_thread,
3239 							IRQF_ONESHOT,
3240 							"arm-smmu-v3-priq",
3241 							smmu);
3242 			if (ret < 0)
3243 				dev_warn(smmu->dev,
3244 					 "failed to enable priq irq\n");
3245 		} else {
3246 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3247 		}
3248 	}
3249 }
3250 
3251 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3252 {
3253 	int ret, irq;
3254 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3255 
3256 	/* Disable IRQs first */
3257 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3258 				      ARM_SMMU_IRQ_CTRLACK);
3259 	if (ret) {
3260 		dev_err(smmu->dev, "failed to disable irqs\n");
3261 		return ret;
3262 	}
3263 
3264 	irq = smmu->combined_irq;
3265 	if (irq) {
3266 		/*
3267 		 * Cavium ThunderX2 implementation doesn't support unique irq
3268 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3269 		 */
3270 		ret = devm_request_threaded_irq(smmu->dev, irq,
3271 					arm_smmu_combined_irq_handler,
3272 					arm_smmu_combined_irq_thread,
3273 					IRQF_ONESHOT,
3274 					"arm-smmu-v3-combined-irq", smmu);
3275 		if (ret < 0)
3276 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3277 	} else
3278 		arm_smmu_setup_unique_irqs(smmu);
3279 
3280 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3281 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3282 
3283 	/* Enable interrupt generation on the SMMU */
3284 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3285 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3286 	if (ret)
3287 		dev_warn(smmu->dev, "failed to enable irqs\n");
3288 
3289 	return 0;
3290 }
3291 
3292 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3293 {
3294 	int ret;
3295 
3296 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3297 	if (ret)
3298 		dev_err(smmu->dev, "failed to clear cr0\n");
3299 
3300 	return ret;
3301 }
3302 
3303 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3304 {
3305 	int ret;
3306 	u32 reg, enables;
3307 	struct arm_smmu_cmdq_ent cmd;
3308 
3309 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3310 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3311 	if (reg & CR0_SMMUEN) {
3312 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3313 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3314 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3315 	}
3316 
3317 	ret = arm_smmu_device_disable(smmu);
3318 	if (ret)
3319 		return ret;
3320 
3321 	/* CR1 (table and queue memory attributes) */
3322 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3323 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3324 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3325 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3326 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3327 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3328 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3329 
3330 	/* CR2 (PTM, invalid StreamID recording, E2H when supported) */
3331 	reg = CR2_PTM | CR2_RECINVSID;
3332 
3333 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3334 		reg |= CR2_E2H;
3335 
3336 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3337 
3338 	/* Stream table */
3339 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3340 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3341 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3342 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3343 
3344 	/* Command queue */
3345 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3346 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3347 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3348 
3349 	enables = CR0_CMDQEN;
3350 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3351 				      ARM_SMMU_CR0ACK);
3352 	if (ret) {
3353 		dev_err(smmu->dev, "failed to enable command queue\n");
3354 		return ret;
3355 	}
3356 
3357 	/* Invalidate any cached configuration */
3358 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3359 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3360 
3361 	/* Invalidate any stale TLB entries */
3362 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3363 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3364 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3365 	}
3366 
3367 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3368 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3369 
3370 	/* Event queue */
3371 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3372 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3373 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3374 
3375 	enables |= CR0_EVTQEN;
3376 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3377 				      ARM_SMMU_CR0ACK);
3378 	if (ret) {
3379 		dev_err(smmu->dev, "failed to enable event queue\n");
3380 		return ret;
3381 	}
3382 
3383 	/* PRI queue */
3384 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3385 		writeq_relaxed(smmu->priq.q.q_base,
3386 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3387 		writel_relaxed(smmu->priq.q.llq.prod,
3388 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3389 		writel_relaxed(smmu->priq.q.llq.cons,
3390 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3391 
3392 		enables |= CR0_PRIQEN;
3393 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3394 					      ARM_SMMU_CR0ACK);
3395 		if (ret) {
3396 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3397 			return ret;
3398 		}
3399 	}
3400 
3401 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3402 		enables |= CR0_ATSCHK;
3403 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3404 					      ARM_SMMU_CR0ACK);
3405 		if (ret) {
3406 			dev_err(smmu->dev, "failed to enable ATS check\n");
3407 			return ret;
3408 		}
3409 	}
3410 
3411 	ret = arm_smmu_setup_irqs(smmu);
3412 	if (ret) {
3413 		dev_err(smmu->dev, "failed to setup irqs\n");
3414 		return ret;
3415 	}
3416 
3417 	if (is_kdump_kernel())
3418 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3419 
3420 	/* Enable the SMMU interface, or ensure bypass */
3421 	if (!bypass || disable_bypass) {
3422 		enables |= CR0_SMMUEN;
3423 	} else {
3424 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3425 		if (ret)
3426 			return ret;
3427 	}
3428 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3429 				      ARM_SMMU_CR0ACK);
3430 	if (ret) {
3431 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3432 		return ret;
3433 	}
3434 
3435 	return 0;
3436 }
3437 
3438 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3439 {
3440 	u32 reg;
3441 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3442 
3443 	/* IDR0 */
3444 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3445 
3446 	/* 2-level structures */
3447 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3448 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3449 
3450 	if (reg & IDR0_CD2L)
3451 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3452 
3453 	/*
3454 	 * Translation table endianness.
3455 	 * We currently require the same endianness as the CPU, but this
3456 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3457 	 */
3458 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3459 	case IDR0_TTENDIAN_MIXED:
3460 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3461 		break;
3462 #ifdef __BIG_ENDIAN
3463 	case IDR0_TTENDIAN_BE:
3464 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3465 		break;
3466 #else
3467 	case IDR0_TTENDIAN_LE:
3468 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3469 		break;
3470 #endif
3471 	default:
3472 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3473 		return -ENXIO;
3474 	}
3475 
3476 	/* Boolean feature flags */
3477 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3478 		smmu->features |= ARM_SMMU_FEAT_PRI;
3479 
3480 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3481 		smmu->features |= ARM_SMMU_FEAT_ATS;
3482 
3483 	if (reg & IDR0_SEV)
3484 		smmu->features |= ARM_SMMU_FEAT_SEV;
3485 
3486 	if (reg & IDR0_MSI) {
3487 		smmu->features |= ARM_SMMU_FEAT_MSI;
3488 		if (coherent && !disable_msipolling)
3489 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3490 	}
3491 
3492 	if (reg & IDR0_HYP) {
3493 		smmu->features |= ARM_SMMU_FEAT_HYP;
3494 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3495 			smmu->features |= ARM_SMMU_FEAT_E2H;
3496 	}
3497 
3498 	/*
3499 	 * The coherency feature as set by FW is used in preference to the ID
3500 	 * register, but warn on mismatch.
3501 	 */
3502 	if (!!(reg & IDR0_COHACC) != coherent)
3503 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3504 			 coherent ? "true" : "false");
3505 
3506 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3507 	case IDR0_STALL_MODEL_FORCE:
3508 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3509 		fallthrough;
3510 	case IDR0_STALL_MODEL_STALL:
3511 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3512 	}
3513 
3514 	if (reg & IDR0_S1P)
3515 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3516 
3517 	if (reg & IDR0_S2P)
3518 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3519 
3520 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3521 		dev_err(smmu->dev, "no translation support!\n");
3522 		return -ENXIO;
3523 	}
3524 
3525 	/* We only support the AArch64 table format at present */
3526 	switch (FIELD_GET(IDR0_TTF, reg)) {
3527 	case IDR0_TTF_AARCH32_64:
3528 		smmu->ias = 40;
3529 		fallthrough;
3530 	case IDR0_TTF_AARCH64:
3531 		break;
3532 	default:
3533 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3534 		return -ENXIO;
3535 	}
3536 
3537 	/* ASID/VMID sizes */
3538 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3539 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3540 
3541 	/* IDR1 */
3542 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3543 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3544 		dev_err(smmu->dev, "embedded implementation not supported\n");
3545 		return -ENXIO;
3546 	}
3547 
3548 	/* Queue sizes, capped to ensure natural alignment */
3549 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3550 					     FIELD_GET(IDR1_CMDQS, reg));
3551 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3552 		/*
3553 		 * We don't support splitting up batches, so one batch of
3554 		 * commands plus an extra sync needs to fit inside the command
3555 		 * queue. There's also no way we can handle the weird alignment
3556 		 * restrictions on the base pointer for a unit-length queue.
3557 		 */
3558 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3559 			CMDQ_BATCH_ENTRIES);
3560 		return -ENXIO;
3561 	}
3562 
3563 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3564 					     FIELD_GET(IDR1_EVTQS, reg));
3565 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3566 					     FIELD_GET(IDR1_PRIQS, reg));
3567 
3568 	/* SID/SSID sizes */
3569 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3570 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3571 
3572 	/*
3573 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3574 	 * table, use a linear table instead.
3575 	 */
3576 	if (smmu->sid_bits <= STRTAB_SPLIT)
3577 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3578 
3579 	/* IDR3 */
3580 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
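	/* Range invalidation: TLBI commands may specify an address range */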
3581 	if (FIELD_GET(IDR3_RIL, reg))
3582 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3583 
3584 	/* IDR5 */
3585 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3586 
3587 	/* Maximum number of outstanding stalls */
3588 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3589 
3590 	/* Page sizes (each granule also enables its block-mapping sizes) */
3591 	if (reg & IDR5_GRAN64K)
3592 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3593 	if (reg & IDR5_GRAN16K)
3594 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3595 	if (reg & IDR5_GRAN4K)
3596 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3597 
3598 	/* Input address size (VAX advertises 52-bit virtual addressing) */
3599 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3600 		smmu->features |= ARM_SMMU_FEAT_VAX;
3601 
3602 	/* Output address size */
3603 	switch (FIELD_GET(IDR5_OAS, reg)) {
3604 	case IDR5_OAS_32_BIT:
3605 		smmu->oas = 32;
3606 		break;
3607 	case IDR5_OAS_36_BIT:
3608 		smmu->oas = 36;
3609 		break;
3610 	case IDR5_OAS_40_BIT:
3611 		smmu->oas = 40;
3612 		break;
3613 	case IDR5_OAS_42_BIT:
3614 		smmu->oas = 42;
3615 		break;
3616 	case IDR5_OAS_44_BIT:
3617 		smmu->oas = 44;
3618 		break;
3619 	case IDR5_OAS_52_BIT:
3620 		smmu->oas = 52;
3621 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3622 		break;
3623 	default:
3624 		dev_info(smmu->dev,
3625 			"unknown output address size. Truncating to 48-bit\n");
3626 		fallthrough;
3627 	case IDR5_OAS_48_BIT:
3628 		smmu->oas = 48;
3629 	}
3630 
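	/*
	 * arm_smmu_ops is shared by all SMMU instances, so accumulate the
	 * supported page sizes rather than overwriting them.
	 */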
3631 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3632 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3633 	else
3634 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3635 
3636 	/* Set the DMA mask for our table walker */
3637 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3638 		dev_warn(smmu->dev,
3639 			 "failed to set DMA mask for table walker\n");
3640 
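	/* Stage-2 input (IPA) addresses may span the whole output range */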
3641 	smmu->ias = max(smmu->ias, smmu->oas);
3642 
3643 	if (arm_smmu_sva_supported(smmu))
3644 		smmu->features |= ARM_SMMU_FEAT_SVA;
3645 
3646 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3647 		 smmu->ias, smmu->oas, smmu->features);
3648 	return 0;
3649 }
3650 
3651 #ifdef CONFIG_ACPI
3652 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3653 {
3654 	switch (model) {
3655 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3656 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3657 		break;
3658 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3659 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3660 		break;
3661 	}
3662 
3663 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3664 }
3665 
3666 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3667 				      struct arm_smmu_device *smmu)
3668 {
3669 	struct acpi_iort_smmu_v3 *iort_smmu;
3670 	struct device *dev = smmu->dev;
3671 	struct acpi_iort_node *node;
3672 
3673 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3674 
3675 	/* Retrieve SMMUv3 specific data */
3676 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3677 
3678 	acpi_smmu_get_options(iort_smmu->model, smmu);
3679 
3680 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3681 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3682 
3683 	return 0;
3684 }
3685 #else
3686 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3687 					     struct arm_smmu_device *smmu)
3688 {
3689 	return -ENODEV;
3690 }
3691 #endif
3692 
3693 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3694 				    struct arm_smmu_device *smmu)
3695 {
3696 	struct device *dev = &pdev->dev;
3697 	u32 cells;
3698 	int ret = -EINVAL;
3699 
3700 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3701 		dev_err(dev, "missing #iommu-cells property\n");
3702 	else if (cells != 1)
3703 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3704 	else
3705 		ret = 0;
3706 
3707 	parse_driver_options(smmu);
3708 
3709 	if (of_dma_is_coherent(dev->of_node))
3710 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3711 
3712 	return ret;
3713 }
3714 
3715 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3716 {
3717 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3718 		return SZ_64K;
3719 	else
3720 		return SZ_128K;
3721 }
3722 
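/*
 * Install (or, with ops == NULL, remove) the IOMMU ops on every bus type that
 * can carry devices behind the SMMU, unwinding on failure.
 */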
3723 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3724 {
3725 	int err;
3726 
3727 #ifdef CONFIG_PCI
3728 	if (pci_bus_type.iommu_ops != ops) {
3729 		err = bus_set_iommu(&pci_bus_type, ops);
3730 		if (err)
3731 			return err;
3732 	}
3733 #endif
3734 #ifdef CONFIG_ARM_AMBA
3735 	if (amba_bustype.iommu_ops != ops) {
3736 		err = bus_set_iommu(&amba_bustype, ops);
3737 		if (err)
3738 			goto err_reset_pci_ops;
3739 	}
3740 #endif
3741 	if (platform_bus_type.iommu_ops != ops) {
3742 		err = bus_set_iommu(&platform_bus_type, ops);
3743 		if (err)
3744 			goto err_reset_amba_ops;
3745 	}
3746 
3747 	return 0;
3748 
3749 err_reset_amba_ops:
3750 #ifdef CONFIG_ARM_AMBA
3751 	bus_set_iommu(&amba_bustype, NULL);
3752 #endif
3753 err_reset_pci_ops: __maybe_unused;
3754 #ifdef CONFIG_PCI
3755 	bus_set_iommu(&pci_bus_type, NULL);
3756 #endif
3757 	return err;
3758 }
3759 
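/*
 * Map a sub-region of the SMMU's MMIO space; going via devm_ioremap_resource()
 * keeps the usual resource request/release bookkeeping.
 */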
3760 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3761 				      resource_size_t size)
3762 {
3763 	struct resource res = DEFINE_RES_MEM(start, size);
3764 
3765 	return devm_ioremap_resource(dev, &res);
3766 }
3767 
3768 static int arm_smmu_device_probe(struct platform_device *pdev)
3769 {
3770 	int irq, ret;
3771 	struct resource *res;
3772 	resource_size_t ioaddr;
3773 	struct arm_smmu_device *smmu;
3774 	struct device *dev = &pdev->dev;
3775 	bool bypass;
3776 
3777 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3778 	if (!smmu)
3779 		return -ENOMEM;
3780 	smmu->dev = dev;
3781 
3782 	if (dev->of_node) {
3783 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3784 	} else {
3785 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3786 		if (ret == -ENODEV)
3787 			return ret;
3788 	}
3789 
3790 	/* Set bypass mode according to firmware probing result */
3791 	bypass = !!ret;
3792 
3793 	/* Base address */
3794 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
3795 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3796 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3797 		return -EINVAL;
3798 	}
3799 	ioaddr = res->start;
3800 
3801 	/*
3802 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3803 	 * the PMCG registers which are reserved by the PMU driver.
3804 	 */
3805 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3806 	if (IS_ERR(smmu->base))
3807 		return PTR_ERR(smmu->base);
3808 
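	/*
	 * Page 1, at a 64K offset, holds the event and PRI queue index
	 * registers unless the PAGE0_REGS_ONLY workaround is in effect.
	 */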
3809 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3810 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3811 					       ARM_SMMU_REG_SZ);
3812 		if (IS_ERR(smmu->page1))
3813 			return PTR_ERR(smmu->page1);
3814 	} else {
3815 		smmu->page1 = smmu->base;
3816 	}
3817 
3818 	/* Interrupt lines */
3819 
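	/*
	 * A single "combined" line, if provided, multiplexes the event, PRI
	 * and global error interrupts; otherwise look for the individual
	 * lines.
	 */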
3820 	irq = platform_get_irq_byname_optional(pdev, "combined");
3821 	if (irq > 0) {
3822 		smmu->combined_irq = irq;
3823 	} else {
3824 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3825 		if (irq > 0)
3826 			smmu->evtq.q.irq = irq;
3827 
3828 		irq = platform_get_irq_byname_optional(pdev, "priq");
3829 		if (irq > 0)
3830 			smmu->priq.q.irq = irq;
3831 
3832 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3833 		if (irq > 0)
3834 			smmu->gerr_irq = irq;
3835 	}
3836 	/* Probe the h/w */
3837 	ret = arm_smmu_device_hw_probe(smmu);
3838 	if (ret)
3839 		return ret;
3840 
3841 	/* Initialise in-memory data structures */
3842 	ret = arm_smmu_init_structures(smmu);
3843 	if (ret)
3844 		return ret;
3845 
3846 	/* Record our private device structure */
3847 	platform_set_drvdata(pdev, smmu);
3848 
3849 	/* Reset the device */
3850 	ret = arm_smmu_device_reset(smmu, bypass);
3851 	if (ret)
3852 		return ret;
3853 
3854 	/* And we're up. Go go go! */
3855 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3856 				     "smmu3.%pa", &ioaddr);
3857 	if (ret)
3858 		return ret;
3859 
3860 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3861 	if (ret) {
3862 		dev_err(dev, "Failed to register iommu\n");
3863 		goto err_sysfs_remove;
3864 	}
3865 
3866 	ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3867 	if (ret)
3868 		goto err_unregister_device;
3869 
3870 	return 0;
3871 
3872 err_unregister_device:
3873 	iommu_device_unregister(&smmu->iommu);
3874 err_sysfs_remove:
3875 	iommu_device_sysfs_remove(&smmu->iommu);
3876 	return ret;
3877 }
3878 
3879 static int arm_smmu_device_remove(struct platform_device *pdev)
3880 {
3881 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3882 
3883 	arm_smmu_set_bus_ops(NULL);
3884 	iommu_device_unregister(&smmu->iommu);
3885 	iommu_device_sysfs_remove(&smmu->iommu);
3886 	arm_smmu_device_disable(smmu);
3887 	iopf_queue_free(smmu->evtq.iopf);
3888 
3889 	return 0;
3890 }
3891 
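/* Quiesce the SMMU on shutdown so that e.g. a kexec'd kernel starts cleanly */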
3892 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3893 {
3894 	arm_smmu_device_remove(pdev);
3895 }
3896 
3897 static const struct of_device_id arm_smmu_of_match[] = {
3898 	{ .compatible = "arm,smmu-v3", },
3899 	{ },
3900 };
3901 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3902 
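/*
 * Make sure any outstanding SVA mmu_notifier work has completed before the
 * module text can go away.
 */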
3903 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3904 {
3905 	arm_smmu_sva_notifier_synchronize();
3906 	platform_driver_unregister(drv);
3907 }
3908 
3909 static struct platform_driver arm_smmu_driver = {
3910 	.driver	= {
3911 		.name			= "arm-smmu-v3",
3912 		.of_match_table		= arm_smmu_of_match,
3913 		.suppress_bind_attrs	= true,
3914 	},
3915 	.probe	= arm_smmu_device_probe,
3916 	.remove	= arm_smmu_device_remove,
3917 	.shutdown = arm_smmu_device_shutdown,
3918 };
3919 module_driver(arm_smmu_driver, platform_driver_register,
3920 	      arm_smmu_driver_unregister);
3921 
3922 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3923 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3924 MODULE_ALIAS("platform:arm-smmu-v3");
3925 MODULE_LICENSE("GPL v2");
3926