1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
92 						 struct arm_smmu_device *smmu)
93 {
94 	if (offset > SZ_64K)
95 		return smmu->page1 + offset - SZ_64K;
96 
97 	return smmu->base + offset;
98 }
99 
100 static void parse_driver_options(struct arm_smmu_device *smmu)
101 {
102 	int i = 0;
103 
104 	do {
105 		if (of_property_read_bool(smmu->dev->of_node,
106 						arm_smmu_options[i].prop)) {
107 			smmu->options |= arm_smmu_options[i].opt;
108 			dev_notice(smmu->dev, "option %s\n",
109 				arm_smmu_options[i].prop);
110 		}
111 	} while (arm_smmu_options[++i].opt);
112 }
113 
114 /* Low-level queue manipulation functions */
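/*
 * A worked example of the prod/cons encoding used below (illustrative
 * values only, assuming a queue with max_n_shift == 8, i.e. 256 entries).
 * With the Q_IDX()/Q_WRP()/Q_OVF() helpers from arm-smmu-v3.h, the low
 * max_n_shift bits of prod/cons hold the index, the next bit holds the
 * wrap flag and the register's top bit carries the overflow flag:
 *
 *	prod = 0x105	-> index 0x05, wrap set (prod has wrapped once)
 *	cons = 0x0fa	-> index 0xfa, wrap clear
 *
 * The wrap flags differ, so queue_has_space() computes
 * space = cons - prod = 0xfa - 0x05 = 245 free slots, matching 256
 * total entries minus the 11 commands still in flight. The queue is
 * full when the indices match but the wrap flags differ, and empty
 * when both index and wrap flag match.
 */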
115 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
116 {
117 	u32 space, prod, cons;
118 
119 	prod = Q_IDX(q, q->prod);
120 	cons = Q_IDX(q, q->cons);
121 
122 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
123 		space = (1 << q->max_n_shift) - (prod - cons);
124 	else
125 		space = cons - prod;
126 
127 	return space >= n;
128 }
129 
130 static bool queue_full(struct arm_smmu_ll_queue *q)
131 {
132 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
133 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
134 }
135 
136 static bool queue_empty(struct arm_smmu_ll_queue *q)
137 {
138 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
139 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
140 }
141 
142 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
143 {
144 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
145 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
146 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
147 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
148 }
149 
150 static void queue_sync_cons_out(struct arm_smmu_queue *q)
151 {
152 	/*
153 	 * Ensure that all CPU accesses (reads and writes) to the queue
154 	 * are complete before we update the cons pointer.
155 	 */
156 	__iomb();
157 	writel_relaxed(q->llq.cons, q->cons_reg);
158 }
159 
160 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
161 {
162 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
163 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
164 }
165 
166 static int queue_sync_prod_in(struct arm_smmu_queue *q)
167 {
168 	u32 prod;
169 	int ret = 0;
170 
171 	/*
172 	 * We can't use the _relaxed() variant here, as we must prevent
173 	 * speculative reads of the queue before we have determined that
174 	 * prod has indeed moved.
175 	 */
176 	prod = readl(q->prod_reg);
177 
178 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
179 		ret = -EOVERFLOW;
180 
181 	q->llq.prod = prod;
182 	return ret;
183 }
184 
185 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
186 {
187 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
188 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
189 }
190 
191 static void queue_poll_init(struct arm_smmu_device *smmu,
192 			    struct arm_smmu_queue_poll *qp)
193 {
194 	qp->delay = 1;
195 	qp->spin_cnt = 0;
196 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
197 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
198 }
199 
200 static int queue_poll(struct arm_smmu_queue_poll *qp)
201 {
202 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
203 		return -ETIMEDOUT;
204 
205 	if (qp->wfe) {
206 		wfe();
207 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
208 		cpu_relax();
209 	} else {
210 		udelay(qp->delay);
211 		qp->delay *= 2;
212 		qp->spin_cnt = 0;
213 	}
214 
215 	return 0;
216 }
217 
218 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
219 {
220 	int i;
221 
222 	for (i = 0; i < n_dwords; ++i)
223 		*dst++ = cpu_to_le64(*src++);
224 }
225 
226 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
227 {
228 	int i;
229 
230 	for (i = 0; i < n_dwords; ++i)
231 		*dst++ = le64_to_cpu(*src++);
232 }
233 
234 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
235 {
236 	if (queue_empty(&q->llq))
237 		return -EAGAIN;
238 
239 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
240 	queue_inc_cons(&q->llq);
241 	queue_sync_cons_out(q);
242 	return 0;
243 }
244 
245 /* High-level queue accessors */
246 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
247 {
248 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
249 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
250 
251 	switch (ent->opcode) {
252 	case CMDQ_OP_TLBI_EL2_ALL:
253 	case CMDQ_OP_TLBI_NSNH_ALL:
254 		break;
255 	case CMDQ_OP_PREFETCH_CFG:
256 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
257 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
258 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
259 		break;
260 	case CMDQ_OP_CFGI_CD:
261 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
262 		fallthrough;
263 	case CMDQ_OP_CFGI_STE:
264 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
265 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
266 		break;
267 	case CMDQ_OP_CFGI_CD_ALL:
268 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269 		break;
270 	case CMDQ_OP_CFGI_ALL:
271 		/* Cover the entire SID range */
272 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
273 		break;
274 	case CMDQ_OP_TLBI_NH_VA:
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
282 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
283 		break;
284 	case CMDQ_OP_TLBI_S2_IPA:
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
288 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
291 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
292 		break;
293 	case CMDQ_OP_TLBI_NH_ASID:
294 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
295 		fallthrough;
296 	case CMDQ_OP_TLBI_S12_VMALL:
297 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
298 		break;
299 	case CMDQ_OP_ATC_INV:
300 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
301 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
302 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
303 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
304 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
305 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
306 		break;
307 	case CMDQ_OP_PRI_RESP:
308 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
309 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
310 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
311 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
312 		switch (ent->pri.resp) {
313 		case PRI_RESP_DENY:
314 		case PRI_RESP_FAIL:
315 		case PRI_RESP_SUCC:
316 			break;
317 		default:
318 			return -EINVAL;
319 		}
320 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
321 		break;
322 	case CMDQ_OP_CMD_SYNC:
323 		if (ent->sync.msiaddr) {
324 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
325 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
326 		} else {
327 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
328 		}
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
330 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
331 		break;
332 	default:
333 		return -ENOENT;
334 	}
335 
336 	return 0;
337 }
338 
339 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
340 					 u32 prod)
341 {
342 	struct arm_smmu_queue *q = &smmu->cmdq.q;
343 	struct arm_smmu_cmdq_ent ent = {
344 		.opcode = CMDQ_OP_CMD_SYNC,
345 	};
346 
347 	/*
348 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
349 	 * payload, so the write will zero the entire command on that platform.
350 	 */
351 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
352 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
353 				   q->ent_dwords * 8;
354 	}
355 
356 	arm_smmu_cmdq_build_cmd(cmd, &ent);
357 }
358 
359 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
360 {
361 	static const char *cerror_str[] = {
362 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
363 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
364 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
365 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
366 	};
367 
368 	int i;
369 	u64 cmd[CMDQ_ENT_DWORDS];
370 	struct arm_smmu_queue *q = &smmu->cmdq.q;
371 	u32 cons = readl_relaxed(q->cons_reg);
372 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
373 	struct arm_smmu_cmdq_ent cmd_sync = {
374 		.opcode = CMDQ_OP_CMD_SYNC,
375 	};
376 
377 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
378 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
379 
380 	switch (idx) {
381 	case CMDQ_ERR_CERROR_ABT_IDX:
382 		dev_err(smmu->dev, "retrying command fetch\n");
383 	case CMDQ_ERR_CERROR_NONE_IDX:
384 		return;
385 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
386 		/*
387 		 * ATC Invalidation Completion timeout. CONS is still pointing
388 		 * at the CMD_SYNC. Attempt to complete other pending commands
389 		 * by repeating the CMD_SYNC, though we might well end up back
390 		 * here since the ATC invalidation may still be pending.
391 		 */
392 		return;
393 	case CMDQ_ERR_CERROR_ILL_IDX:
394 	default:
395 		break;
396 	}
397 
398 	/*
399 	 * We may have concurrent producers, so we need to be careful
400 	 * not to touch any of the shadow cmdq state.
401 	 */
402 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
403 	dev_err(smmu->dev, "skipping command in error state:\n");
404 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
405 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
406 
407 	/* Convert the erroneous command into a CMD_SYNC */
408 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
409 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
410 		return;
411 	}
412 
413 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
414 }
415 
416 /*
417  * Command queue locking.
418  * This is a form of bastardised rwlock with the following major changes:
419  *
420  * - The only LOCK routines are exclusive_trylock() and shared_lock().
421  *   Neither have barrier semantics, and instead provide only a control
422  *   dependency.
423  *
424  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
425  *   fails if the caller appears to be the last lock holder (yes, this is
426  *   racy). All successful UNLOCK routines have RELEASE semantics.
427  */
428 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
429 {
430 	int val;
431 
432 	/*
433 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
434 	 * lock counter. When held in exclusive state, the lock counter is set
435 	 * to INT_MIN so these increments won't hurt as the value will remain
436 	 * negative.
437 	 */
438 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
439 		return;
440 
441 	do {
442 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
443 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
444 }
445 
446 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
447 {
448 	(void)atomic_dec_return_release(&cmdq->lock);
449 }
450 
451 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
452 {
453 	if (atomic_read(&cmdq->lock) == 1)
454 		return false;
455 
456 	arm_smmu_cmdq_shared_unlock(cmdq);
457 	return true;
458 }
459 
460 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
461 ({									\
462 	bool __ret;							\
463 	local_irq_save(flags);						\
464 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
465 	if (!__ret)							\
466 		local_irq_restore(flags);				\
467 	__ret;								\
468 })
469 
470 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
471 ({									\
472 	atomic_set_release(&cmdq->lock, 0);				\
473 	local_irq_restore(flags);					\
474 })
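
/*
 * A rough usage sketch of the primitives above (for illustration only;
 * the real call sites are in the insertion path below):
 *
 *	// CMD_SYNC inserters pin the current queue generation:
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	... poll for completion ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// Last reader out: safe to publish the observed cons
 *		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 *	// Anyone wanting to refresh the shadow cons exclusively:
 *	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *	}
 */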
475 
476 
477 /*
478  * Command queue insertion.
479  * This is made fiddly by our attempts to achieve some sort of scalability
480  * since there is one queue shared amongst all of the CPUs in the system.  If
481  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
482  * then you'll *love* this monstrosity.
483  *
484  * The basic idea is to split the queue up into ranges of commands that are
485  * owned by a given CPU; the owner may not have written all of the commands
486  * itself, but is responsible for advancing the hardware prod pointer when
487  * the time comes. The algorithm is roughly:
488  *
489  * 	1. Allocate some space in the queue. At this point we also discover
490  *	   whether the head of the queue is currently owned by another CPU,
491  *	   or whether we are the owner.
492  *
493  *	2. Write our commands into our allocated slots in the queue.
494  *
495  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
496  *
497  *	4. If we are an owner:
498  *		a. Wait for the previous owner to finish.
499  *		b. Mark the queue head as unowned, which tells us the range
500  *		   that we are responsible for publishing.
501  *		c. Wait for all commands in our owned range to become valid.
502  *		d. Advance the hardware prod pointer.
503  *		e. Tell the next owner we've finished.
504  *
505  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
506  *	   owner), then we need to stick around until it has completed:
507  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
508  *		   to clear the first 4 bytes.
509  *		b. Otherwise, we spin waiting for the hardware cons pointer to
510  *		   advance past our command.
511  *
512  * The devil is in the details, particularly the use of locking for handling
513  * SYNC completion and freeing up space in the queue before we think that it is
514  * full.
515  */
516 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
517 					       u32 sprod, u32 eprod, bool set)
518 {
519 	u32 swidx, sbidx, ewidx, ebidx;
520 	struct arm_smmu_ll_queue llq = {
521 		.max_n_shift	= cmdq->q.llq.max_n_shift,
522 		.prod		= sprod,
523 	};
524 
525 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
526 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
527 
528 	while (llq.prod != eprod) {
529 		unsigned long mask;
530 		atomic_long_t *ptr;
531 		u32 limit = BITS_PER_LONG;
532 
533 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
534 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
535 
536 		ptr = &cmdq->valid_map[swidx];
537 
538 		if ((swidx == ewidx) && (sbidx < ebidx))
539 			limit = ebidx;
540 
541 		mask = GENMASK(limit - 1, sbidx);
542 
543 		/*
544 		 * The valid bit is the inverse of the wrap bit. This means
545 		 * that a zero-initialised queue is invalid and, after marking
546 		 * all entries as valid, they become invalid again when we
547 		 * wrap.
548 		 */
549 		if (set) {
550 			atomic_long_xor(mask, ptr);
551 		} else { /* Poll */
552 			unsigned long valid;
553 
554 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
555 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
556 		}
557 
558 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
559 	}
560 }
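
/*
 * Worked example for the bitmap walk above (illustrative numbers,
 * assuming a 256-entry queue and 64-bit longs): marking entries
 * [60, 70) spans two bitmap words. The first pass uses swidx 0,
 * sbidx 60 and, since ewidx is 1, keeps limit at 64, so
 * mask = GENMASK(63, 60) covers entries 60-63. The second pass uses
 * swidx 1, sbidx 0 and limit = ebidx = 6, so mask = GENMASK(5, 0)
 * covers entries 64-69, at which point llq.prod == eprod and the
 * loop terminates.
 */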
561 
562 /* Mark all entries in the range [sprod, eprod) as valid */
563 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
564 					u32 sprod, u32 eprod)
565 {
566 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
567 }
568 
569 /* Wait for all entries in the range [sprod, eprod) to become valid */
570 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
571 					 u32 sprod, u32 eprod)
572 {
573 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
574 }
575 
576 /* Wait for the command queue to become non-full */
577 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
578 					     struct arm_smmu_ll_queue *llq)
579 {
580 	unsigned long flags;
581 	struct arm_smmu_queue_poll qp;
582 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
583 	int ret = 0;
584 
585 	/*
586 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
587 	 * that fails, spin until somebody else updates it for us.
588 	 */
589 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
590 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
591 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
592 		llq->val = READ_ONCE(cmdq->q.llq.val);
593 		return 0;
594 	}
595 
596 	queue_poll_init(smmu, &qp);
597 	do {
598 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
599 		if (!queue_full(llq))
600 			break;
601 
602 		ret = queue_poll(&qp);
603 	} while (!ret);
604 
605 	return ret;
606 }
607 
608 /*
609  * Wait until the SMMU signals a CMD_SYNC completion MSI.
610  * Must be called with the cmdq lock held in some capacity.
611  */
612 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
613 					  struct arm_smmu_ll_queue *llq)
614 {
615 	int ret = 0;
616 	struct arm_smmu_queue_poll qp;
617 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
618 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
619 
620 	queue_poll_init(smmu, &qp);
621 
622 	/*
623 	 * The MSI won't generate an event, since it's being written back
624 	 * into the command queue.
625 	 */
626 	qp.wfe = false;
627 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
628 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
629 	return ret;
630 }
631 
632 /*
633  * Wait until the SMMU cons index passes llq->prod.
634  * Must be called with the cmdq lock held in some capacity.
635  */
636 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
637 					       struct arm_smmu_ll_queue *llq)
638 {
639 	struct arm_smmu_queue_poll qp;
640 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
641 	u32 prod = llq->prod;
642 	int ret = 0;
643 
644 	queue_poll_init(smmu, &qp);
645 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
646 	do {
647 		if (queue_consumed(llq, prod))
648 			break;
649 
650 		ret = queue_poll(&qp);
651 
652 		/*
653 		 * This needs to be a readl() so that our subsequent call
654 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
655 		 *
656 		 * Specifically, we need to ensure that we observe all
657 		 * shared_lock()s by other CMD_SYNCs that share our owner,
658 		 * so that a failing call to tryunlock() means that we're
659 		 * the last one out and therefore we can safely advance
660 		 * cmdq->q.llq.cons. Roughly speaking:
661 		 *
		 * CPU 0		CPU 1			CPU 2 (us)
663 		 *
664 		 * if (sync)
665 		 * 	shared_lock();
666 		 *
667 		 * dma_wmb();
668 		 * set_valid_map();
669 		 *
670 		 * 			if (owner) {
671 		 *				poll_valid_map();
672 		 *				<control dependency>
673 		 *				writel(prod_reg);
674 		 *
675 		 *						readl(cons_reg);
676 		 *						tryunlock();
677 		 *
678 		 * Requires us to see CPU 0's shared_lock() acquisition.
679 		 */
680 		llq->cons = readl(cmdq->q.cons_reg);
681 	} while (!ret);
682 
683 	return ret;
684 }
685 
686 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
687 					 struct arm_smmu_ll_queue *llq)
688 {
689 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
690 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
691 
692 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
693 }
694 
695 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
696 					u32 prod, int n)
697 {
698 	int i;
699 	struct arm_smmu_ll_queue llq = {
700 		.max_n_shift	= cmdq->q.llq.max_n_shift,
701 		.prod		= prod,
702 	};
703 
704 	for (i = 0; i < n; ++i) {
705 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
706 
707 		prod = queue_inc_prod_n(&llq, i);
708 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
709 	}
710 }
711 
712 /*
713  * This is the actual insertion function, and provides the following
714  * ordering guarantees to callers:
715  *
716  * - There is a dma_wmb() before publishing any commands to the queue.
717  *   This can be relied upon to order prior writes to data structures
718  *   in memory (such as a CD or an STE) before the command.
719  *
720  * - On completion of a CMD_SYNC, there is a control dependency.
721  *   This can be relied upon to order subsequent writes to memory (e.g.
722  *   freeing an IOVA) after completion of the CMD_SYNC.
723  *
724  * - Command insertion is totally ordered, so if two CPUs each race to
725  *   insert their own list of commands then all of the commands from one
726  *   CPU will appear before any of the commands from the other CPU.
727  */
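/*
 * As a sketch of how callers lean on the first guarantee:
 * arm_smmu_write_ctx_desc() updates a CD in memory and then calls
 * arm_smmu_sync_cd(), which lands here with CFGI_CD commands followed
 * by a CMD_SYNC. The dma_wmb() below ensures the CD writes are visible
 * before the commands are published, and completion of the CMD_SYNC
 * tells the caller that any stale cached copy of the CD is gone.
 */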
728 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
729 				       u64 *cmds, int n, bool sync)
730 {
731 	u64 cmd_sync[CMDQ_ENT_DWORDS];
732 	u32 prod;
733 	unsigned long flags;
734 	bool owner;
735 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
736 	struct arm_smmu_ll_queue llq = {
737 		.max_n_shift = cmdq->q.llq.max_n_shift,
738 	}, head = llq;
739 	int ret = 0;
740 
741 	/* 1. Allocate some space in the queue */
742 	local_irq_save(flags);
743 	llq.val = READ_ONCE(cmdq->q.llq.val);
744 	do {
745 		u64 old;
746 
747 		while (!queue_has_space(&llq, n + sync)) {
748 			local_irq_restore(flags);
749 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
750 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
751 			local_irq_save(flags);
752 		}
753 
754 		head.cons = llq.cons;
755 		head.prod = queue_inc_prod_n(&llq, n + sync) |
756 					     CMDQ_PROD_OWNED_FLAG;
757 
758 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
759 		if (old == llq.val)
760 			break;
761 
762 		llq.val = old;
763 	} while (1);
764 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
765 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
767 
768 	/*
769 	 * 2. Write our commands into the queue
770 	 * Dependency ordering from the cmpxchg() loop above.
771 	 */
772 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
773 	if (sync) {
774 		prod = queue_inc_prod_n(&llq, n);
775 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
776 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
777 
778 		/*
779 		 * In order to determine completion of our CMD_SYNC, we must
780 		 * ensure that the queue can't wrap twice without us noticing.
781 		 * We achieve that by taking the cmdq lock as shared before
782 		 * marking our slot as valid.
783 		 */
784 		arm_smmu_cmdq_shared_lock(cmdq);
785 	}
786 
787 	/* 3. Mark our slots as valid, ensuring commands are visible first */
788 	dma_wmb();
789 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
790 
791 	/* 4. If we are the owner, take control of the SMMU hardware */
792 	if (owner) {
793 		/* a. Wait for previous owner to finish */
794 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
795 
796 		/* b. Stop gathering work by clearing the owned flag */
797 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
798 						   &cmdq->q.llq.atomic.prod);
799 		prod &= ~CMDQ_PROD_OWNED_FLAG;
800 
801 		/*
802 		 * c. Wait for any gathered work to be written to the queue.
803 		 * Note that we read our own entries so that we have the control
804 		 * dependency required by (d).
805 		 */
806 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
807 
808 		/*
809 		 * d. Advance the hardware prod pointer
810 		 * Control dependency ordering from the entries becoming valid.
811 		 */
812 		writel_relaxed(prod, cmdq->q.prod_reg);
813 
814 		/*
815 		 * e. Tell the next owner we're done
816 		 * Make sure we've updated the hardware first, so that we don't
817 		 * race to update prod and potentially move it backwards.
818 		 */
819 		atomic_set_release(&cmdq->owner_prod, prod);
820 	}
821 
822 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
823 	if (sync) {
824 		llq.prod = queue_inc_prod_n(&llq, n);
825 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
826 		if (ret) {
827 			dev_err_ratelimited(smmu->dev,
828 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
829 					    llq.prod,
830 					    readl_relaxed(cmdq->q.prod_reg),
831 					    readl_relaxed(cmdq->q.cons_reg));
832 		}
833 
834 		/*
835 		 * Try to unlock the cmdq lock. This will fail if we're the last
836 		 * reader, in which case we can safely update cmdq->q.llq.cons
837 		 */
838 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
839 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
840 			arm_smmu_cmdq_shared_unlock(cmdq);
841 		}
842 	}
843 
844 	local_irq_restore(flags);
845 	return ret;
846 }
847 
848 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
849 				   struct arm_smmu_cmdq_ent *ent)
850 {
851 	u64 cmd[CMDQ_ENT_DWORDS];
852 
853 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
854 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
855 			 ent->opcode);
856 		return -EINVAL;
857 	}
858 
859 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
860 }
861 
862 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
863 {
864 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
865 }
866 
867 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
868 				    struct arm_smmu_cmdq_batch *cmds,
869 				    struct arm_smmu_cmdq_ent *cmd)
870 {
871 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
872 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
873 		cmds->num = 0;
874 	}
875 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
876 	cmds->num++;
877 }
878 
879 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
880 				      struct arm_smmu_cmdq_batch *cmds)
881 {
882 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
883 }
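
/*
 * Typical batching pattern (sketch only; see arm_smmu_sync_cd() below
 * for a real user):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *	struct arm_smmu_cmdq_ent cmd = { ... };
 *
 *	for each target {
 *		cmd.cfgi.sid = ...;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically once CMDQ_BATCH_ENTRIES commands
 * have been gathered, and batch_submit() issues the remainder with a
 * trailing CMD_SYNC.
 */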
884 
885 /* Context descriptor manipulation functions */
886 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
887 {
888 	struct arm_smmu_cmdq_ent cmd = {
889 		.opcode = CMDQ_OP_TLBI_NH_ASID,
890 		.tlbi.asid = asid,
891 	};
892 
893 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
894 	arm_smmu_cmdq_issue_sync(smmu);
895 }
896 
897 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
898 			     int ssid, bool leaf)
899 {
900 	size_t i;
901 	unsigned long flags;
902 	struct arm_smmu_master *master;
903 	struct arm_smmu_cmdq_batch cmds = {};
904 	struct arm_smmu_device *smmu = smmu_domain->smmu;
905 	struct arm_smmu_cmdq_ent cmd = {
906 		.opcode	= CMDQ_OP_CFGI_CD,
907 		.cfgi	= {
908 			.ssid	= ssid,
909 			.leaf	= leaf,
910 		},
911 	};
912 
913 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
914 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
915 		for (i = 0; i < master->num_sids; i++) {
916 			cmd.cfgi.sid = master->sids[i];
917 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
918 		}
919 	}
920 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
921 
922 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
923 }
924 
925 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
926 					struct arm_smmu_l1_ctx_desc *l1_desc)
927 {
928 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
929 
930 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
931 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
932 	if (!l1_desc->l2ptr) {
933 		dev_warn(smmu->dev,
934 			 "failed to allocate context descriptor table\n");
935 		return -ENOMEM;
936 	}
937 	return 0;
938 }
939 
940 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
941 				      struct arm_smmu_l1_ctx_desc *l1_desc)
942 {
943 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
944 		  CTXDESC_L1_DESC_V;
945 
946 	/* See comment in arm_smmu_write_ctx_desc() */
947 	WRITE_ONCE(*dst, cpu_to_le64(val));
948 }
949 
950 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
951 				   u32 ssid)
952 {
953 	__le64 *l1ptr;
954 	unsigned int idx;
955 	struct arm_smmu_l1_ctx_desc *l1_desc;
956 	struct arm_smmu_device *smmu = smmu_domain->smmu;
957 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
958 
959 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
960 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
961 
962 	idx = ssid >> CTXDESC_SPLIT;
963 	l1_desc = &cdcfg->l1_desc[idx];
964 	if (!l1_desc->l2ptr) {
965 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
966 			return NULL;
967 
968 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
969 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
970 		/* An invalid L1CD can be cached */
971 		arm_smmu_sync_cd(smmu_domain, ssid, false);
972 	}
973 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
974 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
975 }
976 
977 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
978 			    struct arm_smmu_ctx_desc *cd)
979 {
980 	/*
981 	 * This function handles the following cases:
982 	 *
983 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
984 	 * (2) Install a secondary CD, for SID+SSID traffic.
985 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
986 	 *     CD, then invalidate the old entry and mappings.
987 	 * (4) Quiesce the context without clearing the valid bit. Disable
988 	 *     translation, and ignore any translation fault.
989 	 * (5) Remove a secondary CD.
990 	 */
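	/*
	 * For instance (illustrative callers): case (1) corresponds to
	 * writing the domain's own CD at SSID 0 during attach, case (4)
	 * to passing &quiet_cd for an SSID whose mm is going away, and
	 * case (5) to passing a NULL cd to clear that SSID's entry.
	 */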
991 	u64 val;
992 	bool cd_live;
993 	__le64 *cdptr;
994 	struct arm_smmu_device *smmu = smmu_domain->smmu;
995 
996 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
997 		return -E2BIG;
998 
999 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1000 	if (!cdptr)
1001 		return -ENOMEM;
1002 
1003 	val = le64_to_cpu(cdptr[0]);
1004 	cd_live = !!(val & CTXDESC_CD_0_V);
1005 
1006 	if (!cd) { /* (5) */
1007 		val = 0;
1008 	} else if (cd == &quiet_cd) { /* (4) */
1009 		val |= CTXDESC_CD_0_TCR_EPD0;
1010 	} else if (cd_live) { /* (3) */
1011 		val &= ~CTXDESC_CD_0_ASID;
1012 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1013 		/*
1014 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1015 		 * this substream's traffic
1016 		 */
1017 	} else { /* (1) and (2) */
1018 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1019 		cdptr[2] = 0;
1020 		cdptr[3] = cpu_to_le64(cd->mair);
1021 
1022 		/*
1023 		 * STE is live, and the SMMU might read dwords of this CD in any
1024 		 * order. Ensure that it observes valid values before reading
1025 		 * V=1.
1026 		 */
1027 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1028 
1029 		val = cd->tcr |
1030 #ifdef __BIG_ENDIAN
1031 			CTXDESC_CD_0_ENDI |
1032 #endif
1033 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1034 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1035 			CTXDESC_CD_0_AA64 |
1036 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1037 			CTXDESC_CD_0_V;
1038 
1039 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1040 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1041 			val |= CTXDESC_CD_0_S;
1042 	}
1043 
1044 	/*
1045 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1046 	 * "Configuration structures and configuration invalidation completion"
1047 	 *
1048 	 *   The size of single-copy atomic reads made by the SMMU is
1049 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1050 	 *   field within an aligned 64-bit span of a structure can be altered
1051 	 *   without first making the structure invalid.
1052 	 */
1053 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1054 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1055 	return 0;
1056 }
1057 
1058 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1059 {
1060 	int ret;
1061 	size_t l1size;
1062 	size_t max_contexts;
1063 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1064 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1065 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1066 
1067 	max_contexts = 1 << cfg->s1cdmax;
1068 
1069 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1070 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1071 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1072 		cdcfg->num_l1_ents = max_contexts;
1073 
1074 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1075 	} else {
1076 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1077 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1078 						  CTXDESC_L2_ENTRIES);
1079 
1080 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1081 					      sizeof(*cdcfg->l1_desc),
1082 					      GFP_KERNEL);
1083 		if (!cdcfg->l1_desc)
1084 			return -ENOMEM;
1085 
1086 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1087 	}
1088 
1089 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1090 					   GFP_KERNEL);
1091 	if (!cdcfg->cdtab) {
1092 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1093 		ret = -ENOMEM;
1094 		goto err_free_l1;
1095 	}
1096 
1097 	return 0;
1098 
1099 err_free_l1:
1100 	if (cdcfg->l1_desc) {
1101 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1102 		cdcfg->l1_desc = NULL;
1103 	}
1104 	return ret;
1105 }
1106 
1107 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1108 {
1109 	int i;
1110 	size_t size, l1size;
1111 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1112 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1113 
1114 	if (cdcfg->l1_desc) {
1115 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1116 
1117 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1118 			if (!cdcfg->l1_desc[i].l2ptr)
1119 				continue;
1120 
1121 			dmam_free_coherent(smmu->dev, size,
1122 					   cdcfg->l1_desc[i].l2ptr,
1123 					   cdcfg->l1_desc[i].l2ptr_dma);
1124 		}
1125 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1126 		cdcfg->l1_desc = NULL;
1127 
1128 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1129 	} else {
1130 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1131 	}
1132 
1133 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1134 	cdcfg->cdtab_dma = 0;
1135 	cdcfg->cdtab = NULL;
1136 }
1137 
1138 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1139 {
1140 	bool free;
1141 	struct arm_smmu_ctx_desc *old_cd;
1142 
1143 	if (!cd->asid)
1144 		return false;
1145 
1146 	free = refcount_dec_and_test(&cd->refs);
1147 	if (free) {
1148 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1149 		WARN_ON(old_cd != cd);
1150 	}
1151 	return free;
1152 }
1153 
1154 /* Stream table manipulation functions */
1155 static void
1156 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1157 {
1158 	u64 val = 0;
1159 
1160 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1161 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1162 
1163 	/* See comment in arm_smmu_write_ctx_desc() */
1164 	WRITE_ONCE(*dst, cpu_to_le64(val));
1165 }
1166 
1167 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1168 {
1169 	struct arm_smmu_cmdq_ent cmd = {
1170 		.opcode	= CMDQ_OP_CFGI_STE,
1171 		.cfgi	= {
1172 			.sid	= sid,
1173 			.leaf	= true,
1174 		},
1175 	};
1176 
1177 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1178 	arm_smmu_cmdq_issue_sync(smmu);
1179 }
1180 
1181 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1182 				      __le64 *dst)
1183 {
1184 	/*
1185 	 * This is hideously complicated, but we only really care about
1186 	 * three cases at the moment:
1187 	 *
1188 	 * 1. Invalid (all zero) -> bypass/fault (init)
1189 	 * 2. Bypass/fault -> translation/bypass (attach)
1190 	 * 3. Translation/bypass -> bypass/fault (detach)
1191 	 *
1192 	 * Given that we can't update the STE atomically and the SMMU
1193 	 * doesn't read the thing in a defined order, that leaves us
1194 	 * with the following maintenance requirements:
1195 	 *
1196 	 * 1. Update Config, return (init time STEs aren't live)
1197 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1198 	 * 3. Update Config, sync
1199 	 */
1200 	u64 val = le64_to_cpu(dst[0]);
1201 	bool ste_live = false;
1202 	struct arm_smmu_device *smmu = NULL;
1203 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1204 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1205 	struct arm_smmu_domain *smmu_domain = NULL;
1206 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1207 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1208 		.prefetch	= {
1209 			.sid	= sid,
1210 		},
1211 	};
1212 
1213 	if (master) {
1214 		smmu_domain = master->domain;
1215 		smmu = master->smmu;
1216 	}
1217 
1218 	if (smmu_domain) {
1219 		switch (smmu_domain->stage) {
1220 		case ARM_SMMU_DOMAIN_S1:
1221 			s1_cfg = &smmu_domain->s1_cfg;
1222 			break;
1223 		case ARM_SMMU_DOMAIN_S2:
1224 		case ARM_SMMU_DOMAIN_NESTED:
1225 			s2_cfg = &smmu_domain->s2_cfg;
1226 			break;
1227 		default:
1228 			break;
1229 		}
1230 	}
1231 
1232 	if (val & STRTAB_STE_0_V) {
1233 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1234 		case STRTAB_STE_0_CFG_BYPASS:
1235 			break;
1236 		case STRTAB_STE_0_CFG_S1_TRANS:
1237 		case STRTAB_STE_0_CFG_S2_TRANS:
1238 			ste_live = true;
1239 			break;
1240 		case STRTAB_STE_0_CFG_ABORT:
1241 			BUG_ON(!disable_bypass);
1242 			break;
1243 		default:
1244 			BUG(); /* STE corruption */
1245 		}
1246 	}
1247 
1248 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1249 	val = STRTAB_STE_0_V;
1250 
1251 	/* Bypass/fault */
1252 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1253 		if (!smmu_domain && disable_bypass)
1254 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1255 		else
1256 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1257 
1258 		dst[0] = cpu_to_le64(val);
1259 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1260 						STRTAB_STE_1_SHCFG_INCOMING));
1261 		dst[2] = 0; /* Nuke the VMID */
1262 		/*
1263 		 * The SMMU can perform negative caching, so we must sync
1264 		 * the STE regardless of whether the old value was live.
1265 		 */
1266 		if (smmu)
1267 			arm_smmu_sync_ste_for_sid(smmu, sid);
1268 		return;
1269 	}
1270 
1271 	if (s1_cfg) {
1272 		BUG_ON(ste_live);
1273 		dst[1] = cpu_to_le64(
1274 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1275 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1276 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1277 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1278 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1279 
1280 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1281 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1282 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1283 
1284 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1285 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1286 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1287 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1288 	}
1289 
1290 	if (s2_cfg) {
1291 		BUG_ON(ste_live);
1292 		dst[2] = cpu_to_le64(
1293 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1294 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1295 #ifdef __BIG_ENDIAN
1296 			 STRTAB_STE_2_S2ENDI |
1297 #endif
1298 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1299 			 STRTAB_STE_2_S2R);
1300 
1301 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1302 
1303 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1304 	}
1305 
1306 	if (master->ats_enabled)
1307 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1308 						 STRTAB_STE_1_EATS_TRANS));
1309 
1310 	arm_smmu_sync_ste_for_sid(smmu, sid);
1311 	/* See comment in arm_smmu_write_ctx_desc() */
1312 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1313 	arm_smmu_sync_ste_for_sid(smmu, sid);
1314 
1315 	/* It's likely that we'll want to use the new STE soon */
1316 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1317 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1318 }
1319 
1320 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1321 {
1322 	unsigned int i;
1323 
1324 	for (i = 0; i < nent; ++i) {
1325 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1326 		strtab += STRTAB_STE_DWORDS;
1327 	}
1328 }
1329 
1330 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1331 {
1332 	size_t size;
1333 	void *strtab;
1334 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1335 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1336 
1337 	if (desc->l2ptr)
1338 		return 0;
1339 
1340 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1341 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1342 
1343 	desc->span = STRTAB_SPLIT + 1;
1344 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1345 					  GFP_KERNEL);
1346 	if (!desc->l2ptr) {
1347 		dev_err(smmu->dev,
1348 			"failed to allocate l2 stream table for SID %u\n",
1349 			sid);
1350 		return -ENOMEM;
1351 	}
1352 
1353 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1354 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1355 	return 0;
1356 }
1357 
1358 /* IRQ and event handlers */
1359 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1360 {
1361 	int i;
1362 	struct arm_smmu_device *smmu = dev;
1363 	struct arm_smmu_queue *q = &smmu->evtq.q;
1364 	struct arm_smmu_ll_queue *llq = &q->llq;
1365 	u64 evt[EVTQ_ENT_DWORDS];
1366 
1367 	do {
1368 		while (!queue_remove_raw(q, evt)) {
1369 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1370 
1371 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1372 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1373 				dev_info(smmu->dev, "\t0x%016llx\n",
1374 					 (unsigned long long)evt[i]);
1375 
1376 		}
1377 
1378 		/*
1379 		 * Not much we can do on overflow, so scream and pretend we're
1380 		 * trying harder.
1381 		 */
1382 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1383 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1384 	} while (!queue_empty(llq));
1385 
1386 	/* Sync our overflow flag, as we believe we're up to speed */
1387 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1388 		    Q_IDX(llq, llq->cons);
1389 	return IRQ_HANDLED;
1390 }
1391 
1392 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1393 {
1394 	u32 sid, ssid;
1395 	u16 grpid;
1396 	bool ssv, last;
1397 
1398 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1399 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1400 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1401 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1402 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1403 
1404 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1405 	dev_info(smmu->dev,
1406 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1407 		 sid, ssid, grpid, last ? "L" : "",
1408 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1409 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1410 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1411 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1412 		 evt[1] & PRIQ_1_ADDR_MASK);
1413 
1414 	if (last) {
1415 		struct arm_smmu_cmdq_ent cmd = {
1416 			.opcode			= CMDQ_OP_PRI_RESP,
1417 			.substream_valid	= ssv,
1418 			.pri			= {
1419 				.sid	= sid,
1420 				.ssid	= ssid,
1421 				.grpid	= grpid,
1422 				.resp	= PRI_RESP_DENY,
1423 			},
1424 		};
1425 
1426 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1427 	}
1428 }
1429 
1430 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1431 {
1432 	struct arm_smmu_device *smmu = dev;
1433 	struct arm_smmu_queue *q = &smmu->priq.q;
1434 	struct arm_smmu_ll_queue *llq = &q->llq;
1435 	u64 evt[PRIQ_ENT_DWORDS];
1436 
1437 	do {
1438 		while (!queue_remove_raw(q, evt))
1439 			arm_smmu_handle_ppr(smmu, evt);
1440 
1441 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1442 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1443 	} while (!queue_empty(llq));
1444 
1445 	/* Sync our overflow flag, as we believe we're up to speed */
1446 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1447 		      Q_IDX(llq, llq->cons);
1448 	queue_sync_cons_out(q);
1449 	return IRQ_HANDLED;
1450 }
1451 
1452 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1453 
1454 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1455 {
1456 	u32 gerror, gerrorn, active;
1457 	struct arm_smmu_device *smmu = dev;
1458 
1459 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1460 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1461 
1462 	active = gerror ^ gerrorn;
1463 	if (!(active & GERROR_ERR_MASK))
1464 		return IRQ_NONE; /* No errors pending */
1465 
1466 	dev_warn(smmu->dev,
1467 		 "unexpected global error reported (0x%08x), this could be serious\n",
1468 		 active);
1469 
1470 	if (active & GERROR_SFM_ERR) {
1471 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1472 		arm_smmu_device_disable(smmu);
1473 	}
1474 
1475 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1476 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1477 
1478 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1479 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1480 
1481 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1482 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1483 
1484 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1485 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1486 
1487 	if (active & GERROR_PRIQ_ABT_ERR)
1488 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1489 
1490 	if (active & GERROR_EVTQ_ABT_ERR)
1491 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1492 
1493 	if (active & GERROR_CMDQ_ERR)
1494 		arm_smmu_cmdq_skip_err(smmu);
1495 
1496 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1497 	return IRQ_HANDLED;
1498 }
1499 
1500 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1501 {
1502 	struct arm_smmu_device *smmu = dev;
1503 
1504 	arm_smmu_evtq_thread(irq, dev);
1505 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1506 		arm_smmu_priq_thread(irq, dev);
1507 
1508 	return IRQ_HANDLED;
1509 }
1510 
1511 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1512 {
1513 	arm_smmu_gerror_handler(irq, dev);
1514 	return IRQ_WAKE_THREAD;
1515 }
1516 
1517 static void
1518 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1519 			struct arm_smmu_cmdq_ent *cmd)
1520 {
1521 	size_t log2_span;
1522 	size_t span_mask;
	/* ATC invalidates are always on 4096-byte pages */
1524 	size_t inval_grain_shift = 12;
1525 	unsigned long page_start, page_end;
1526 
1527 	/*
1528 	 * ATS and PASID:
1529 	 *
1530 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1531 	 * prefix. In that case all ATC entries within the address range are
1532 	 * invalidated, including those that were requested with a PASID! There
1533 	 * is no way to invalidate only entries without PASID.
1534 	 *
1535 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1536 	 * traffic), translation requests without PASID create ATC entries
1537 	 * without PASID, which must be invalidated with substream_valid clear.
1538 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1539 	 * ATC entries within the address range.
1540 	 */
1541 	*cmd = (struct arm_smmu_cmdq_ent) {
1542 		.opcode			= CMDQ_OP_ATC_INV,
1543 		.substream_valid	= !!ssid,
1544 		.atc.ssid		= ssid,
1545 	};
1546 
1547 	if (!size) {
1548 		cmd->atc.size = ATC_INV_SIZE_ALL;
1549 		return;
1550 	}
1551 
1552 	page_start	= iova >> inval_grain_shift;
1553 	page_end	= (iova + size - 1) >> inval_grain_shift;
1554 
1555 	/*
1556 	 * In an ATS Invalidate Request, the address must be aligned on the
1557 	 * range size, which must be a power of two number of page sizes. We
1558 	 * thus have to choose between grossly over-invalidating the region, or
1559 	 * splitting the invalidation into multiple commands. For simplicity
1560 	 * we'll go with the first solution, but should refine it in the future
1561 	 * if multiple commands are shown to be more efficient.
1562 	 *
1563 	 * Find the smallest power of two that covers the range. The most
1564 	 * significant differing bit between the start and end addresses,
1565 	 * fls(start ^ end), indicates the required span. For example:
1566 	 *
1567 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1568 	 *		x = 0b1000 ^ 0b1011 = 0b11
1569 	 *		span = 1 << fls(x) = 4
1570 	 *
1571 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1572 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1573 	 *		span = 1 << fls(x) = 16
1574 	 */
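	/*
	 * Continuing the second example above (illustrative only):
	 * log2_span = fls(0b1101) = 4 and span_mask = 15, so page_start (7)
	 * is rounded down to 0 and the command is emitted with
	 * atc.addr = 0 and atc.size = 4, i.e. a 16-page (64KB) span.
	 */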
1575 	log2_span	= fls_long(page_start ^ page_end);
1576 	span_mask	= (1ULL << log2_span) - 1;
1577 
1578 	page_start	&= ~span_mask;
1579 
1580 	cmd->atc.addr	= page_start << inval_grain_shift;
1581 	cmd->atc.size	= log2_span;
1582 }
1583 
1584 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1585 {
1586 	int i;
1587 	struct arm_smmu_cmdq_ent cmd;
1588 
1589 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1590 
1591 	for (i = 0; i < master->num_sids; i++) {
1592 		cmd.atc.sid = master->sids[i];
1593 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1594 	}
1595 
1596 	return arm_smmu_cmdq_issue_sync(master->smmu);
1597 }
1598 
1599 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1600 			    unsigned long iova, size_t size)
1601 {
1602 	int i;
1603 	unsigned long flags;
1604 	struct arm_smmu_cmdq_ent cmd;
1605 	struct arm_smmu_master *master;
1606 	struct arm_smmu_cmdq_batch cmds = {};
1607 
1608 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1609 		return 0;
1610 
1611 	/*
1612 	 * Ensure that we've completed prior invalidation of the main TLBs
1613 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1614 	 * arm_smmu_enable_ats():
1615 	 *
1616 	 *	// unmap()			// arm_smmu_enable_ats()
1617 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1618 	 *	smp_mb();			[...]
1619 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1620 	 *
1621 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1622 	 * ATS was enabled at the PCI device before completion of the TLBI.
1623 	 */
1624 	smp_mb();
1625 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1626 		return 0;
1627 
1628 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1629 
1630 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1631 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1632 		if (!master->ats_enabled)
1633 			continue;
1634 
1635 		for (i = 0; i < master->num_sids; i++) {
1636 			cmd.atc.sid = master->sids[i];
1637 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1638 		}
1639 	}
1640 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1641 
1642 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1643 }
1644 
1645 /* IO_PGTABLE API */
1646 static void arm_smmu_tlb_inv_context(void *cookie)
1647 {
1648 	struct arm_smmu_domain *smmu_domain = cookie;
1649 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1650 	struct arm_smmu_cmdq_ent cmd;
1651 
1652 	/*
1653 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1654 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1655 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1656 	 * insertion to guarantee those are observed before the TLBI. Do be
1657 	 * careful, 007.
1658 	 */
1659 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1660 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1661 	} else {
1662 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1663 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1664 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1665 		arm_smmu_cmdq_issue_sync(smmu);
1666 	}
1667 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1668 }
1669 
1670 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1671 				   size_t granule, bool leaf,
1672 				   struct arm_smmu_domain *smmu_domain)
1673 {
1674 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1675 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1676 	size_t inv_range = granule;
1677 	struct arm_smmu_cmdq_batch cmds = {};
1678 	struct arm_smmu_cmdq_ent cmd = {
1679 		.tlbi = {
1680 			.leaf	= leaf,
1681 		},
1682 	};
1683 
1684 	if (!size)
1685 		return;
1686 
1687 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1688 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1689 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1690 	} else {
1691 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1692 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1693 	}
1694 
1695 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1696 		/* Get the leaf page size */
1697 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1698 
1699 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1700 		cmd.tlbi.tg = (tg - 10) / 2;
1701 
1702 		/* Determine what level the granule is at */
1703 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1704 
1705 		num_pages = size >> tg;
1706 	}
1707 
1708 	while (iova < end) {
1709 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1710 			/*
1711 			 * On each iteration of the loop, the range is 5 bits
1712 			 * worth of the aligned size remaining.
1713 			 * The range in pages is:
1714 			 *
1715 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1716 			 */
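			/*
			 * Illustrative example (assuming a 4KB granule, so
			 * tg = 12): invalidating 53 pages takes two commands.
			 * First pass: scale = __ffs(53) = 0, num = 53 & 0x1f
			 * = 21, so we emit num - 1 = 20 and advance by 21
			 * pages, leaving 32. Second pass: scale = 5, num = 1,
			 * and the remaining 32 pages go in one command.
			 */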
1717 			unsigned long scale, num;
1718 
1719 			/* Determine the power of 2 multiple number of pages */
1720 			scale = __ffs(num_pages);
1721 			cmd.tlbi.scale = scale;
1722 
1723 			/* Determine how many chunks of 2^scale size we have */
1724 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1725 			cmd.tlbi.num = num - 1;
1726 
1727 			/* range is num * 2^scale * pgsize */
1728 			inv_range = num << (scale + tg);
1729 
1730 			/* Clear out the lower order bits for the next iteration */
1731 			num_pages -= num << scale;
1732 		}
1733 
1734 		cmd.tlbi.addr = iova;
1735 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1736 		iova += inv_range;
1737 	}
1738 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1739 
1740 	/*
1741 	 * Unfortunately, this can't be leaf-only since we may have
1742 	 * zapped an entire table.
1743 	 */
1744 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1745 }
1746 
1747 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1748 					 unsigned long iova, size_t granule,
1749 					 void *cookie)
1750 {
1751 	struct arm_smmu_domain *smmu_domain = cookie;
1752 	struct iommu_domain *domain = &smmu_domain->domain;
1753 
1754 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1755 }
1756 
1757 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1758 				  size_t granule, void *cookie)
1759 {
1760 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1761 }
1762 
1763 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1764 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1765 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1766 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1767 };
1768 
1769 /* IOMMU API */
1770 static bool arm_smmu_capable(enum iommu_cap cap)
1771 {
1772 	switch (cap) {
1773 	case IOMMU_CAP_CACHE_COHERENCY:
1774 		return true;
1775 	case IOMMU_CAP_NOEXEC:
1776 		return true;
1777 	default:
1778 		return false;
1779 	}
1780 }
1781 
1782 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1783 {
1784 	struct arm_smmu_domain *smmu_domain;
1785 
1786 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1787 	    type != IOMMU_DOMAIN_DMA &&
1788 	    type != IOMMU_DOMAIN_IDENTITY)
1789 		return NULL;
1790 
1791 	/*
1792 	 * Allocate the domain and initialise some of its data structures.
1793 	 * We can't really do anything meaningful until we've added a
1794 	 * master.
1795 	 */
1796 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1797 	if (!smmu_domain)
1798 		return NULL;
1799 
1800 	if (type == IOMMU_DOMAIN_DMA &&
1801 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1802 		kfree(smmu_domain);
1803 		return NULL;
1804 	}
1805 
1806 	mutex_init(&smmu_domain->init_mutex);
1807 	INIT_LIST_HEAD(&smmu_domain->devices);
1808 	spin_lock_init(&smmu_domain->devices_lock);
1809 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1810 
1811 	return &smmu_domain->domain;
1812 }
1813 
1814 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1815 {
1816 	int idx, size = 1 << span;
1817 
1818 	do {
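		/*
		 * Find a clear bit and claim it atomically; if another CPU
		 * wins the race in test_and_set_bit(), retry the search.
		 */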
1819 		idx = find_first_zero_bit(map, size);
1820 		if (idx == size)
1821 			return -ENOSPC;
1822 	} while (test_and_set_bit(idx, map));
1823 
1824 	return idx;
1825 }
1826 
1827 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1828 {
1829 	clear_bit(idx, map);
1830 }
1831 
1832 static void arm_smmu_domain_free(struct iommu_domain *domain)
1833 {
1834 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1835 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1836 
1837 	iommu_put_dma_cookie(domain);
1838 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1839 
1840 	/* Free the CD and ASID, if we allocated them */
1841 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1842 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1843 
1844 		/* Prevent SVA from touching the CD while we're freeing it */
1845 		mutex_lock(&arm_smmu_asid_lock);
1846 		if (cfg->cdcfg.cdtab)
1847 			arm_smmu_free_cd_tables(smmu_domain);
1848 		arm_smmu_free_asid(&cfg->cd);
1849 		mutex_unlock(&arm_smmu_asid_lock);
1850 	} else {
1851 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1852 		if (cfg->vmid)
1853 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1854 	}
1855 
1856 	kfree(smmu_domain);
1857 }
1858 
1859 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1860 				       struct arm_smmu_master *master,
1861 				       struct io_pgtable_cfg *pgtbl_cfg)
1862 {
1863 	int ret;
1864 	u32 asid;
1865 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1866 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1867 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1868 
1869 	refcount_set(&cfg->cd.refs, 1);
1870 
1871 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1872 	mutex_lock(&arm_smmu_asid_lock);
1873 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1874 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1875 	if (ret)
1876 		goto out_unlock;
1877 
1878 	cfg->s1cdmax = master->ssid_bits;
1879 
1880 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1881 	if (ret)
1882 		goto out_free_asid;
1883 
1884 	cfg->cd.asid	= (u16)asid;
1885 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1886 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1887 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1888 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1889 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1890 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1891 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1892 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1893 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1894 
1895 	/*
1896 	 * Note that this will end up calling arm_smmu_sync_cd() before
1897 	 * the master has been added to the devices list for this domain.
1898 	 * This isn't an issue because the STE hasn't been installed yet.
1899 	 */
1900 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1901 	if (ret)
1902 		goto out_free_cd_tables;
1903 
1904 	mutex_unlock(&arm_smmu_asid_lock);
1905 	return 0;
1906 
1907 out_free_cd_tables:
1908 	arm_smmu_free_cd_tables(smmu_domain);
1909 out_free_asid:
1910 	arm_smmu_free_asid(&cfg->cd);
1911 out_unlock:
1912 	mutex_unlock(&arm_smmu_asid_lock);
1913 	return ret;
1914 }
1915 
1916 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1917 				       struct arm_smmu_master *master,
1918 				       struct io_pgtable_cfg *pgtbl_cfg)
1919 {
1920 	int vmid;
1921 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1922 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1923 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1924 
1925 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1926 	if (vmid < 0)
1927 		return vmid;
1928 
1929 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1930 	cfg->vmid	= (u16)vmid;
1931 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1932 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1933 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1934 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1935 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1936 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1937 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1938 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1939 	return 0;
1940 }
1941 
1942 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1943 				    struct arm_smmu_master *master)
1944 {
1945 	int ret;
1946 	unsigned long ias, oas;
1947 	enum io_pgtable_fmt fmt;
1948 	struct io_pgtable_cfg pgtbl_cfg;
1949 	struct io_pgtable_ops *pgtbl_ops;
1950 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1951 				 struct arm_smmu_master *,
1952 				 struct io_pgtable_cfg *);
1953 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1954 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1955 
1956 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1957 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1958 		return 0;
1959 	}
1960 
1961 	/* Restrict the stage to what we can actually support */
1962 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1963 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1964 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1965 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1966 
1967 	switch (smmu_domain->stage) {
1968 	case ARM_SMMU_DOMAIN_S1:
1969 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1970 		ias = min_t(unsigned long, ias, VA_BITS);
1971 		oas = smmu->ias;
1972 		fmt = ARM_64_LPAE_S1;
1973 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1974 		break;
1975 	case ARM_SMMU_DOMAIN_NESTED:
1976 	case ARM_SMMU_DOMAIN_S2:
1977 		ias = smmu->ias;
1978 		oas = smmu->oas;
1979 		fmt = ARM_64_LPAE_S2;
1980 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1981 		break;
1982 	default:
1983 		return -EINVAL;
1984 	}
1985 
1986 	pgtbl_cfg = (struct io_pgtable_cfg) {
1987 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1988 		.ias		= ias,
1989 		.oas		= oas,
1990 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1991 		.tlb		= &arm_smmu_flush_ops,
1992 		.iommu_dev	= smmu->dev,
1993 	};
1994 
1995 	if (smmu_domain->non_strict)
1996 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1997 
1998 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1999 	if (!pgtbl_ops)
2000 		return -ENOMEM;
2001 
2002 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2003 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2004 	domain->geometry.force_aperture = true;
2005 
2006 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2007 	if (ret < 0) {
2008 		free_io_pgtable_ops(pgtbl_ops);
2009 		return ret;
2010 	}
2011 
2012 	smmu_domain->pgtbl_ops = pgtbl_ops;
2013 	return 0;
2014 }
2015 
2016 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2017 {
2018 	__le64 *step;
2019 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2020 
2021 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2022 		struct arm_smmu_strtab_l1_desc *l1_desc;
2023 		int idx;
2024 
2025 		/* Two-level walk */
2026 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2027 		l1_desc = &cfg->l1_desc[idx];
2028 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2029 		step = &l1_desc->l2ptr[idx];
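		/*
		 * Illustrative example, assuming STRTAB_SPLIT == 8: SID
		 * 0x1234 selects L1 descriptor 0x12 and STE 0x34 within that
		 * descriptor's L2 table.
		 */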
2030 	} else {
2031 		/* Simple linear lookup */
2032 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2033 	}
2034 
2035 	return step;
2036 }
2037 
2038 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2039 {
2040 	int i, j;
2041 	struct arm_smmu_device *smmu = master->smmu;
2042 
2043 	for (i = 0; i < master->num_sids; ++i) {
2044 		u32 sid = master->sids[i];
2045 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2046 
2047 		/* Bridged PCI devices may end up with duplicated IDs */
2048 		for (j = 0; j < i; j++)
2049 			if (master->sids[j] == sid)
2050 				break;
2051 		if (j < i)
2052 			continue;
2053 
2054 		arm_smmu_write_strtab_ent(master, sid, step);
2055 	}
2056 }
2057 
2058 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2059 {
2060 	struct device *dev = master->dev;
2061 	struct arm_smmu_device *smmu = master->smmu;
2062 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2063 
2064 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2065 		return false;
2066 
2067 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2068 		return false;
2069 
2070 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2071 }
2072 
2073 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2074 {
2075 	size_t stu;
2076 	struct pci_dev *pdev;
2077 	struct arm_smmu_device *smmu = master->smmu;
2078 	struct arm_smmu_domain *smmu_domain = master->domain;
2079 
2080 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2081 	if (!master->ats_enabled)
2082 		return;
2083 
2084 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2085 	stu = __ffs(smmu->pgsize_bitmap);
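	/* e.g. a pgsize_bitmap containing SZ_4K yields stu == 12 */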
2086 	pdev = to_pci_dev(master->dev);
2087 
2088 	atomic_inc(&smmu_domain->nr_ats_masters);
2089 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2090 	if (pci_enable_ats(pdev, stu))
2091 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2092 }
2093 
2094 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2095 {
2096 	struct arm_smmu_domain *smmu_domain = master->domain;
2097 
2098 	if (!master->ats_enabled)
2099 		return;
2100 
2101 	pci_disable_ats(to_pci_dev(master->dev));
2102 	/*
2103 	 * Ensure ATS is disabled at the endpoint before we issue the
2104 	 * ATC invalidation via the SMMU.
2105 	 */
2106 	wmb();
2107 	arm_smmu_atc_inv_master(master);
2108 	atomic_dec(&smmu_domain->nr_ats_masters);
2109 }
2110 
2111 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2112 {
2113 	int ret;
2114 	int features;
2115 	int num_pasids;
2116 	struct pci_dev *pdev;
2117 
2118 	if (!dev_is_pci(master->dev))
2119 		return -ENODEV;
2120 
2121 	pdev = to_pci_dev(master->dev);
2122 
2123 	features = pci_pasid_features(pdev);
2124 	if (features < 0)
2125 		return features;
2126 
2127 	num_pasids = pci_max_pasids(pdev);
2128 	if (num_pasids <= 0)
2129 		return num_pasids;
2130 
2131 	ret = pci_enable_pasid(pdev, features);
2132 	if (ret) {
2133 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2134 		return ret;
2135 	}
2136 
2137 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2138 				  master->smmu->ssid_bits);
2139 	return 0;
2140 }
2141 
2142 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2143 {
2144 	struct pci_dev *pdev;
2145 
2146 	if (!dev_is_pci(master->dev))
2147 		return;
2148 
2149 	pdev = to_pci_dev(master->dev);
2150 
2151 	if (!pdev->pasid_enabled)
2152 		return;
2153 
2154 	master->ssid_bits = 0;
2155 	pci_disable_pasid(pdev);
2156 }
2157 
2158 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2159 {
2160 	unsigned long flags;
2161 	struct arm_smmu_domain *smmu_domain = master->domain;
2162 
2163 	if (!smmu_domain)
2164 		return;
2165 
2166 	arm_smmu_disable_ats(master);
2167 
2168 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2169 	list_del(&master->domain_head);
2170 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2171 
2172 	master->domain = NULL;
2173 	master->ats_enabled = false;
2174 	arm_smmu_install_ste_for_dev(master);
2175 }
2176 
2177 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2178 {
2179 	int ret = 0;
2180 	unsigned long flags;
2181 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2182 	struct arm_smmu_device *smmu;
2183 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2184 	struct arm_smmu_master *master;
2185 
2186 	if (!fwspec)
2187 		return -ENOENT;
2188 
2189 	master = dev_iommu_priv_get(dev);
2190 	smmu = master->smmu;
2191 
2192 	/*
2193 	 * Checking that SVA is disabled ensures that this device isn't bound to
2194 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2195 	 * be removed concurrently since we're holding the group mutex.
2196 	 */
2197 	if (arm_smmu_master_sva_enabled(master)) {
2198 		dev_err(dev, "cannot attach - SVA enabled\n");
2199 		return -EBUSY;
2200 	}
2201 
2202 	arm_smmu_detach_dev(master);
2203 
2204 	mutex_lock(&smmu_domain->init_mutex);
2205 
2206 	if (!smmu_domain->smmu) {
2207 		smmu_domain->smmu = smmu;
2208 		ret = arm_smmu_domain_finalise(domain, master);
2209 		if (ret) {
2210 			smmu_domain->smmu = NULL;
2211 			goto out_unlock;
2212 		}
2213 	} else if (smmu_domain->smmu != smmu) {
2214 		dev_err(dev,
2215 			"cannot attach to SMMU %s (upstream of %s)\n",
2216 			dev_name(smmu_domain->smmu->dev),
2217 			dev_name(smmu->dev));
2218 		ret = -ENXIO;
2219 		goto out_unlock;
2220 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2221 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2222 		dev_err(dev,
2223 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2224 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2225 		ret = -EINVAL;
2226 		goto out_unlock;
2227 	}
2228 
2229 	master->domain = smmu_domain;
2230 
2231 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2232 		master->ats_enabled = arm_smmu_ats_supported(master);
2233 
2234 	arm_smmu_install_ste_for_dev(master);
2235 
2236 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2237 	list_add(&master->domain_head, &smmu_domain->devices);
2238 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2239 
2240 	arm_smmu_enable_ats(master);
2241 
2242 out_unlock:
2243 	mutex_unlock(&smmu_domain->init_mutex);
2244 	return ret;
2245 }
2246 
2247 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2248 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2249 {
2250 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2251 
2252 	if (!ops)
2253 		return -ENODEV;
2254 
2255 	return ops->map(ops, iova, paddr, size, prot, gfp);
2256 }
2257 
2258 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2259 			     size_t size, struct iommu_iotlb_gather *gather)
2260 {
2261 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2262 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2263 
2264 	if (!ops)
2265 		return 0;
2266 
2267 	return ops->unmap(ops, iova, size, gather);
2268 }
2269 
2270 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2271 {
2272 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2273 
2274 	if (smmu_domain->smmu)
2275 		arm_smmu_tlb_inv_context(smmu_domain);
2276 }
2277 
2278 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2279 				struct iommu_iotlb_gather *gather)
2280 {
2281 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2282 
2283 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2284 			       gather->pgsize, true, smmu_domain);
2285 }
2286 
2287 static phys_addr_t
2288 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2289 {
2290 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2291 
2292 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2293 		return iova;
2294 
2295 	if (!ops)
2296 		return 0;
2297 
2298 	return ops->iova_to_phys(ops, iova);
2299 }
2300 
2301 static struct platform_driver arm_smmu_driver;
2302 
2303 static
2304 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2305 {
2306 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2307 							  fwnode);
2308 	put_device(dev);
2309 	return dev ? dev_get_drvdata(dev) : NULL;
2310 }
2311 
2312 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2313 {
2314 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2315 
2316 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2317 		limit *= 1UL << STRTAB_SPLIT;
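	/* e.g. with STRTAB_SPLIT == 8, each L1 entry covers 256 StreamIDs */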
2318 
2319 	return sid < limit;
2320 }
2321 
2322 static struct iommu_ops arm_smmu_ops;
2323 
2324 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2325 {
2326 	int i, ret;
2327 	struct arm_smmu_device *smmu;
2328 	struct arm_smmu_master *master;
2329 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2330 
2331 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2332 		return ERR_PTR(-ENODEV);
2333 
2334 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2335 		return ERR_PTR(-EBUSY);
2336 
2337 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2338 	if (!smmu)
2339 		return ERR_PTR(-ENODEV);
2340 
2341 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2342 	if (!master)
2343 		return ERR_PTR(-ENOMEM);
2344 
2345 	master->dev = dev;
2346 	master->smmu = smmu;
2347 	master->sids = fwspec->ids;
2348 	master->num_sids = fwspec->num_ids;
2349 	INIT_LIST_HEAD(&master->bonds);
2350 	dev_iommu_priv_set(dev, master);
2351 
2352 	/* Check the SIDs are in range of the SMMU and our stream table */
2353 	for (i = 0; i < master->num_sids; i++) {
2354 		u32 sid = master->sids[i];
2355 
2356 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2357 			ret = -ERANGE;
2358 			goto err_free_master;
2359 		}
2360 
2361 		/* Ensure l2 strtab is initialised */
2362 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2363 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2364 			if (ret)
2365 				goto err_free_master;
2366 		}
2367 	}
2368 
2369 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2370 
2371 	/*
2372 	 * Note that PASID must be enabled before, and disabled after ATS:
2373 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2374 	 *
2375 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2376 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2377 	 *   are changed.
2378 	 */
2379 	arm_smmu_enable_pasid(master);
2380 
2381 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2382 		master->ssid_bits = min_t(u8, master->ssid_bits,
2383 					  CTXDESC_LINEAR_CDMAX);
2384 
2385 	return &smmu->iommu;
2386 
2387 err_free_master:
2388 	kfree(master);
2389 	dev_iommu_priv_set(dev, NULL);
2390 	return ERR_PTR(ret);
2391 }
2392 
2393 static void arm_smmu_release_device(struct device *dev)
2394 {
2395 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2396 	struct arm_smmu_master *master;
2397 
2398 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2399 		return;
2400 
2401 	master = dev_iommu_priv_get(dev);
2402 	WARN_ON(arm_smmu_master_sva_enabled(master));
2403 	arm_smmu_detach_dev(master);
2404 	arm_smmu_disable_pasid(master);
2405 	kfree(master);
2406 	iommu_fwspec_free(dev);
2407 }
2408 
2409 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2410 {
2411 	struct iommu_group *group;
2412 
2413 	/*
2414 	 * We don't support devices sharing stream IDs other than PCI RID
2415 	 * aliases, since the necessary ID-to-device lookup becomes rather
2416 	 * impractical given a potential sparse 32-bit stream ID space.
2417 	 */
2418 	if (dev_is_pci(dev))
2419 		group = pci_device_group(dev);
2420 	else
2421 		group = generic_device_group(dev);
2422 
2423 	return group;
2424 }
2425 
2426 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2427 				    enum iommu_attr attr, void *data)
2428 {
2429 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2430 
2431 	switch (domain->type) {
2432 	case IOMMU_DOMAIN_UNMANAGED:
2433 		switch (attr) {
2434 		case DOMAIN_ATTR_NESTING:
2435 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2436 			return 0;
2437 		default:
2438 			return -ENODEV;
2439 		}
2440 		break;
2441 	case IOMMU_DOMAIN_DMA:
2442 		switch (attr) {
2443 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2444 			*(int *)data = smmu_domain->non_strict;
2445 			return 0;
2446 		default:
2447 			return -ENODEV;
2448 		}
2449 		break;
2450 	default:
2451 		return -EINVAL;
2452 	}
2453 }
2454 
2455 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2456 				    enum iommu_attr attr, void *data)
2457 {
2458 	int ret = 0;
2459 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2460 
2461 	mutex_lock(&smmu_domain->init_mutex);
2462 
2463 	switch (domain->type) {
2464 	case IOMMU_DOMAIN_UNMANAGED:
2465 		switch (attr) {
2466 		case DOMAIN_ATTR_NESTING:
2467 			if (smmu_domain->smmu) {
2468 				ret = -EPERM;
2469 				goto out_unlock;
2470 			}
2471 
2472 			if (*(int *)data)
2473 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2474 			else
2475 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2476 			break;
2477 		default:
2478 			ret = -ENODEV;
2479 		}
2480 		break;
2481 	case IOMMU_DOMAIN_DMA:
		switch (attr) {
2483 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2484 			smmu_domain->non_strict = *(int *)data;
2485 			break;
2486 		default:
2487 			ret = -ENODEV;
2488 		}
2489 		break;
2490 	default:
2491 		ret = -EINVAL;
2492 	}
2493 
2494 out_unlock:
2495 	mutex_unlock(&smmu_domain->init_mutex);
2496 	return ret;
2497 }
2498 
2499 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2500 {
2501 	return iommu_fwspec_add_ids(dev, args->args, 1);
2502 }
2503 
2504 static void arm_smmu_get_resv_regions(struct device *dev,
2505 				      struct list_head *head)
2506 {
2507 	struct iommu_resv_region *region;
2508 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2509 
2510 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2511 					 prot, IOMMU_RESV_SW_MSI);
2512 	if (!region)
2513 		return;
2514 
2515 	list_add_tail(&region->list, head);
2516 
2517 	iommu_dma_get_resv_regions(dev, head);
2518 }
2519 
2520 static bool arm_smmu_dev_has_feature(struct device *dev,
2521 				     enum iommu_dev_features feat)
2522 {
2523 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2524 
2525 	if (!master)
2526 		return false;
2527 
2528 	switch (feat) {
2529 	case IOMMU_DEV_FEAT_SVA:
2530 		return arm_smmu_master_sva_supported(master);
2531 	default:
2532 		return false;
2533 	}
2534 }
2535 
2536 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2537 					 enum iommu_dev_features feat)
2538 {
2539 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2540 
2541 	if (!master)
2542 		return false;
2543 
2544 	switch (feat) {
2545 	case IOMMU_DEV_FEAT_SVA:
2546 		return arm_smmu_master_sva_enabled(master);
2547 	default:
2548 		return false;
2549 	}
2550 }
2551 
2552 static int arm_smmu_dev_enable_feature(struct device *dev,
2553 				       enum iommu_dev_features feat)
2554 {
2555 	if (!arm_smmu_dev_has_feature(dev, feat))
2556 		return -ENODEV;
2557 
2558 	if (arm_smmu_dev_feature_enabled(dev, feat))
2559 		return -EBUSY;
2560 
2561 	switch (feat) {
2562 	case IOMMU_DEV_FEAT_SVA:
2563 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2564 	default:
2565 		return -EINVAL;
2566 	}
2567 }
2568 
2569 static int arm_smmu_dev_disable_feature(struct device *dev,
2570 					enum iommu_dev_features feat)
2571 {
2572 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2573 		return -EINVAL;
2574 
2575 	switch (feat) {
2576 	case IOMMU_DEV_FEAT_SVA:
2577 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2578 	default:
2579 		return -EINVAL;
2580 	}
2581 }
2582 
2583 static struct iommu_ops arm_smmu_ops = {
2584 	.capable		= arm_smmu_capable,
2585 	.domain_alloc		= arm_smmu_domain_alloc,
2586 	.domain_free		= arm_smmu_domain_free,
2587 	.attach_dev		= arm_smmu_attach_dev,
2588 	.map			= arm_smmu_map,
2589 	.unmap			= arm_smmu_unmap,
2590 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2591 	.iotlb_sync		= arm_smmu_iotlb_sync,
2592 	.iova_to_phys		= arm_smmu_iova_to_phys,
2593 	.probe_device		= arm_smmu_probe_device,
2594 	.release_device		= arm_smmu_release_device,
2595 	.device_group		= arm_smmu_device_group,
2596 	.domain_get_attr	= arm_smmu_domain_get_attr,
2597 	.domain_set_attr	= arm_smmu_domain_set_attr,
2598 	.of_xlate		= arm_smmu_of_xlate,
2599 	.get_resv_regions	= arm_smmu_get_resv_regions,
2600 	.put_resv_regions	= generic_iommu_put_resv_regions,
2601 	.dev_has_feat		= arm_smmu_dev_has_feature,
2602 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2603 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2604 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2605 	.sva_bind		= arm_smmu_sva_bind,
2606 	.sva_unbind		= arm_smmu_sva_unbind,
2607 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2608 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2609 };
2610 
2611 /* Probing and initialisation functions */
2612 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2613 				   struct arm_smmu_queue *q,
2614 				   unsigned long prod_off,
2615 				   unsigned long cons_off,
2616 				   size_t dwords, const char *name)
2617 {
2618 	size_t qsz;
2619 
2620 	do {
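		/*
		 * Try the current size first; on allocation failure, halve
		 * the number of entries until the allocation succeeds or the
		 * queue fits within a single page.
		 */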
2621 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2622 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2623 					      GFP_KERNEL);
2624 		if (q->base || qsz < PAGE_SIZE)
2625 			break;
2626 
2627 		q->llq.max_n_shift--;
2628 	} while (1);
2629 
2630 	if (!q->base) {
2631 		dev_err(smmu->dev,
2632 			"failed to allocate queue (0x%zx bytes) for %s\n",
2633 			qsz, name);
2634 		return -ENOMEM;
2635 	}
2636 
2637 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2638 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2639 			 1 << q->llq.max_n_shift, name);
2640 	}
2641 
2642 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2643 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2644 	q->ent_dwords	= dwords;
2645 
2646 	q->q_base  = Q_BASE_RWA;
2647 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2648 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
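	/*
	 * q_base now holds the value later programmed into this queue's
	 * *_BASE register: the RA/WA allocation hint, the DMA address of the
	 * ring and its log2 size.
	 */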
2649 
2650 	q->llq.prod = q->llq.cons = 0;
2651 	return 0;
2652 }
2653 
2654 static void arm_smmu_cmdq_free_bitmap(void *data)
2655 {
2656 	unsigned long *bitmap = data;
2657 	bitmap_free(bitmap);
2658 }
2659 
2660 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2661 {
2662 	int ret = 0;
2663 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2664 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2665 	atomic_long_t *bitmap;
2666 
2667 	atomic_set(&cmdq->owner_prod, 0);
2668 	atomic_set(&cmdq->lock, 0);
2669 
2670 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2671 	if (!bitmap) {
2672 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2673 		ret = -ENOMEM;
2674 	} else {
2675 		cmdq->valid_map = bitmap;
2676 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2677 	}
2678 
2679 	return ret;
2680 }
2681 
2682 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2683 {
2684 	int ret;
2685 
2686 	/* cmdq */
2687 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2688 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2689 				      "cmdq");
2690 	if (ret)
2691 		return ret;
2692 
2693 	ret = arm_smmu_cmdq_init(smmu);
2694 	if (ret)
2695 		return ret;
2696 
2697 	/* evtq */
2698 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2699 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2700 				      "evtq");
2701 	if (ret)
2702 		return ret;
2703 
2704 	/* priq */
2705 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2706 		return 0;
2707 
2708 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2709 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2710 				       "priq");
2711 }
2712 
2713 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2714 {
2715 	unsigned int i;
2716 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2717 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2718 	void *strtab = smmu->strtab_cfg.strtab;
2719 
2720 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2721 	if (!cfg->l1_desc) {
2722 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2723 		return -ENOMEM;
2724 	}
2725 
2726 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2727 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2728 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2729 	}
2730 
2731 	return 0;
2732 }
2733 
2734 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2735 {
2736 	void *strtab;
2737 	u64 reg;
2738 	u32 size, l1size;
2739 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2740 
2741 	/* Calculate the L1 size, capped to the SIDSIZE. */
2742 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2743 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2744 	cfg->num_l1_ents = 1 << size;
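	/*
	 * Illustrative sizing, assuming the current header values (20-bit
	 * STRTAB_L1_SZ_SHIFT, single-dword L1 descriptors): the L1 table is
	 * capped at 2^17 entries, each covering 2^STRTAB_SPLIT STEs, and is
	 * shrunk further when SIDSIZE is smaller.
	 */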
2745 
2746 	size += STRTAB_SPLIT;
2747 	if (size < smmu->sid_bits)
2748 		dev_warn(smmu->dev,
2749 			 "2-level strtab only covers %u/%u bits of SID\n",
2750 			 size, smmu->sid_bits);
2751 
2752 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2753 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2754 				     GFP_KERNEL);
2755 	if (!strtab) {
2756 		dev_err(smmu->dev,
2757 			"failed to allocate l1 stream table (%u bytes)\n",
2758 			l1size);
2759 		return -ENOMEM;
2760 	}
2761 	cfg->strtab = strtab;
2762 
2763 	/* Configure strtab_base_cfg for 2 levels */
2764 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2765 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2766 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2767 	cfg->strtab_base_cfg = reg;
2768 
2769 	return arm_smmu_init_l1_strtab(smmu);
2770 }
2771 
2772 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2773 {
2774 	void *strtab;
2775 	u64 reg;
2776 	u32 size;
2777 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2778 
2779 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2780 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2781 				     GFP_KERNEL);
2782 	if (!strtab) {
2783 		dev_err(smmu->dev,
2784 			"failed to allocate linear stream table (%u bytes)\n",
2785 			size);
2786 		return -ENOMEM;
2787 	}
2788 	cfg->strtab = strtab;
2789 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2790 
2791 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2792 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2793 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2794 	cfg->strtab_base_cfg = reg;
2795 
2796 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2797 	return 0;
2798 }
2799 
2800 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2801 {
2802 	u64 reg;
2803 	int ret;
2804 
2805 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2806 		ret = arm_smmu_init_strtab_2lvl(smmu);
2807 	else
2808 		ret = arm_smmu_init_strtab_linear(smmu);
2809 
2810 	if (ret)
2811 		return ret;
2812 
2813 	/* Set the strtab base address */
2814 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2815 	reg |= STRTAB_BASE_RA;
2816 	smmu->strtab_cfg.strtab_base = reg;
2817 
2818 	/* Allocate the first VMID for stage-2 bypass STEs */
2819 	set_bit(0, smmu->vmid_map);
2820 	return 0;
2821 }
2822 
2823 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2824 {
2825 	int ret;
2826 
2827 	ret = arm_smmu_init_queues(smmu);
2828 	if (ret)
2829 		return ret;
2830 
2831 	return arm_smmu_init_strtab(smmu);
2832 }
2833 
2834 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2835 				   unsigned int reg_off, unsigned int ack_off)
2836 {
2837 	u32 reg;
2838 
2839 	writel_relaxed(val, smmu->base + reg_off);
2840 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2841 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2842 }
2843 
/* GBPA is "special": updates must be gated on the GBPA.Update bit */
2845 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2846 {
2847 	int ret;
2848 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2849 
2850 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2851 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2852 	if (ret)
2853 		return ret;
2854 
2855 	reg &= ~clr;
2856 	reg |= set;
2857 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2858 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2859 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2860 
2861 	if (ret)
2862 		dev_err(smmu->dev, "GBPA not responding to update\n");
2863 	return ret;
2864 }
2865 
2866 static void arm_smmu_free_msis(void *data)
2867 {
2868 	struct device *dev = data;
2869 	platform_msi_domain_free_irqs(dev);
2870 }
2871 
2872 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2873 {
2874 	phys_addr_t doorbell;
2875 	struct device *dev = msi_desc_to_dev(desc);
2876 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2877 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2878 
2879 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2880 	doorbell &= MSI_CFG0_ADDR_MASK;
2881 
2882 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2883 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2884 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2885 }
2886 
2887 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2888 {
2889 	struct msi_desc *desc;
2890 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2891 	struct device *dev = smmu->dev;
2892 
2893 	/* Clear the MSI address regs */
2894 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2895 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2896 
2897 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2898 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2899 	else
2900 		nvec--;
2901 
2902 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2903 		return;
2904 
2905 	if (!dev->msi_domain) {
2906 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2907 		return;
2908 	}
2909 
2910 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2911 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2912 	if (ret) {
2913 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2914 		return;
2915 	}
2916 
2917 	for_each_msi_entry(desc, dev) {
2918 		switch (desc->platform.msi_index) {
2919 		case EVTQ_MSI_INDEX:
2920 			smmu->evtq.q.irq = desc->irq;
2921 			break;
2922 		case GERROR_MSI_INDEX:
2923 			smmu->gerr_irq = desc->irq;
2924 			break;
2925 		case PRIQ_MSI_INDEX:
2926 			smmu->priq.q.irq = desc->irq;
2927 			break;
2928 		default:	/* Unknown */
2929 			continue;
2930 		}
2931 	}
2932 
2933 	/* Add callback to free MSIs on teardown */
2934 	devm_add_action(dev, arm_smmu_free_msis, dev);
2935 }
2936 
2937 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2938 {
2939 	int irq, ret;
2940 
2941 	arm_smmu_setup_msis(smmu);
2942 
2943 	/* Request interrupt lines */
2944 	irq = smmu->evtq.q.irq;
2945 	if (irq) {
2946 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2947 						arm_smmu_evtq_thread,
2948 						IRQF_ONESHOT,
2949 						"arm-smmu-v3-evtq", smmu);
2950 		if (ret < 0)
2951 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2952 	} else {
2953 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2954 	}
2955 
2956 	irq = smmu->gerr_irq;
2957 	if (irq) {
2958 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2959 				       0, "arm-smmu-v3-gerror", smmu);
2960 		if (ret < 0)
2961 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2962 	} else {
2963 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2964 	}
2965 
2966 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2967 		irq = smmu->priq.q.irq;
2968 		if (irq) {
2969 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2970 							arm_smmu_priq_thread,
2971 							IRQF_ONESHOT,
2972 							"arm-smmu-v3-priq",
2973 							smmu);
2974 			if (ret < 0)
2975 				dev_warn(smmu->dev,
2976 					 "failed to enable priq irq\n");
2977 		} else {
2978 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2979 		}
2980 	}
2981 }
2982 
2983 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2984 {
2985 	int ret, irq;
2986 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2987 
2988 	/* Disable IRQs first */
2989 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2990 				      ARM_SMMU_IRQ_CTRLACK);
2991 	if (ret) {
2992 		dev_err(smmu->dev, "failed to disable irqs\n");
2993 		return ret;
2994 	}
2995 
2996 	irq = smmu->combined_irq;
2997 	if (irq) {
2998 		/*
2999 		 * Cavium ThunderX2 implementation doesn't support unique irq
3000 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3001 		 */
3002 		ret = devm_request_threaded_irq(smmu->dev, irq,
3003 					arm_smmu_combined_irq_handler,
3004 					arm_smmu_combined_irq_thread,
3005 					IRQF_ONESHOT,
3006 					"arm-smmu-v3-combined-irq", smmu);
3007 		if (ret < 0)
3008 			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else {
		arm_smmu_setup_unique_irqs(smmu);
	}
3011 
3012 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3013 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3014 
3015 	/* Enable interrupt generation on the SMMU */
3016 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3017 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3018 	if (ret)
3019 		dev_warn(smmu->dev, "failed to enable irqs\n");
3020 
3021 	return 0;
3022 }
3023 
3024 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3025 {
3026 	int ret;
3027 
3028 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3029 	if (ret)
3030 		dev_err(smmu->dev, "failed to clear cr0\n");
3031 
3032 	return ret;
3033 }
3034 
3035 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3036 {
3037 	int ret;
3038 	u32 reg, enables;
3039 	struct arm_smmu_cmdq_ent cmd;
3040 
3041 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3042 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3043 	if (reg & CR0_SMMUEN) {
3044 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3045 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3046 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3047 	}
3048 
3049 	ret = arm_smmu_device_disable(smmu);
3050 	if (ret)
3051 		return ret;
3052 
3053 	/* CR1 (table and queue memory attributes) */
3054 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3055 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3056 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3057 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3058 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3059 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3060 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3061 
	/* CR2 (private TLB maintenance, record invalid SIDs, EL2-E2H) */
3063 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3064 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3065 
3066 	/* Stream table */
3067 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3068 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3069 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3070 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3071 
3072 	/* Command queue */
3073 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3074 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3075 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3076 
3077 	enables = CR0_CMDQEN;
3078 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3079 				      ARM_SMMU_CR0ACK);
3080 	if (ret) {
3081 		dev_err(smmu->dev, "failed to enable command queue\n");
3082 		return ret;
3083 	}
3084 
3085 	/* Invalidate any cached configuration */
3086 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3087 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3088 	arm_smmu_cmdq_issue_sync(smmu);
3089 
3090 	/* Invalidate any stale TLB entries */
3091 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3092 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3093 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3094 	}
3095 
3096 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3097 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3098 	arm_smmu_cmdq_issue_sync(smmu);
3099 
3100 	/* Event queue */
3101 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3102 	writel_relaxed(smmu->evtq.q.llq.prod,
3103 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3104 	writel_relaxed(smmu->evtq.q.llq.cons,
3105 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3106 
3107 	enables |= CR0_EVTQEN;
3108 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3109 				      ARM_SMMU_CR0ACK);
3110 	if (ret) {
3111 		dev_err(smmu->dev, "failed to enable event queue\n");
3112 		return ret;
3113 	}
3114 
3115 	/* PRI queue */
3116 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3117 		writeq_relaxed(smmu->priq.q.q_base,
3118 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3119 		writel_relaxed(smmu->priq.q.llq.prod,
3120 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3121 		writel_relaxed(smmu->priq.q.llq.cons,
3122 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3123 
3124 		enables |= CR0_PRIQEN;
3125 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3126 					      ARM_SMMU_CR0ACK);
3127 		if (ret) {
3128 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3129 			return ret;
3130 		}
3131 	}
3132 
3133 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3134 		enables |= CR0_ATSCHK;
3135 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3136 					      ARM_SMMU_CR0ACK);
3137 		if (ret) {
3138 			dev_err(smmu->dev, "failed to enable ATS check\n");
3139 			return ret;
3140 		}
3141 	}
3142 
3143 	ret = arm_smmu_setup_irqs(smmu);
3144 	if (ret) {
3145 		dev_err(smmu->dev, "failed to setup irqs\n");
3146 		return ret;
3147 	}
3148 
3149 	if (is_kdump_kernel())
3150 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3151 
3152 	/* Enable the SMMU interface, or ensure bypass */
3153 	if (!bypass || disable_bypass) {
3154 		enables |= CR0_SMMUEN;
3155 	} else {
3156 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3157 		if (ret)
3158 			return ret;
3159 	}
3160 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3161 				      ARM_SMMU_CR0ACK);
3162 	if (ret) {
3163 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3164 		return ret;
3165 	}
3166 
3167 	return 0;
3168 }
3169 
3170 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3171 {
3172 	u32 reg;
3173 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3174 
3175 	/* IDR0 */
3176 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3177 
3178 	/* 2-level structures */
3179 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3180 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3181 
3182 	if (reg & IDR0_CD2L)
3183 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3184 
3185 	/*
3186 	 * Translation table endianness.
3187 	 * We currently require the same endianness as the CPU, but this
3188 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3189 	 */
3190 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3191 	case IDR0_TTENDIAN_MIXED:
3192 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3193 		break;
3194 #ifdef __BIG_ENDIAN
3195 	case IDR0_TTENDIAN_BE:
3196 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3197 		break;
3198 #else
3199 	case IDR0_TTENDIAN_LE:
3200 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3201 		break;
3202 #endif
3203 	default:
3204 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3205 		return -ENXIO;
3206 	}
3207 
3208 	/* Boolean feature flags */
3209 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3210 		smmu->features |= ARM_SMMU_FEAT_PRI;
3211 
3212 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3213 		smmu->features |= ARM_SMMU_FEAT_ATS;
3214 
3215 	if (reg & IDR0_SEV)
3216 		smmu->features |= ARM_SMMU_FEAT_SEV;
3217 
3218 	if (reg & IDR0_MSI) {
3219 		smmu->features |= ARM_SMMU_FEAT_MSI;
3220 		if (coherent && !disable_msipolling)
3221 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3222 	}
3223 
3224 	if (reg & IDR0_HYP)
3225 		smmu->features |= ARM_SMMU_FEAT_HYP;
3226 
3227 	/*
3228 	 * The coherency feature as set by FW is used in preference to the ID
3229 	 * register, but warn on mismatch.
3230 	 */
3231 	if (!!(reg & IDR0_COHACC) != coherent)
3232 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3233 			 coherent ? "true" : "false");
3234 
3235 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3236 	case IDR0_STALL_MODEL_FORCE:
3237 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3238 		fallthrough;
3239 	case IDR0_STALL_MODEL_STALL:
3240 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3241 	}
3242 
3243 	if (reg & IDR0_S1P)
3244 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3245 
3246 	if (reg & IDR0_S2P)
3247 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3248 
3249 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3250 		dev_err(smmu->dev, "no translation support!\n");
3251 		return -ENXIO;
3252 	}
3253 
3254 	/* We only support the AArch64 table format at present */
3255 	switch (FIELD_GET(IDR0_TTF, reg)) {
3256 	case IDR0_TTF_AARCH32_64:
3257 		smmu->ias = 40;
3258 		fallthrough;
3259 	case IDR0_TTF_AARCH64:
3260 		break;
3261 	default:
3262 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3263 		return -ENXIO;
3264 	}
3265 
3266 	/* ASID/VMID sizes */
3267 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3268 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3269 
3270 	/* IDR1 */
3271 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3272 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3273 		dev_err(smmu->dev, "embedded implementation not supported\n");
3274 		return -ENXIO;
3275 	}
3276 
3277 	/* Queue sizes, capped to ensure natural alignment */
3278 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3279 					     FIELD_GET(IDR1_CMDQS, reg));
3280 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3281 		/*
3282 		 * We don't support splitting up batches, so one batch of
3283 		 * commands plus an extra sync needs to fit inside the command
3284 		 * queue. There's also no way we can handle the weird alignment
3285 		 * restrictions on the base pointer for a unit-length queue.
3286 		 */
3287 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3288 			CMDQ_BATCH_ENTRIES);
3289 		return -ENXIO;
3290 	}
3291 
3292 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3293 					     FIELD_GET(IDR1_EVTQS, reg));
3294 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3295 					     FIELD_GET(IDR1_PRIQS, reg));
3296 
3297 	/* SID/SSID sizes */
3298 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3299 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3300 
3301 	/*
3302 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3303 	 * table, use a linear table instead.
3304 	 */
3305 	if (smmu->sid_bits <= STRTAB_SPLIT)
3306 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3307 
3308 	/* IDR3 */
3309 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3310 	if (FIELD_GET(IDR3_RIL, reg))
3311 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3312 
3313 	/* IDR5 */
3314 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3315 
3316 	/* Maximum number of outstanding stalls */
3317 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3318 
3319 	/* Page sizes */
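	/*
	 * The block sizes follow from the supported granules: 4KB tables
	 * give 2MB and 1GB blocks, 16KB tables give 32MB blocks and 64KB
	 * tables give 512MB blocks.
	 */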
3320 	if (reg & IDR5_GRAN64K)
3321 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3322 	if (reg & IDR5_GRAN16K)
3323 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3324 	if (reg & IDR5_GRAN4K)
3325 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3326 
3327 	/* Input address size */
3328 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3329 		smmu->features |= ARM_SMMU_FEAT_VAX;
3330 
3331 	/* Output address size */
3332 	switch (FIELD_GET(IDR5_OAS, reg)) {
3333 	case IDR5_OAS_32_BIT:
3334 		smmu->oas = 32;
3335 		break;
3336 	case IDR5_OAS_36_BIT:
3337 		smmu->oas = 36;
3338 		break;
3339 	case IDR5_OAS_40_BIT:
3340 		smmu->oas = 40;
3341 		break;
3342 	case IDR5_OAS_42_BIT:
3343 		smmu->oas = 42;
3344 		break;
3345 	case IDR5_OAS_44_BIT:
3346 		smmu->oas = 44;
3347 		break;
3348 	case IDR5_OAS_52_BIT:
3349 		smmu->oas = 52;
3350 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3351 		break;
3352 	default:
3353 		dev_info(smmu->dev,
3354 			"unknown output address size. Truncating to 48-bit\n");
3355 		fallthrough;
3356 	case IDR5_OAS_48_BIT:
3357 		smmu->oas = 48;
3358 	}
3359 
3360 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3361 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3362 	else
3363 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3364 
3365 	/* Set the DMA mask for our table walker */
3366 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3367 		dev_warn(smmu->dev,
3368 			 "failed to set DMA mask for table walker\n");
3369 
3370 	smmu->ias = max(smmu->ias, smmu->oas);
3371 
3372 	if (arm_smmu_sva_supported(smmu))
3373 		smmu->features |= ARM_SMMU_FEAT_SVA;
3374 
3375 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3376 		 smmu->ias, smmu->oas, smmu->features);
3377 	return 0;
3378 }
3379 
3380 #ifdef CONFIG_ACPI
3381 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3382 {
3383 	switch (model) {
3384 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3385 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3386 		break;
3387 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3388 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3389 		break;
3390 	}
3391 
3392 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3393 }
3394 
3395 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3396 				      struct arm_smmu_device *smmu)
3397 {
3398 	struct acpi_iort_smmu_v3 *iort_smmu;
3399 	struct device *dev = smmu->dev;
3400 	struct acpi_iort_node *node;
3401 
3402 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3403 
3404 	/* Retrieve SMMUv3 specific data */
3405 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3406 
3407 	acpi_smmu_get_options(iort_smmu->model, smmu);
3408 
3409 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3410 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3411 
3412 	return 0;
3413 }
3414 #else
3415 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3416 					     struct arm_smmu_device *smmu)
3417 {
3418 	return -ENODEV;
3419 }
3420 #endif
3421 
3422 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3423 				    struct arm_smmu_device *smmu)
3424 {
3425 	struct device *dev = &pdev->dev;
3426 	u32 cells;
3427 	int ret = -EINVAL;
3428 
3429 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3430 		dev_err(dev, "missing #iommu-cells property\n");
3431 	else if (cells != 1)
3432 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3433 	else
3434 		ret = 0;
3435 
3436 	parse_driver_options(smmu);
3437 
3438 	if (of_dma_is_coherent(dev->of_node))
3439 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3440 
3441 	return ret;
3442 }
3443 
3444 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3445 {
3446 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3447 		return SZ_64K;
3448 	else
3449 		return SZ_128K;
3450 }
3451 
3452 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3453 {
3454 	int err;
3455 
3456 #ifdef CONFIG_PCI
3457 	if (pci_bus_type.iommu_ops != ops) {
3458 		err = bus_set_iommu(&pci_bus_type, ops);
3459 		if (err)
3460 			return err;
3461 	}
3462 #endif
3463 #ifdef CONFIG_ARM_AMBA
3464 	if (amba_bustype.iommu_ops != ops) {
3465 		err = bus_set_iommu(&amba_bustype, ops);
3466 		if (err)
3467 			goto err_reset_pci_ops;
3468 	}
3469 #endif
3470 	if (platform_bus_type.iommu_ops != ops) {
3471 		err = bus_set_iommu(&platform_bus_type, ops);
3472 		if (err)
3473 			goto err_reset_amba_ops;
3474 	}
3475 
3476 	return 0;
3477 
3478 err_reset_amba_ops:
3479 #ifdef CONFIG_ARM_AMBA
3480 	bus_set_iommu(&amba_bustype, NULL);
3481 #endif
3482 err_reset_pci_ops: __maybe_unused;
3483 #ifdef CONFIG_PCI
3484 	bus_set_iommu(&pci_bus_type, NULL);
3485 #endif
3486 	return err;
3487 }
3488 
3489 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3490 				      resource_size_t size)
3491 {
3492 	struct resource res = {
3493 		.flags = IORESOURCE_MEM,
3494 		.start = start,
3495 		.end = start + size - 1,
3496 	};
3497 
3498 	return devm_ioremap_resource(dev, &res);
3499 }
3500 
3501 static int arm_smmu_device_probe(struct platform_device *pdev)
3502 {
3503 	int irq, ret;
3504 	struct resource *res;
3505 	resource_size_t ioaddr;
3506 	struct arm_smmu_device *smmu;
3507 	struct device *dev = &pdev->dev;
3508 	bool bypass;
3509 
3510 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3511 	if (!smmu) {
3512 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3513 		return -ENOMEM;
3514 	}
3515 	smmu->dev = dev;
3516 
3517 	if (dev->of_node) {
3518 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3519 	} else {
3520 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3521 		if (ret == -ENODEV)
3522 			return ret;
3523 	}
3524 
3525 	/* Set bypass mode according to firmware probing result */
3526 	bypass = !!ret;
3527 
3528 	/* Base address */
3529 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3530 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3531 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3532 		return -EINVAL;
3533 	}
3534 	ioaddr = res->start;
3535 
3536 	/*
3537 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3538 	 * the PMCG registers which are reserved by the PMU driver.
3539 	 */
3540 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3541 	if (IS_ERR(smmu->base))
3542 		return PTR_ERR(smmu->base);
3543 
3544 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3545 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3546 					       ARM_SMMU_REG_SZ);
3547 		if (IS_ERR(smmu->page1))
3548 			return PTR_ERR(smmu->page1);
3549 	} else {
3550 		smmu->page1 = smmu->base;
3551 	}
3552 
3553 	/* Interrupt lines */
3554 
3555 	irq = platform_get_irq_byname_optional(pdev, "combined");
3556 	if (irq > 0)
3557 		smmu->combined_irq = irq;
3558 	else {
3559 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3560 		if (irq > 0)
3561 			smmu->evtq.q.irq = irq;
3562 
3563 		irq = platform_get_irq_byname_optional(pdev, "priq");
3564 		if (irq > 0)
3565 			smmu->priq.q.irq = irq;
3566 
3567 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3568 		if (irq > 0)
3569 			smmu->gerr_irq = irq;
3570 	}
3571 	/* Probe the h/w */
3572 	ret = arm_smmu_device_hw_probe(smmu);
3573 	if (ret)
3574 		return ret;
3575 
3576 	/* Initialise in-memory data structures */
3577 	ret = arm_smmu_init_structures(smmu);
3578 	if (ret)
3579 		return ret;
3580 
3581 	/* Record our private device structure */
3582 	platform_set_drvdata(pdev, smmu);
3583 
3584 	/* Reset the device */
3585 	ret = arm_smmu_device_reset(smmu, bypass);
3586 	if (ret)
3587 		return ret;
3588 
3589 	/* And we're up. Go go go! */
3590 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3591 				     "smmu3.%pa", &ioaddr);
3592 	if (ret)
3593 		return ret;
3594 
3595 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3596 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3597 
3598 	ret = iommu_device_register(&smmu->iommu);
3599 	if (ret) {
3600 		dev_err(dev, "Failed to register iommu\n");
3601 		return ret;
3602 	}
3603 
3604 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3605 }
3606 
3607 static int arm_smmu_device_remove(struct platform_device *pdev)
3608 {
3609 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3610 
3611 	arm_smmu_set_bus_ops(NULL);
3612 	iommu_device_unregister(&smmu->iommu);
3613 	iommu_device_sysfs_remove(&smmu->iommu);
3614 	arm_smmu_device_disable(smmu);
3615 
3616 	return 0;
3617 }
3618 
3619 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3620 {
3621 	arm_smmu_device_remove(pdev);
3622 }
3623 
3624 static const struct of_device_id arm_smmu_of_match[] = {
3625 	{ .compatible = "arm,smmu-v3", },
3626 	{ },
3627 };
3628 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3629 
3630 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3631 {
3632 	arm_smmu_sva_notifier_synchronize();
3633 	platform_driver_unregister(drv);
3634 }
3635 
3636 static struct platform_driver arm_smmu_driver = {
3637 	.driver	= {
3638 		.name			= "arm-smmu-v3",
3639 		.of_match_table		= arm_smmu_of_match,
3640 		.suppress_bind_attrs	= true,
3641 	},
3642 	.probe	= arm_smmu_device_probe,
3643 	.remove	= arm_smmu_device_remove,
3644 	.shutdown = arm_smmu_device_shutdown,
3645 };
3646 module_driver(arm_smmu_driver, platform_driver_register,
3647 	      arm_smmu_driver_unregister);
3648 
3649 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3650 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3651 MODULE_ALIAS("platform:arm-smmu-v3");
3652 MODULE_LICENSE("GPL v2");
3653