xref: /openbmc/linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 29e1c1ad3ff7f345d80c7b81b08175f5a8c84122)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
/*
 * Module parameter, exposed with mode 0444. Defaults to true; see the
 * parameter description string below for its effect.
 */
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

/*
 * Module parameter, exposed with mode 0444. Zero-initialised, so MSI-based
 * CMD_SYNC polling is not disabled unless the parameter is set.
 */
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");
45 
/*
 * Row indices for arm_smmu_msi_cfg[]. ARM_SMMU_MAX_MSIS is not a real
 * index; it is the number of rows in that table.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
52 
/*
 * For each MSI source (indexed by enum arm_smmu_msi_index), the three
 * *_IRQ_CFG0/1/2 constants belonging to that source.
 */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
70 
/*
 * Pairs a driver option flag with the name of the firmware property that
 * enables it (see parse_driver_options()).
 */
struct arm_smmu_option_prop {
	u32 opt;		/* flag OR-ed into smmu->options */
	const char *prop;	/* boolean property name looked up on the of_node */
};
75 
/*
 * Global XArray and mutex for ASIDs. Their users are not visible in this
 * part of the file -- NOTE(review): confirm the locking rules at the callers.
 */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

/*
 * Special value used by SVA when a process dies, to quiesce a CD without
 * disabling it. It is recognised by address in arm_smmu_write_ctx_desc().
 */
struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
/*
 * Option table consumed by parse_driver_options(): each entry maps a
 * boolean firmware property to an ARM_SMMU_OPT_* flag. The list ends with
 * a { 0, NULL } sentinel.
 */
static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
/* Publish the shadow consumer pointer (q->llq.cons) to the cons register. */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
/*
 * Encode the command described by @ent into the raw command buffer @cmd.
 *
 * @cmd is first cleared (1 << CMDQ_ENT_SZ_SHIFT bytes), then the opcode and
 * the opcode-specific fields are OR-ed in.
 *
 * Returns 0 on success, -EINVAL if a CMDQ_OP_PRI_RESP carries an unknown
 * response code, or -ENOENT if the opcode is not handled here.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* No operands beyond the opcode */
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;
	case CMDQ_OP_CFGI_CD:
		/* CFGI_CD takes an SSID on top of the CFGI_STE fields */
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		/* NH_VA takes a VMID on top of the EL2_VA fields */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		/* NH_ASID takes an ASID on top of the VMID used by S12_VMALL */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		/* Only the three known response codes are accepted */
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		/* Completion is signalled by MSI if an address is given, else by SEV */
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
332 
333 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
334 					 u32 prod)
335 {
336 	struct arm_smmu_queue *q = &smmu->cmdq.q;
337 	struct arm_smmu_cmdq_ent ent = {
338 		.opcode = CMDQ_OP_CMD_SYNC,
339 	};
340 
341 	/*
342 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
343 	 * payload, so the write will zero the entire command on that platform.
344 	 */
345 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
346 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
347 				   q->ent_dwords * 8;
348 	}
349 
350 	arm_smmu_cmdq_build_cmd(cmd, &ent);
351 }
352 
353 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
354 {
355 	static const char *cerror_str[] = {
356 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
357 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
358 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
359 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
360 	};
361 
362 	int i;
363 	u64 cmd[CMDQ_ENT_DWORDS];
364 	struct arm_smmu_queue *q = &smmu->cmdq.q;
365 	u32 cons = readl_relaxed(q->cons_reg);
366 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
367 	struct arm_smmu_cmdq_ent cmd_sync = {
368 		.opcode = CMDQ_OP_CMD_SYNC,
369 	};
370 
371 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
372 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
373 
374 	switch (idx) {
375 	case CMDQ_ERR_CERROR_ABT_IDX:
376 		dev_err(smmu->dev, "retrying command fetch\n");
377 	case CMDQ_ERR_CERROR_NONE_IDX:
378 		return;
379 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
380 		/*
381 		 * ATC Invalidation Completion timeout. CONS is still pointing
382 		 * at the CMD_SYNC. Attempt to complete other pending commands
383 		 * by repeating the CMD_SYNC, though we might well end up back
384 		 * here since the ATC invalidation may still be pending.
385 		 */
386 		return;
387 	case CMDQ_ERR_CERROR_ILL_IDX:
388 	default:
389 		break;
390 	}
391 
392 	/*
393 	 * We may have concurrent producers, so we need to be careful
394 	 * not to touch any of the shadow cmdq state.
395 	 */
396 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
397 	dev_err(smmu->dev, "skipping command in error state:\n");
398 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
399 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
400 
401 	/* Convert the erroneous command into a CMD_SYNC */
402 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
403 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
404 		return;
405 	}
406 
407 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
408 }
409 
410 /*
411  * Command queue locking.
412  * This is a form of bastardised rwlock with the following major changes:
413  *
414  * - The only LOCK routines are exclusive_trylock() and shared_lock().
415  *   Neither have barrier semantics, and instead provide only a control
416  *   dependency.
417  *
418  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
419  *   fails if the caller appears to be the last lock holder (yes, this is
420  *   racy). All successful UNLOCK routines have RELEASE semantics.
421  */
/*
 * Take the cmdq lock in shared mode by incrementing the lock counter.
 * If the counter was negative (held exclusively), wait for it to become
 * non-negative and then retry the increment with a cmpxchg loop.
 * All atomics used here are relaxed.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	/* Slow path: wait out the exclusive holder, then claim a shared slot */
	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
439 
/*
 * Drop one shared reference on the cmdq lock by decrementing the counter
 * with RELEASE ordering; the resulting value is deliberately ignored.
 */
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}
444 
445 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
446 {
447 	if (atomic_read(&cmdq->lock) == 1)
448 		return false;
449 
450 	arm_smmu_cmdq_shared_unlock(cmdq);
451 	return true;
452 }
453 
/*
 * Try to take the cmdq lock exclusively with local interrupts disabled.
 *
 * Saves and disables local IRQs into @flags, then attempts a relaxed
 * cmpxchg of the lock counter from 0 to INT_MIN. Evaluates to true on
 * success (IRQs stay disabled); on failure IRQs are restored and the
 * expression evaluates to false.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
463 
/*
 * Release an exclusively held cmdq lock: reset the counter to 0 with
 * RELEASE ordering, then restore the local IRQ state saved in @flags.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})
469 
470 
471 /*
472  * Command queue insertion.
473  * This is made fiddly by our attempts to achieve some sort of scalability
474  * since there is one queue shared amongst all of the CPUs in the system.  If
475  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
476  * then you'll *love* this monstrosity.
477  *
478  * The basic idea is to split the queue up into ranges of commands that are
479  * owned by a given CPU; the owner may not have written all of the commands
480  * itself, but is responsible for advancing the hardware prod pointer when
481  * the time comes. The algorithm is roughly:
482  *
483  * 	1. Allocate some space in the queue. At this point we also discover
484  *	   whether the head of the queue is currently owned by another CPU,
485  *	   or whether we are the owner.
486  *
487  *	2. Write our commands into our allocated slots in the queue.
488  *
489  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
490  *
491  *	4. If we are an owner:
492  *		a. Wait for the previous owner to finish.
493  *		b. Mark the queue head as unowned, which tells us the range
494  *		   that we are responsible for publishing.
495  *		c. Wait for all commands in our owned range to become valid.
496  *		d. Advance the hardware prod pointer.
497  *		e. Tell the next owner we've finished.
498  *
499  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
500  *	   owner), then we need to stick around until it has completed:
501  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
502  *		   to clear the first 4 bytes.
503  *		b. Otherwise, we spin waiting for the hardware cons pointer to
504  *		   advance past our command.
505  *
506  * The devil is in the details, particularly the use of locking for handling
507  * SYNC completion and freeing up space in the queue before we think that it is
508  * full.
509  */
/*
 * Common worker for setting or polling the valid bitmap over the queue
 * range [sprod, eprod).
 *
 * The range is processed one bitmap word (BITS_PER_LONG entries) at a time.
 * With @set true, the bits of each word covered by the range are toggled
 * with an atomic XOR. With @set false, the function waits (relaxed) until
 * the covered bits of each word match the "valid" pattern for the current
 * wrap bit.
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	/* Local cursor; only max_n_shift and prod are used */
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	/* Word and bit position of the (exclusive) end of the range */
	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		/* Word and bit position of the cursor */
		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		/* Range ends inside this word: stop at the end bit */
		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			/* All-ones under mask if wrap bit clear, all-zeroes if set */
			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		/* Move the cursor past the bits handled in this word */
		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
555 
556 /* Mark all entries in the range [sprod, eprod) as valid */
557 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
558 					u32 sprod, u32 eprod)
559 {
560 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
561 }
562 
563 /* Wait for all entries in the range [sprod, eprod) to become valid */
564 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
565 					 u32 sprod, u32 eprod)
566 {
567 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
568 }
569 
570 /* Wait for the command queue to become non-full */
571 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
572 					     struct arm_smmu_ll_queue *llq)
573 {
574 	unsigned long flags;
575 	struct arm_smmu_queue_poll qp;
576 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
577 	int ret = 0;
578 
579 	/*
580 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
581 	 * that fails, spin until somebody else updates it for us.
582 	 */
583 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
584 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
585 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
586 		llq->val = READ_ONCE(cmdq->q.llq.val);
587 		return 0;
588 	}
589 
590 	queue_poll_init(smmu, &qp);
591 	do {
592 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
593 		if (!queue_full(llq))
594 			break;
595 
596 		ret = queue_poll(&qp);
597 	} while (!ret);
598 
599 	return ret;
600 }
601 
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 *
 * Polls the first 32 bits of the command at llq->prod until they read as
 * zero or queue_poll() reports an error. On success llq->cons is set to the
 * slot after llq->prod; on error it is set to llq->prod.
 *
 * Returns 0 on completion, or the queue_poll() error code.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	/* Stop when the word reads zero, or when queue_poll() fails (sets ret) */
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
625 
/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 *
 * *@llq is first refreshed from the shared shadow state; afterwards
 * llq->cons is re-read from the cons register on every iteration.
 *
 * Returns 0 once the entry has been consumed, or the queue_poll() error.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	/* Remember the target before llq->val is overwritten below */
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 * 	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 * 			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}
679 
680 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
681 					 struct arm_smmu_ll_queue *llq)
682 {
683 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
684 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
685 
686 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
687 }
688 
689 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
690 					u32 prod, int n)
691 {
692 	int i;
693 	struct arm_smmu_ll_queue llq = {
694 		.max_n_shift	= cmdq->q.llq.max_n_shift,
695 		.prod		= prod,
696 	};
697 
698 	for (i = 0; i < n; ++i) {
699 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
700 
701 		prod = queue_inc_prod_n(&llq, i);
702 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
703 	}
704 }
705 
/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 *
 * @cmds points to @n pre-built commands of CMDQ_ENT_DWORDS u64s each. When
 * @sync is true, a CMD_SYNC is appended after them and the function waits
 * for it to complete.
 *
 * Returns 0, or the error from waiting on the CMD_SYNC. A timeout while
 * waiting for queue space is only logged; the function keeps retrying.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		/* IRQs are re-enabled while waiting for space */
		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		/* Claim [llq.prod, head.prod) if nobody raced with us */
		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	/* We own the head if the flag was clear in the value we replaced */
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		/* Point llq.prod at the CMD_SYNC slot for the pollers */
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}
841 
842 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
843 				   struct arm_smmu_cmdq_ent *ent)
844 {
845 	u64 cmd[CMDQ_ENT_DWORDS];
846 
847 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
848 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
849 			 ent->opcode);
850 		return -EINVAL;
851 	}
852 
853 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
854 }
855 
856 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
857 {
858 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
859 }
860 
861 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
862 				    struct arm_smmu_cmdq_batch *cmds,
863 				    struct arm_smmu_cmdq_ent *cmd)
864 {
865 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
866 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
867 		cmds->num = 0;
868 	}
869 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
870 	cmds->num++;
871 }
872 
873 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
874 				      struct arm_smmu_cmdq_batch *cmds)
875 {
876 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
877 }
878 
879 /* Context descriptor manipulation functions */
880 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
881 {
882 	struct arm_smmu_cmdq_ent cmd = {
883 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
884 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
885 		.tlbi.asid = asid,
886 	};
887 
888 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
889 	arm_smmu_cmdq_issue_sync(smmu);
890 }
891 
892 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
893 			     int ssid, bool leaf)
894 {
895 	size_t i;
896 	unsigned long flags;
897 	struct arm_smmu_master *master;
898 	struct arm_smmu_cmdq_batch cmds = {};
899 	struct arm_smmu_device *smmu = smmu_domain->smmu;
900 	struct arm_smmu_cmdq_ent cmd = {
901 		.opcode	= CMDQ_OP_CFGI_CD,
902 		.cfgi	= {
903 			.ssid	= ssid,
904 			.leaf	= leaf,
905 		},
906 	};
907 
908 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
909 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
910 		for (i = 0; i < master->num_streams; i++) {
911 			cmd.cfgi.sid = master->streams[i].id;
912 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
913 		}
914 	}
915 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
916 
917 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
918 }
919 
920 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
921 					struct arm_smmu_l1_ctx_desc *l1_desc)
922 {
923 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
924 
925 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
926 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
927 	if (!l1_desc->l2ptr) {
928 		dev_warn(smmu->dev,
929 			 "failed to allocate context descriptor table\n");
930 		return -ENOMEM;
931 	}
932 	return 0;
933 }
934 
935 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
936 				      struct arm_smmu_l1_ctx_desc *l1_desc)
937 {
938 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
939 		  CTXDESC_L1_DESC_V;
940 
941 	/* See comment in arm_smmu_write_ctx_desc() */
942 	WRITE_ONCE(*dst, cpu_to_le64(val));
943 }
944 
945 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
946 				   u32 ssid)
947 {
948 	__le64 *l1ptr;
949 	unsigned int idx;
950 	struct arm_smmu_l1_ctx_desc *l1_desc;
951 	struct arm_smmu_device *smmu = smmu_domain->smmu;
952 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
953 
954 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
955 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
956 
957 	idx = ssid >> CTXDESC_SPLIT;
958 	l1_desc = &cdcfg->l1_desc[idx];
959 	if (!l1_desc->l2ptr) {
960 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
961 			return NULL;
962 
963 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
964 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
965 		/* An invalid L1CD can be cached */
966 		arm_smmu_sync_cd(smmu_domain, ssid, false);
967 	}
968 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
969 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
970 }
971 
/*
 * Install, update, quiesce or remove the context descriptor for @ssid in
 * @smmu_domain's CD table.
 *
 * @cd selects the operation: NULL clears the CD, &quiet_cd sets EPD0 on the
 * existing CD, and any other pointer either rewrites the ASID of a CD that
 * is already valid or fully populates a CD that is not.
 *
 * Returns 0 on success, -E2BIG (with a WARN) if @ssid does not fit in the
 * table, or -ENOMEM if no CD slot could be obtained for @ssid.
 */
int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Quiesce the context without clearing the valid bit. Disable
	 *     translation, and ignore any translation fault.
	 * (5) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	/* The current dword 0 tells us whether this CD is already valid */
	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (5) */
		val = 0;
	} else if (cd == &quiet_cd) { /* (4) */
		val |= CTXDESC_CD_0_TCR_EPD0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		/* Fill in dwords 1-3 first; dword 0 (with V) is written last */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}
1052 
1053 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054 {
1055 	int ret;
1056 	size_t l1size;
1057 	size_t max_contexts;
1058 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1059 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061 
1062 	max_contexts = 1 << cfg->s1cdmax;
1063 
1064 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1066 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067 		cdcfg->num_l1_ents = max_contexts;
1068 
1069 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070 	} else {
1071 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073 						  CTXDESC_L2_ENTRIES);
1074 
1075 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076 					      sizeof(*cdcfg->l1_desc),
1077 					      GFP_KERNEL);
1078 		if (!cdcfg->l1_desc)
1079 			return -ENOMEM;
1080 
1081 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082 	}
1083 
1084 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085 					   GFP_KERNEL);
1086 	if (!cdcfg->cdtab) {
1087 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088 		ret = -ENOMEM;
1089 		goto err_free_l1;
1090 	}
1091 
1092 	return 0;
1093 
1094 err_free_l1:
1095 	if (cdcfg->l1_desc) {
1096 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1097 		cdcfg->l1_desc = NULL;
1098 	}
1099 	return ret;
1100 }
1101 
1102 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103 {
1104 	int i;
1105 	size_t size, l1size;
1106 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1107 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108 
1109 	if (cdcfg->l1_desc) {
1110 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111 
1112 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113 			if (!cdcfg->l1_desc[i].l2ptr)
1114 				continue;
1115 
1116 			dmam_free_coherent(smmu->dev, size,
1117 					   cdcfg->l1_desc[i].l2ptr,
1118 					   cdcfg->l1_desc[i].l2ptr_dma);
1119 		}
1120 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1121 		cdcfg->l1_desc = NULL;
1122 
1123 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124 	} else {
1125 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126 	}
1127 
1128 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129 	cdcfg->cdtab_dma = 0;
1130 	cdcfg->cdtab = NULL;
1131 }
1132 
1133 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134 {
1135 	bool free;
1136 	struct arm_smmu_ctx_desc *old_cd;
1137 
1138 	if (!cd->asid)
1139 		return false;
1140 
1141 	free = refcount_dec_and_test(&cd->refs);
1142 	if (free) {
1143 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144 		WARN_ON(old_cd != cd);
1145 	}
1146 	return free;
1147 }
1148 
1149 /* Stream table manipulation functions */
1150 static void
1151 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152 {
1153 	u64 val = 0;
1154 
1155 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157 
1158 	/* See comment in arm_smmu_write_ctx_desc() */
1159 	WRITE_ONCE(*dst, cpu_to_le64(val));
1160 }
1161 
1162 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163 {
1164 	struct arm_smmu_cmdq_ent cmd = {
1165 		.opcode	= CMDQ_OP_CFGI_STE,
1166 		.cfgi	= {
1167 			.sid	= sid,
1168 			.leaf	= true,
1169 		},
1170 	};
1171 
1172 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173 	arm_smmu_cmdq_issue_sync(smmu);
1174 }
1175 
/*
 * Program the stream table entry at @dst for stream @sid.
 *
 * The new contents are derived from @master's attached domain: stage-1 or
 * stage-2 translation when the domain provides the matching configuration,
 * otherwise bypass or abort. @master may be NULL (as it is when called from
 * arm_smmu_init_bypass_stes()); in that case no sync command is issued.
 */
static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	/* Pick the stage configuration; other stages leave both NULL */
	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	/* Classify the STE currently in place */
	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {
		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;

		/* Installing over an STE that is already translating is fatal */
		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, strw));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	}

	if (s2_cfg) {
		/* Installing over an STE that is already translating is fatal */
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	/* Sync dwords 1-3 before dword 0 (carrying V and Config) is written */
	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
1317 
1318 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1319 {
1320 	unsigned int i;
1321 
1322 	for (i = 0; i < nent; ++i) {
1323 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1324 		strtab += STRTAB_STE_DWORDS;
1325 	}
1326 }
1327 
1328 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1329 {
1330 	size_t size;
1331 	void *strtab;
1332 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1333 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1334 
1335 	if (desc->l2ptr)
1336 		return 0;
1337 
1338 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1339 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1340 
1341 	desc->span = STRTAB_SPLIT + 1;
1342 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1343 					  GFP_KERNEL);
1344 	if (!desc->l2ptr) {
1345 		dev_err(smmu->dev,
1346 			"failed to allocate l2 stream table for SID %u\n",
1347 			sid);
1348 		return -ENOMEM;
1349 	}
1350 
1351 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1352 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1353 	return 0;
1354 }
1355 
1356 __maybe_unused
1357 static struct arm_smmu_master *
1358 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1359 {
1360 	struct rb_node *node;
1361 	struct arm_smmu_stream *stream;
1362 
1363 	lockdep_assert_held(&smmu->streams_mutex);
1364 
1365 	node = smmu->streams.rb_node;
1366 	while (node) {
1367 		stream = rb_entry(node, struct arm_smmu_stream, node);
1368 		if (stream->id < sid)
1369 			node = node->rb_right;
1370 		else if (stream->id > sid)
1371 			node = node->rb_left;
1372 		else
1373 			return stream->master;
1374 	}
1375 
1376 	return NULL;
1377 }
1378 
1379 /* IRQ and event handlers */
1380 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1381 {
1382 	int i;
1383 	struct arm_smmu_device *smmu = dev;
1384 	struct arm_smmu_queue *q = &smmu->evtq.q;
1385 	struct arm_smmu_ll_queue *llq = &q->llq;
1386 	u64 evt[EVTQ_ENT_DWORDS];
1387 
1388 	do {
1389 		while (!queue_remove_raw(q, evt)) {
1390 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1391 
1392 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1393 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1394 				dev_info(smmu->dev, "\t0x%016llx\n",
1395 					 (unsigned long long)evt[i]);
1396 
1397 		}
1398 
1399 		/*
1400 		 * Not much we can do on overflow, so scream and pretend we're
1401 		 * trying harder.
1402 		 */
1403 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1404 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1405 	} while (!queue_empty(llq));
1406 
1407 	/* Sync our overflow flag, as we believe we're up to speed */
1408 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1409 		    Q_IDX(llq, llq->cons);
1410 	return IRQ_HANDLED;
1411 }
1412 
1413 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1414 {
1415 	u32 sid, ssid;
1416 	u16 grpid;
1417 	bool ssv, last;
1418 
1419 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1420 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1421 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1422 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1423 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1424 
1425 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1426 	dev_info(smmu->dev,
1427 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1428 		 sid, ssid, grpid, last ? "L" : "",
1429 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1430 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1431 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1432 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1433 		 evt[1] & PRIQ_1_ADDR_MASK);
1434 
1435 	if (last) {
1436 		struct arm_smmu_cmdq_ent cmd = {
1437 			.opcode			= CMDQ_OP_PRI_RESP,
1438 			.substream_valid	= ssv,
1439 			.pri			= {
1440 				.sid	= sid,
1441 				.ssid	= ssid,
1442 				.grpid	= grpid,
1443 				.resp	= PRI_RESP_DENY,
1444 			},
1445 		};
1446 
1447 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1448 	}
1449 }
1450 
1451 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1452 {
1453 	struct arm_smmu_device *smmu = dev;
1454 	struct arm_smmu_queue *q = &smmu->priq.q;
1455 	struct arm_smmu_ll_queue *llq = &q->llq;
1456 	u64 evt[PRIQ_ENT_DWORDS];
1457 
1458 	do {
1459 		while (!queue_remove_raw(q, evt))
1460 			arm_smmu_handle_ppr(smmu, evt);
1461 
1462 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1463 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1464 	} while (!queue_empty(llq));
1465 
1466 	/* Sync our overflow flag, as we believe we're up to speed */
1467 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1468 		      Q_IDX(llq, llq->cons);
1469 	queue_sync_cons_out(q);
1470 	return IRQ_HANDLED;
1471 }
1472 
1473 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1474 
1475 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1476 {
1477 	u32 gerror, gerrorn, active;
1478 	struct arm_smmu_device *smmu = dev;
1479 
1480 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1481 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1482 
1483 	active = gerror ^ gerrorn;
1484 	if (!(active & GERROR_ERR_MASK))
1485 		return IRQ_NONE; /* No errors pending */
1486 
1487 	dev_warn(smmu->dev,
1488 		 "unexpected global error reported (0x%08x), this could be serious\n",
1489 		 active);
1490 
1491 	if (active & GERROR_SFM_ERR) {
1492 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1493 		arm_smmu_device_disable(smmu);
1494 	}
1495 
1496 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1497 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1498 
1499 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1500 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1501 
1502 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1503 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1504 
1505 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1506 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1507 
1508 	if (active & GERROR_PRIQ_ABT_ERR)
1509 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1510 
1511 	if (active & GERROR_EVTQ_ABT_ERR)
1512 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1513 
1514 	if (active & GERROR_CMDQ_ERR)
1515 		arm_smmu_cmdq_skip_err(smmu);
1516 
1517 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1518 	return IRQ_HANDLED;
1519 }
1520 
1521 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1522 {
1523 	struct arm_smmu_device *smmu = dev;
1524 
1525 	arm_smmu_evtq_thread(irq, dev);
1526 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1527 		arm_smmu_priq_thread(irq, dev);
1528 
1529 	return IRQ_HANDLED;
1530 }
1531 
1532 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1533 {
1534 	arm_smmu_gerror_handler(irq, dev);
1535 	return IRQ_WAKE_THREAD;
1536 }
1537 
1538 static void
1539 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1540 			struct arm_smmu_cmdq_ent *cmd)
1541 {
1542 	size_t log2_span;
1543 	size_t span_mask;
1544 	/* ATC invalidates are always on 4096-bytes pages */
1545 	size_t inval_grain_shift = 12;
1546 	unsigned long page_start, page_end;
1547 
1548 	/*
1549 	 * ATS and PASID:
1550 	 *
1551 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1552 	 * prefix. In that case all ATC entries within the address range are
1553 	 * invalidated, including those that were requested with a PASID! There
1554 	 * is no way to invalidate only entries without PASID.
1555 	 *
1556 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1557 	 * traffic), translation requests without PASID create ATC entries
1558 	 * without PASID, which must be invalidated with substream_valid clear.
1559 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1560 	 * ATC entries within the address range.
1561 	 */
1562 	*cmd = (struct arm_smmu_cmdq_ent) {
1563 		.opcode			= CMDQ_OP_ATC_INV,
1564 		.substream_valid	= !!ssid,
1565 		.atc.ssid		= ssid,
1566 	};
1567 
1568 	if (!size) {
1569 		cmd->atc.size = ATC_INV_SIZE_ALL;
1570 		return;
1571 	}
1572 
1573 	page_start	= iova >> inval_grain_shift;
1574 	page_end	= (iova + size - 1) >> inval_grain_shift;
1575 
1576 	/*
1577 	 * In an ATS Invalidate Request, the address must be aligned on the
1578 	 * range size, which must be a power of two number of page sizes. We
1579 	 * thus have to choose between grossly over-invalidating the region, or
1580 	 * splitting the invalidation into multiple commands. For simplicity
1581 	 * we'll go with the first solution, but should refine it in the future
1582 	 * if multiple commands are shown to be more efficient.
1583 	 *
1584 	 * Find the smallest power of two that covers the range. The most
1585 	 * significant differing bit between the start and end addresses,
1586 	 * fls(start ^ end), indicates the required span. For example:
1587 	 *
1588 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1589 	 *		x = 0b1000 ^ 0b1011 = 0b11
1590 	 *		span = 1 << fls(x) = 4
1591 	 *
1592 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1593 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1594 	 *		span = 1 << fls(x) = 16
1595 	 */
1596 	log2_span	= fls_long(page_start ^ page_end);
1597 	span_mask	= (1ULL << log2_span) - 1;
1598 
1599 	page_start	&= ~span_mask;
1600 
1601 	cmd->atc.addr	= page_start << inval_grain_shift;
1602 	cmd->atc.size	= log2_span;
1603 }
1604 
1605 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1606 {
1607 	int i;
1608 	struct arm_smmu_cmdq_ent cmd;
1609 
1610 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1611 
1612 	for (i = 0; i < master->num_streams; i++) {
1613 		cmd.atc.sid = master->streams[i].id;
1614 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1615 	}
1616 
1617 	return arm_smmu_cmdq_issue_sync(master->smmu);
1618 }
1619 
1620 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1621 			    unsigned long iova, size_t size)
1622 {
1623 	int i;
1624 	unsigned long flags;
1625 	struct arm_smmu_cmdq_ent cmd;
1626 	struct arm_smmu_master *master;
1627 	struct arm_smmu_cmdq_batch cmds = {};
1628 
1629 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1630 		return 0;
1631 
1632 	/*
1633 	 * Ensure that we've completed prior invalidation of the main TLBs
1634 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1635 	 * arm_smmu_enable_ats():
1636 	 *
1637 	 *	// unmap()			// arm_smmu_enable_ats()
1638 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1639 	 *	smp_mb();			[...]
1640 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1641 	 *
1642 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1643 	 * ATS was enabled at the PCI device before completion of the TLBI.
1644 	 */
1645 	smp_mb();
1646 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1647 		return 0;
1648 
1649 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1650 
1651 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1652 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1653 		if (!master->ats_enabled)
1654 			continue;
1655 
1656 		for (i = 0; i < master->num_streams; i++) {
1657 			cmd.atc.sid = master->streams[i].id;
1658 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1659 		}
1660 	}
1661 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1662 
1663 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1664 }
1665 
1666 /* IO_PGTABLE API */
1667 static void arm_smmu_tlb_inv_context(void *cookie)
1668 {
1669 	struct arm_smmu_domain *smmu_domain = cookie;
1670 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1671 	struct arm_smmu_cmdq_ent cmd;
1672 
1673 	/*
1674 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1675 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1676 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1677 	 * insertion to guarantee those are observed before the TLBI. Do be
1678 	 * careful, 007.
1679 	 */
1680 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1681 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1682 	} else {
1683 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1684 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1685 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1686 		arm_smmu_cmdq_issue_sync(smmu);
1687 	}
1688 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1689 }
1690 
/*
 * Issue TLB invalidation commands covering [@iova, @iova + @size).
 *
 * @cmd is a command template prepared by the caller; this function fills in
 * the address (and, with range invalidation, the tg/ttl/scale/num fields)
 * for each command, batches them and submits the batch once at the end.
 * Nothing is issued when @size is zero.
 *
 * Without ARM_SMMU_FEAT_RANGE_INV one command is issued per @granule. With
 * it, each command covers num * 2^scale leaf pages.
 */
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
				     unsigned long iova, size_t size,
				     size_t granule,
				     struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long end = iova + size, num_pages = 0, tg = 0;
	size_t inv_range = granule;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!size)
		return;

	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
		/* Get the leaf page size */
		tg = __ffs(smmu_domain->domain.pgsize_bitmap);

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd->tlbi.tg = (tg - 10) / 2;

		/* Determine what level the granule is at */
		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

		/*
		 * NOTE(review): num_pages is 0 if size is smaller than one
		 * leaf page, and __ffs(0) below has no defined result --
		 * presumably callers always pass whole pages; confirm.
		 */
		num_pages = size >> tg;
	}

	while (iova < end) {
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, num;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(num_pages);
			cmd->tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd->tlbi.num = num - 1;

			/* range is num * 2^scale * pgsize */
			inv_range = num << (scale + tg);

			/* Clear out the lower order bits for the next iteration */
			num_pages -= num << scale;
		}

		cmd->tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
		/* Advance by what this command covered */
		iova += inv_range;
	}
	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
1749 
1750 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1751 					  size_t granule, bool leaf,
1752 					  struct arm_smmu_domain *smmu_domain)
1753 {
1754 	struct arm_smmu_cmdq_ent cmd = {
1755 		.tlbi = {
1756 			.leaf	= leaf,
1757 		},
1758 	};
1759 
1760 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1761 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1762 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1763 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1764 	} else {
1765 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1766 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1767 	}
1768 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1769 
1770 	/*
1771 	 * Unfortunately, this can't be leaf-only since we may have
1772 	 * zapped an entire table.
1773 	 */
1774 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1775 }
1776 
1777 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1778 				 size_t granule, bool leaf,
1779 				 struct arm_smmu_domain *smmu_domain)
1780 {
1781 	struct arm_smmu_cmdq_ent cmd = {
1782 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1783 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1784 		.tlbi = {
1785 			.asid	= asid,
1786 			.leaf	= leaf,
1787 		},
1788 	};
1789 
1790 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1791 }
1792 
1793 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1794 					 unsigned long iova, size_t granule,
1795 					 void *cookie)
1796 {
1797 	struct arm_smmu_domain *smmu_domain = cookie;
1798 	struct iommu_domain *domain = &smmu_domain->domain;
1799 
1800 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1801 }
1802 
1803 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1804 				  size_t granule, void *cookie)
1805 {
1806 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1807 }
1808 
/*
 * TLB maintenance callbacks: full-context flush, table-walk (non-leaf)
 * range flush, and per-page gathering for a later sync.
 */
static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};
1814 
1815 /* IOMMU API */
1816 static bool arm_smmu_capable(enum iommu_cap cap)
1817 {
1818 	switch (cap) {
1819 	case IOMMU_CAP_CACHE_COHERENCY:
1820 		return true;
1821 	case IOMMU_CAP_NOEXEC:
1822 		return true;
1823 	default:
1824 		return false;
1825 	}
1826 }
1827 
1828 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1829 {
1830 	struct arm_smmu_domain *smmu_domain;
1831 
1832 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1833 	    type != IOMMU_DOMAIN_DMA &&
1834 	    type != IOMMU_DOMAIN_IDENTITY)
1835 		return NULL;
1836 
1837 	/*
1838 	 * Allocate the domain and initialise some of its data structures.
1839 	 * We can't really do anything meaningful until we've added a
1840 	 * master.
1841 	 */
1842 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1843 	if (!smmu_domain)
1844 		return NULL;
1845 
1846 	if (type == IOMMU_DOMAIN_DMA &&
1847 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1848 		kfree(smmu_domain);
1849 		return NULL;
1850 	}
1851 
1852 	mutex_init(&smmu_domain->init_mutex);
1853 	INIT_LIST_HEAD(&smmu_domain->devices);
1854 	spin_lock_init(&smmu_domain->devices_lock);
1855 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1856 
1857 	return &smmu_domain->domain;
1858 }
1859 
1860 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1861 {
1862 	int idx, size = 1 << span;
1863 
1864 	do {
1865 		idx = find_first_zero_bit(map, size);
1866 		if (idx == size)
1867 			return -ENOSPC;
1868 	} while (test_and_set_bit(idx, map));
1869 
1870 	return idx;
1871 }
1872 
/* Release bit @idx in @map so that arm_smmu_bitmap_alloc() can hand it out again. */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
1877 
1878 static void arm_smmu_domain_free(struct iommu_domain *domain)
1879 {
1880 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1881 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1882 
1883 	iommu_put_dma_cookie(domain);
1884 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1885 
1886 	/* Free the CD and ASID, if we allocated them */
1887 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1888 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1889 
1890 		/* Prevent SVA from touching the CD while we're freeing it */
1891 		mutex_lock(&arm_smmu_asid_lock);
1892 		if (cfg->cdcfg.cdtab)
1893 			arm_smmu_free_cd_tables(smmu_domain);
1894 		arm_smmu_free_asid(&cfg->cd);
1895 		mutex_unlock(&arm_smmu_asid_lock);
1896 	} else {
1897 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1898 		if (cfg->vmid)
1899 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1900 	}
1901 
1902 	kfree(smmu_domain);
1903 }
1904 
1905 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1906 				       struct arm_smmu_master *master,
1907 				       struct io_pgtable_cfg *pgtbl_cfg)
1908 {
1909 	int ret;
1910 	u32 asid;
1911 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1912 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1913 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1914 
1915 	refcount_set(&cfg->cd.refs, 1);
1916 
1917 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1918 	mutex_lock(&arm_smmu_asid_lock);
1919 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1920 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1921 	if (ret)
1922 		goto out_unlock;
1923 
1924 	cfg->s1cdmax = master->ssid_bits;
1925 
1926 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1927 	if (ret)
1928 		goto out_free_asid;
1929 
1930 	cfg->cd.asid	= (u16)asid;
1931 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1932 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1933 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1934 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1935 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1936 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1937 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1938 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1939 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1940 
1941 	/*
1942 	 * Note that this will end up calling arm_smmu_sync_cd() before
1943 	 * the master has been added to the devices list for this domain.
1944 	 * This isn't an issue because the STE hasn't been installed yet.
1945 	 */
1946 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1947 	if (ret)
1948 		goto out_free_cd_tables;
1949 
1950 	mutex_unlock(&arm_smmu_asid_lock);
1951 	return 0;
1952 
1953 out_free_cd_tables:
1954 	arm_smmu_free_cd_tables(smmu_domain);
1955 out_free_asid:
1956 	arm_smmu_free_asid(&cfg->cd);
1957 out_unlock:
1958 	mutex_unlock(&arm_smmu_asid_lock);
1959 	return ret;
1960 }
1961 
1962 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1963 				       struct arm_smmu_master *master,
1964 				       struct io_pgtable_cfg *pgtbl_cfg)
1965 {
1966 	int vmid;
1967 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1968 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1969 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1970 
1971 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1972 	if (vmid < 0)
1973 		return vmid;
1974 
1975 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1976 	cfg->vmid	= (u16)vmid;
1977 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1978 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1979 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1980 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1981 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1982 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1983 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1984 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1985 	return 0;
1986 }
1987 
1988 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1989 				    struct arm_smmu_master *master)
1990 {
1991 	int ret;
1992 	unsigned long ias, oas;
1993 	enum io_pgtable_fmt fmt;
1994 	struct io_pgtable_cfg pgtbl_cfg;
1995 	struct io_pgtable_ops *pgtbl_ops;
1996 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1997 				 struct arm_smmu_master *,
1998 				 struct io_pgtable_cfg *);
1999 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2000 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2001 
2002 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2003 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2004 		return 0;
2005 	}
2006 
2007 	/* Restrict the stage to what we can actually support */
2008 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2009 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2010 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2011 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2012 
2013 	switch (smmu_domain->stage) {
2014 	case ARM_SMMU_DOMAIN_S1:
2015 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2016 		ias = min_t(unsigned long, ias, VA_BITS);
2017 		oas = smmu->ias;
2018 		fmt = ARM_64_LPAE_S1;
2019 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2020 		break;
2021 	case ARM_SMMU_DOMAIN_NESTED:
2022 	case ARM_SMMU_DOMAIN_S2:
2023 		ias = smmu->ias;
2024 		oas = smmu->oas;
2025 		fmt = ARM_64_LPAE_S2;
2026 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2027 		break;
2028 	default:
2029 		return -EINVAL;
2030 	}
2031 
2032 	pgtbl_cfg = (struct io_pgtable_cfg) {
2033 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2034 		.ias		= ias,
2035 		.oas		= oas,
2036 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2037 		.tlb		= &arm_smmu_flush_ops,
2038 		.iommu_dev	= smmu->dev,
2039 	};
2040 
2041 	if (!iommu_get_dma_strict(domain))
2042 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2043 
2044 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2045 	if (!pgtbl_ops)
2046 		return -ENOMEM;
2047 
2048 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2049 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2050 	domain->geometry.force_aperture = true;
2051 
2052 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2053 	if (ret < 0) {
2054 		free_io_pgtable_ops(pgtbl_ops);
2055 		return ret;
2056 	}
2057 
2058 	smmu_domain->pgtbl_ops = pgtbl_ops;
2059 	return 0;
2060 }
2061 
2062 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2063 {
2064 	__le64 *step;
2065 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2066 
2067 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2068 		struct arm_smmu_strtab_l1_desc *l1_desc;
2069 		int idx;
2070 
2071 		/* Two-level walk */
2072 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2073 		l1_desc = &cfg->l1_desc[idx];
2074 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2075 		step = &l1_desc->l2ptr[idx];
2076 	} else {
2077 		/* Simple linear lookup */
2078 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2079 	}
2080 
2081 	return step;
2082 }
2083 
2084 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2085 {
2086 	int i, j;
2087 	struct arm_smmu_device *smmu = master->smmu;
2088 
2089 	for (i = 0; i < master->num_streams; ++i) {
2090 		u32 sid = master->streams[i].id;
2091 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2092 
2093 		/* Bridged PCI devices may end up with duplicated IDs */
2094 		for (j = 0; j < i; j++)
2095 			if (master->streams[j].id == sid)
2096 				break;
2097 		if (j < i)
2098 			continue;
2099 
2100 		arm_smmu_write_strtab_ent(master, sid, step);
2101 	}
2102 }
2103 
2104 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2105 {
2106 	struct device *dev = master->dev;
2107 	struct arm_smmu_device *smmu = master->smmu;
2108 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2109 
2110 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2111 		return false;
2112 
2113 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2114 		return false;
2115 
2116 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2117 }
2118 
2119 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2120 {
2121 	size_t stu;
2122 	struct pci_dev *pdev;
2123 	struct arm_smmu_device *smmu = master->smmu;
2124 	struct arm_smmu_domain *smmu_domain = master->domain;
2125 
2126 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2127 	if (!master->ats_enabled)
2128 		return;
2129 
2130 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2131 	stu = __ffs(smmu->pgsize_bitmap);
2132 	pdev = to_pci_dev(master->dev);
2133 
2134 	atomic_inc(&smmu_domain->nr_ats_masters);
2135 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2136 	if (pci_enable_ats(pdev, stu))
2137 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2138 }
2139 
2140 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2141 {
2142 	struct arm_smmu_domain *smmu_domain = master->domain;
2143 
2144 	if (!master->ats_enabled)
2145 		return;
2146 
2147 	pci_disable_ats(to_pci_dev(master->dev));
2148 	/*
2149 	 * Ensure ATS is disabled at the endpoint before we issue the
2150 	 * ATC invalidation via the SMMU.
2151 	 */
2152 	wmb();
2153 	arm_smmu_atc_inv_master(master);
2154 	atomic_dec(&smmu_domain->nr_ats_masters);
2155 }
2156 
2157 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2158 {
2159 	int ret;
2160 	int features;
2161 	int num_pasids;
2162 	struct pci_dev *pdev;
2163 
2164 	if (!dev_is_pci(master->dev))
2165 		return -ENODEV;
2166 
2167 	pdev = to_pci_dev(master->dev);
2168 
2169 	features = pci_pasid_features(pdev);
2170 	if (features < 0)
2171 		return features;
2172 
2173 	num_pasids = pci_max_pasids(pdev);
2174 	if (num_pasids <= 0)
2175 		return num_pasids;
2176 
2177 	ret = pci_enable_pasid(pdev, features);
2178 	if (ret) {
2179 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2180 		return ret;
2181 	}
2182 
2183 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2184 				  master->smmu->ssid_bits);
2185 	return 0;
2186 }
2187 
2188 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2189 {
2190 	struct pci_dev *pdev;
2191 
2192 	if (!dev_is_pci(master->dev))
2193 		return;
2194 
2195 	pdev = to_pci_dev(master->dev);
2196 
2197 	if (!pdev->pasid_enabled)
2198 		return;
2199 
2200 	master->ssid_bits = 0;
2201 	pci_disable_pasid(pdev);
2202 }
2203 
2204 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2205 {
2206 	unsigned long flags;
2207 	struct arm_smmu_domain *smmu_domain = master->domain;
2208 
2209 	if (!smmu_domain)
2210 		return;
2211 
2212 	arm_smmu_disable_ats(master);
2213 
2214 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2215 	list_del(&master->domain_head);
2216 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2217 
2218 	master->domain = NULL;
2219 	master->ats_enabled = false;
2220 	arm_smmu_install_ste_for_dev(master);
2221 }
2222 
2223 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2224 {
2225 	int ret = 0;
2226 	unsigned long flags;
2227 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2228 	struct arm_smmu_device *smmu;
2229 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2230 	struct arm_smmu_master *master;
2231 
2232 	if (!fwspec)
2233 		return -ENOENT;
2234 
2235 	master = dev_iommu_priv_get(dev);
2236 	smmu = master->smmu;
2237 
2238 	/*
2239 	 * Checking that SVA is disabled ensures that this device isn't bound to
2240 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2241 	 * be removed concurrently since we're holding the group mutex.
2242 	 */
2243 	if (arm_smmu_master_sva_enabled(master)) {
2244 		dev_err(dev, "cannot attach - SVA enabled\n");
2245 		return -EBUSY;
2246 	}
2247 
2248 	arm_smmu_detach_dev(master);
2249 
2250 	mutex_lock(&smmu_domain->init_mutex);
2251 
2252 	if (!smmu_domain->smmu) {
2253 		smmu_domain->smmu = smmu;
2254 		ret = arm_smmu_domain_finalise(domain, master);
2255 		if (ret) {
2256 			smmu_domain->smmu = NULL;
2257 			goto out_unlock;
2258 		}
2259 	} else if (smmu_domain->smmu != smmu) {
2260 		dev_err(dev,
2261 			"cannot attach to SMMU %s (upstream of %s)\n",
2262 			dev_name(smmu_domain->smmu->dev),
2263 			dev_name(smmu->dev));
2264 		ret = -ENXIO;
2265 		goto out_unlock;
2266 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2267 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2268 		dev_err(dev,
2269 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2270 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2271 		ret = -EINVAL;
2272 		goto out_unlock;
2273 	}
2274 
2275 	master->domain = smmu_domain;
2276 
2277 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2278 		master->ats_enabled = arm_smmu_ats_supported(master);
2279 
2280 	arm_smmu_install_ste_for_dev(master);
2281 
2282 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2283 	list_add(&master->domain_head, &smmu_domain->devices);
2284 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2285 
2286 	arm_smmu_enable_ats(master);
2287 
2288 out_unlock:
2289 	mutex_unlock(&smmu_domain->init_mutex);
2290 	return ret;
2291 }
2292 
2293 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2294 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2295 {
2296 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2297 
2298 	if (!ops)
2299 		return -ENODEV;
2300 
2301 	return ops->map(ops, iova, paddr, size, prot, gfp);
2302 }
2303 
2304 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2305 			     size_t size, struct iommu_iotlb_gather *gather)
2306 {
2307 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2308 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2309 
2310 	if (!ops)
2311 		return 0;
2312 
2313 	return ops->unmap(ops, iova, size, gather);
2314 }
2315 
2316 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2317 {
2318 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2319 
2320 	if (smmu_domain->smmu)
2321 		arm_smmu_tlb_inv_context(smmu_domain);
2322 }
2323 
2324 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2325 				struct iommu_iotlb_gather *gather)
2326 {
2327 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2328 
2329 	if (!gather->pgsize)
2330 		return;
2331 
2332 	arm_smmu_tlb_inv_range_domain(gather->start,
2333 				      gather->end - gather->start + 1,
2334 				      gather->pgsize, true, smmu_domain);
2335 }
2336 
2337 static phys_addr_t
2338 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2339 {
2340 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2341 
2342 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2343 		return iova;
2344 
2345 	if (!ops)
2346 		return 0;
2347 
2348 	return ops->iova_to_phys(ops, iova);
2349 }
2350 
2351 static struct platform_driver arm_smmu_driver;
2352 
2353 static
2354 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2355 {
2356 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2357 							  fwnode);
2358 	put_device(dev);
2359 	return dev ? dev_get_drvdata(dev) : NULL;
2360 }
2361 
2362 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2363 {
2364 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2365 
2366 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2367 		limit *= 1UL << STRTAB_SPLIT;
2368 
2369 	return sid < limit;
2370 }
2371 
2372 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2373 				  struct arm_smmu_master *master)
2374 {
2375 	int i;
2376 	int ret = 0;
2377 	struct arm_smmu_stream *new_stream, *cur_stream;
2378 	struct rb_node **new_node, *parent_node = NULL;
2379 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2380 
2381 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2382 				  GFP_KERNEL);
2383 	if (!master->streams)
2384 		return -ENOMEM;
2385 	master->num_streams = fwspec->num_ids;
2386 
2387 	mutex_lock(&smmu->streams_mutex);
2388 	for (i = 0; i < fwspec->num_ids; i++) {
2389 		u32 sid = fwspec->ids[i];
2390 
2391 		new_stream = &master->streams[i];
2392 		new_stream->id = sid;
2393 		new_stream->master = master;
2394 
2395 		/*
2396 		 * Check the SIDs are in range of the SMMU and our stream table
2397 		 */
2398 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2399 			ret = -ERANGE;
2400 			break;
2401 		}
2402 
2403 		/* Ensure l2 strtab is initialised */
2404 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2405 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2406 			if (ret)
2407 				break;
2408 		}
2409 
2410 		/* Insert into SID tree */
2411 		new_node = &(smmu->streams.rb_node);
2412 		while (*new_node) {
2413 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2414 					      node);
2415 			parent_node = *new_node;
2416 			if (cur_stream->id > new_stream->id) {
2417 				new_node = &((*new_node)->rb_left);
2418 			} else if (cur_stream->id < new_stream->id) {
2419 				new_node = &((*new_node)->rb_right);
2420 			} else {
2421 				dev_warn(master->dev,
2422 					 "stream %u already in tree\n",
2423 					 cur_stream->id);
2424 				ret = -EINVAL;
2425 				break;
2426 			}
2427 		}
2428 		if (ret)
2429 			break;
2430 
2431 		rb_link_node(&new_stream->node, parent_node, new_node);
2432 		rb_insert_color(&new_stream->node, &smmu->streams);
2433 	}
2434 
2435 	if (ret) {
2436 		for (i--; i >= 0; i--)
2437 			rb_erase(&master->streams[i].node, &smmu->streams);
2438 		kfree(master->streams);
2439 	}
2440 	mutex_unlock(&smmu->streams_mutex);
2441 
2442 	return ret;
2443 }
2444 
2445 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2446 {
2447 	int i;
2448 	struct arm_smmu_device *smmu = master->smmu;
2449 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2450 
2451 	if (!smmu || !master->streams)
2452 		return;
2453 
2454 	mutex_lock(&smmu->streams_mutex);
2455 	for (i = 0; i < fwspec->num_ids; i++)
2456 		rb_erase(&master->streams[i].node, &smmu->streams);
2457 	mutex_unlock(&smmu->streams_mutex);
2458 
2459 	kfree(master->streams);
2460 }
2461 
2462 static struct iommu_ops arm_smmu_ops;
2463 
2464 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2465 {
2466 	int ret;
2467 	struct arm_smmu_device *smmu;
2468 	struct arm_smmu_master *master;
2469 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2470 
2471 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2472 		return ERR_PTR(-ENODEV);
2473 
2474 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2475 		return ERR_PTR(-EBUSY);
2476 
2477 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2478 	if (!smmu)
2479 		return ERR_PTR(-ENODEV);
2480 
2481 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2482 	if (!master)
2483 		return ERR_PTR(-ENOMEM);
2484 
2485 	master->dev = dev;
2486 	master->smmu = smmu;
2487 	INIT_LIST_HEAD(&master->bonds);
2488 	dev_iommu_priv_set(dev, master);
2489 
2490 	ret = arm_smmu_insert_master(smmu, master);
2491 	if (ret)
2492 		goto err_free_master;
2493 
2494 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2495 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2496 
2497 	/*
2498 	 * Note that PASID must be enabled before, and disabled after ATS:
2499 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2500 	 *
2501 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2502 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2503 	 *   are changed.
2504 	 */
2505 	arm_smmu_enable_pasid(master);
2506 
2507 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2508 		master->ssid_bits = min_t(u8, master->ssid_bits,
2509 					  CTXDESC_LINEAR_CDMAX);
2510 
2511 	return &smmu->iommu;
2512 
2513 err_free_master:
2514 	kfree(master);
2515 	dev_iommu_priv_set(dev, NULL);
2516 	return ERR_PTR(ret);
2517 }
2518 
2519 static void arm_smmu_release_device(struct device *dev)
2520 {
2521 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2522 	struct arm_smmu_master *master;
2523 
2524 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2525 		return;
2526 
2527 	master = dev_iommu_priv_get(dev);
2528 	WARN_ON(arm_smmu_master_sva_enabled(master));
2529 	arm_smmu_detach_dev(master);
2530 	arm_smmu_disable_pasid(master);
2531 	arm_smmu_remove_master(master);
2532 	kfree(master);
2533 	iommu_fwspec_free(dev);
2534 }
2535 
/*
 * Choose the IOMMU group for @dev: PCI devices use the PCI grouping helper,
 * everything else gets a generic per-device group.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	return dev_is_pci(dev) ? pci_device_group(dev)
			       : generic_device_group(dev);
}
2552 
2553 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2554 {
2555 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2556 	int ret = 0;
2557 
2558 	mutex_lock(&smmu_domain->init_mutex);
2559 	if (smmu_domain->smmu)
2560 		ret = -EPERM;
2561 	else
2562 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2563 	mutex_unlock(&smmu_domain->init_mutex);
2564 
2565 	return ret;
2566 }
2567 
2568 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2569 {
2570 	return iommu_fwspec_add_ids(dev, args->args, 1);
2571 }
2572 
2573 static void arm_smmu_get_resv_regions(struct device *dev,
2574 				      struct list_head *head)
2575 {
2576 	struct iommu_resv_region *region;
2577 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2578 
2579 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2580 					 prot, IOMMU_RESV_SW_MSI);
2581 	if (!region)
2582 		return;
2583 
2584 	list_add_tail(&region->list, head);
2585 
2586 	iommu_dma_get_resv_regions(dev, head);
2587 }
2588 
2589 static bool arm_smmu_dev_has_feature(struct device *dev,
2590 				     enum iommu_dev_features feat)
2591 {
2592 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2593 
2594 	if (!master)
2595 		return false;
2596 
2597 	switch (feat) {
2598 	case IOMMU_DEV_FEAT_SVA:
2599 		return arm_smmu_master_sva_supported(master);
2600 	default:
2601 		return false;
2602 	}
2603 }
2604 
2605 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2606 					 enum iommu_dev_features feat)
2607 {
2608 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2609 
2610 	if (!master)
2611 		return false;
2612 
2613 	switch (feat) {
2614 	case IOMMU_DEV_FEAT_SVA:
2615 		return arm_smmu_master_sva_enabled(master);
2616 	default:
2617 		return false;
2618 	}
2619 }
2620 
2621 static int arm_smmu_dev_enable_feature(struct device *dev,
2622 				       enum iommu_dev_features feat)
2623 {
2624 	if (!arm_smmu_dev_has_feature(dev, feat))
2625 		return -ENODEV;
2626 
2627 	if (arm_smmu_dev_feature_enabled(dev, feat))
2628 		return -EBUSY;
2629 
2630 	switch (feat) {
2631 	case IOMMU_DEV_FEAT_SVA:
2632 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2633 	default:
2634 		return -EINVAL;
2635 	}
2636 }
2637 
2638 static int arm_smmu_dev_disable_feature(struct device *dev,
2639 					enum iommu_dev_features feat)
2640 {
2641 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2642 		return -EINVAL;
2643 
2644 	switch (feat) {
2645 	case IOMMU_DEV_FEAT_SVA:
2646 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2647 	default:
2648 		return -EINVAL;
2649 	}
2650 }
2651 
2652 static struct iommu_ops arm_smmu_ops = {
2653 	.capable		= arm_smmu_capable,
2654 	.domain_alloc		= arm_smmu_domain_alloc,
2655 	.domain_free		= arm_smmu_domain_free,
2656 	.attach_dev		= arm_smmu_attach_dev,
2657 	.map			= arm_smmu_map,
2658 	.unmap			= arm_smmu_unmap,
2659 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2660 	.iotlb_sync		= arm_smmu_iotlb_sync,
2661 	.iova_to_phys		= arm_smmu_iova_to_phys,
2662 	.probe_device		= arm_smmu_probe_device,
2663 	.release_device		= arm_smmu_release_device,
2664 	.device_group		= arm_smmu_device_group,
2665 	.enable_nesting		= arm_smmu_enable_nesting,
2666 	.of_xlate		= arm_smmu_of_xlate,
2667 	.get_resv_regions	= arm_smmu_get_resv_regions,
2668 	.put_resv_regions	= generic_iommu_put_resv_regions,
2669 	.dev_has_feat		= arm_smmu_dev_has_feature,
2670 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2671 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2672 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2673 	.sva_bind		= arm_smmu_sva_bind,
2674 	.sva_unbind		= arm_smmu_sva_unbind,
2675 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2676 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2677 	.owner			= THIS_MODULE,
2678 };
2679 
2680 /* Probing and initialisation functions */
2681 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2682 				   struct arm_smmu_queue *q,
2683 				   void __iomem *page,
2684 				   unsigned long prod_off,
2685 				   unsigned long cons_off,
2686 				   size_t dwords, const char *name)
2687 {
2688 	size_t qsz;
2689 
2690 	do {
2691 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2692 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2693 					      GFP_KERNEL);
2694 		if (q->base || qsz < PAGE_SIZE)
2695 			break;
2696 
2697 		q->llq.max_n_shift--;
2698 	} while (1);
2699 
2700 	if (!q->base) {
2701 		dev_err(smmu->dev,
2702 			"failed to allocate queue (0x%zx bytes) for %s\n",
2703 			qsz, name);
2704 		return -ENOMEM;
2705 	}
2706 
2707 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2708 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2709 			 1 << q->llq.max_n_shift, name);
2710 	}
2711 
2712 	q->prod_reg	= page + prod_off;
2713 	q->cons_reg	= page + cons_off;
2714 	q->ent_dwords	= dwords;
2715 
2716 	q->q_base  = Q_BASE_RWA;
2717 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2718 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2719 
2720 	q->llq.prod = q->llq.cons = 0;
2721 	return 0;
2722 }
2723 
/* devm action callback: release the command queue's valid-map bitmap */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free(data);
}
2729 
2730 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2731 {
2732 	int ret = 0;
2733 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2734 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2735 	atomic_long_t *bitmap;
2736 
2737 	atomic_set(&cmdq->owner_prod, 0);
2738 	atomic_set(&cmdq->lock, 0);
2739 
2740 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2741 	if (!bitmap) {
2742 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2743 		ret = -ENOMEM;
2744 	} else {
2745 		cmdq->valid_map = bitmap;
2746 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2747 	}
2748 
2749 	return ret;
2750 }
2751 
2752 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2753 {
2754 	int ret;
2755 
2756 	/* cmdq */
2757 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2758 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2759 				      CMDQ_ENT_DWORDS, "cmdq");
2760 	if (ret)
2761 		return ret;
2762 
2763 	ret = arm_smmu_cmdq_init(smmu);
2764 	if (ret)
2765 		return ret;
2766 
2767 	/* evtq */
2768 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2769 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2770 				      EVTQ_ENT_DWORDS, "evtq");
2771 	if (ret)
2772 		return ret;
2773 
2774 	/* priq */
2775 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2776 		return 0;
2777 
2778 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2779 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2780 				       PRIQ_ENT_DWORDS, "priq");
2781 }
2782 
2783 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2784 {
2785 	unsigned int i;
2786 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2787 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2788 	void *strtab = smmu->strtab_cfg.strtab;
2789 
2790 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2791 	if (!cfg->l1_desc) {
2792 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2793 		return -ENOMEM;
2794 	}
2795 
2796 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2797 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2798 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2799 	}
2800 
2801 	return 0;
2802 }
2803 
2804 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2805 {
2806 	void *strtab;
2807 	u64 reg;
2808 	u32 size, l1size;
2809 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2810 
2811 	/* Calculate the L1 size, capped to the SIDSIZE. */
2812 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2813 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2814 	cfg->num_l1_ents = 1 << size;
2815 
2816 	size += STRTAB_SPLIT;
2817 	if (size < smmu->sid_bits)
2818 		dev_warn(smmu->dev,
2819 			 "2-level strtab only covers %u/%u bits of SID\n",
2820 			 size, smmu->sid_bits);
2821 
2822 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2823 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2824 				     GFP_KERNEL);
2825 	if (!strtab) {
2826 		dev_err(smmu->dev,
2827 			"failed to allocate l1 stream table (%u bytes)\n",
2828 			l1size);
2829 		return -ENOMEM;
2830 	}
2831 	cfg->strtab = strtab;
2832 
2833 	/* Configure strtab_base_cfg for 2 levels */
2834 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2835 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2836 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2837 	cfg->strtab_base_cfg = reg;
2838 
2839 	return arm_smmu_init_l1_strtab(smmu);
2840 }
2841 
2842 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2843 {
2844 	void *strtab;
2845 	u64 reg;
2846 	u32 size;
2847 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2848 
2849 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2850 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2851 				     GFP_KERNEL);
2852 	if (!strtab) {
2853 		dev_err(smmu->dev,
2854 			"failed to allocate linear stream table (%u bytes)\n",
2855 			size);
2856 		return -ENOMEM;
2857 	}
2858 	cfg->strtab = strtab;
2859 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2860 
2861 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2862 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2863 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2864 	cfg->strtab_base_cfg = reg;
2865 
2866 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2867 	return 0;
2868 }
2869 
2870 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2871 {
2872 	u64 reg;
2873 	int ret;
2874 
2875 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2876 		ret = arm_smmu_init_strtab_2lvl(smmu);
2877 	else
2878 		ret = arm_smmu_init_strtab_linear(smmu);
2879 
2880 	if (ret)
2881 		return ret;
2882 
2883 	/* Set the strtab base address */
2884 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2885 	reg |= STRTAB_BASE_RA;
2886 	smmu->strtab_cfg.strtab_base = reg;
2887 
2888 	/* Allocate the first VMID for stage-2 bypass STEs */
2889 	set_bit(0, smmu->vmid_map);
2890 	return 0;
2891 }
2892 
2893 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2894 {
2895 	int ret;
2896 
2897 	mutex_init(&smmu->streams_mutex);
2898 	smmu->streams = RB_ROOT;
2899 
2900 	ret = arm_smmu_init_queues(smmu);
2901 	if (ret)
2902 		return ret;
2903 
2904 	return arm_smmu_init_strtab(smmu);
2905 }
2906 
2907 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2908 				   unsigned int reg_off, unsigned int ack_off)
2909 {
2910 	u32 reg;
2911 
2912 	writel_relaxed(val, smmu->base + reg_off);
2913 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2914 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2915 }
2916 
2917 /* GBPA is "special" */
2918 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2919 {
2920 	int ret;
2921 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2922 
2923 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2924 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2925 	if (ret)
2926 		return ret;
2927 
2928 	reg &= ~clr;
2929 	reg |= set;
2930 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2931 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2932 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2933 
2934 	if (ret)
2935 		dev_err(smmu->dev, "GBPA not responding to update\n");
2936 	return ret;
2937 }
2938 
/* devm action callback: free the platform MSIs of the device in @data */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
2944 
2945 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2946 {
2947 	phys_addr_t doorbell;
2948 	struct device *dev = msi_desc_to_dev(desc);
2949 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2950 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2951 
2952 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2953 	doorbell &= MSI_CFG0_ADDR_MASK;
2954 
2955 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2956 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2957 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2958 }
2959 
2960 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2961 {
2962 	struct msi_desc *desc;
2963 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2964 	struct device *dev = smmu->dev;
2965 
2966 	/* Clear the MSI address regs */
2967 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2968 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2969 
2970 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2971 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2972 	else
2973 		nvec--;
2974 
2975 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2976 		return;
2977 
2978 	if (!dev->msi_domain) {
2979 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2980 		return;
2981 	}
2982 
2983 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2984 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2985 	if (ret) {
2986 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2987 		return;
2988 	}
2989 
2990 	for_each_msi_entry(desc, dev) {
2991 		switch (desc->platform.msi_index) {
2992 		case EVTQ_MSI_INDEX:
2993 			smmu->evtq.q.irq = desc->irq;
2994 			break;
2995 		case GERROR_MSI_INDEX:
2996 			smmu->gerr_irq = desc->irq;
2997 			break;
2998 		case PRIQ_MSI_INDEX:
2999 			smmu->priq.q.irq = desc->irq;
3000 			break;
3001 		default:	/* Unknown */
3002 			continue;
3003 		}
3004 	}
3005 
3006 	/* Add callback to free MSIs on teardown */
3007 	devm_add_action(dev, arm_smmu_free_msis, dev);
3008 }
3009 
3010 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3011 {
3012 	int irq, ret;
3013 
3014 	arm_smmu_setup_msis(smmu);
3015 
3016 	/* Request interrupt lines */
3017 	irq = smmu->evtq.q.irq;
3018 	if (irq) {
3019 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3020 						arm_smmu_evtq_thread,
3021 						IRQF_ONESHOT,
3022 						"arm-smmu-v3-evtq", smmu);
3023 		if (ret < 0)
3024 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3025 	} else {
3026 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3027 	}
3028 
3029 	irq = smmu->gerr_irq;
3030 	if (irq) {
3031 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3032 				       0, "arm-smmu-v3-gerror", smmu);
3033 		if (ret < 0)
3034 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3035 	} else {
3036 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3037 	}
3038 
3039 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3040 		irq = smmu->priq.q.irq;
3041 		if (irq) {
3042 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3043 							arm_smmu_priq_thread,
3044 							IRQF_ONESHOT,
3045 							"arm-smmu-v3-priq",
3046 							smmu);
3047 			if (ret < 0)
3048 				dev_warn(smmu->dev,
3049 					 "failed to enable priq irq\n");
3050 		} else {
3051 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3052 		}
3053 	}
3054 }
3055 
3056 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3057 {
3058 	int ret, irq;
3059 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3060 
3061 	/* Disable IRQs first */
3062 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3063 				      ARM_SMMU_IRQ_CTRLACK);
3064 	if (ret) {
3065 		dev_err(smmu->dev, "failed to disable irqs\n");
3066 		return ret;
3067 	}
3068 
3069 	irq = smmu->combined_irq;
3070 	if (irq) {
3071 		/*
3072 		 * Cavium ThunderX2 implementation doesn't support unique irq
3073 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3074 		 */
3075 		ret = devm_request_threaded_irq(smmu->dev, irq,
3076 					arm_smmu_combined_irq_handler,
3077 					arm_smmu_combined_irq_thread,
3078 					IRQF_ONESHOT,
3079 					"arm-smmu-v3-combined-irq", smmu);
3080 		if (ret < 0)
3081 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3082 	} else
3083 		arm_smmu_setup_unique_irqs(smmu);
3084 
3085 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3086 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3087 
3088 	/* Enable interrupt generation on the SMMU */
3089 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3090 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3091 	if (ret)
3092 		dev_warn(smmu->dev, "failed to enable irqs\n");
3093 
3094 	return 0;
3095 }
3096 
3097 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3098 {
3099 	int ret;
3100 
3101 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3102 	if (ret)
3103 		dev_err(smmu->dev, "failed to clear cr0\n");
3104 
3105 	return ret;
3106 }
3107 
/*
 * Bring the SMMU from an unknown state to a fully programmed one.
 *
 * The sequence is: disable everything through CR0, program CR1/CR2 and the
 * stream table registers, enable the command queue, invalidate cached
 * configuration and TLB entries, enable the event queue, the PRI queue (if
 * supported) and ATS checking (if supported), set up interrupts and finally
 * write the last CR0 value. The order of the register writes is significant.
 *
 * @smmu:   device whose in-memory structures have already been initialised
 * @bypass: when true and the disable_bypass module parameter is false,
 *          CR0_SMMUEN is left clear and GBPA is updated instead
 *
 * Returns 0 on success, or the error of the first failing register sync,
 * GBPA update or IRQ setup.
 */
3108 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3109 {
3110 	int ret;
3111 	u32 reg, enables;
	/*
	 * NOTE(review): only cmd.opcode is assigned below; the other fields
	 * stay uninitialised. Confirm the command builder ignores them for
	 * the opcodes used here.
	 */
3112 	struct arm_smmu_cmdq_ent cmd;
3113 
3114 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3115 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3116 	if (reg & CR0_SMMUEN) {
3117 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		/* A kdump kernel with bypass allowed is flagged loudly */
3118 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3119 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3120 	}
3121 
3122 	ret = arm_smmu_device_disable(smmu);
3123 	if (ret)
3124 		return ret;
3125 
3126 	/* CR1 (table and queue memory attributes) */
3127 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3128 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3129 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3130 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3131 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3132 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3133 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3134 
3135 	/* CR2: PTM and RECINVSID, plus E2H when the feature is set */
3136 	reg = CR2_PTM | CR2_RECINVSID;
3137 
3138 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3139 		reg |= CR2_E2H;
3140 
3141 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3142 
3143 	/* Stream table */
3144 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3145 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3146 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3147 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3148 
3149 	/* Command queue */
3150 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3151 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3152 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3153 
	/* 'enables' accumulates the CR0 bits switched on so far */
3154 	enables = CR0_CMDQEN;
3155 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3156 				      ARM_SMMU_CR0ACK);
3157 	if (ret) {
3158 		dev_err(smmu->dev, "failed to enable command queue\n");
3159 		return ret;
3160 	}
3161 
3162 	/* Invalidate any cached configuration */
3163 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3164 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3165 	arm_smmu_cmdq_issue_sync(smmu);
3166 
3167 	/* Invalidate any stale TLB entries */
3168 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3169 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3170 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3171 	}
3172 
3173 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3174 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3175 	arm_smmu_cmdq_issue_sync(smmu);
3176 
3177 	/* Event queue (PROD/CONS are accessed through the page1 mapping) */
3178 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3179 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3180 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3181 
3182 	enables |= CR0_EVTQEN;
3183 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3184 				      ARM_SMMU_CR0ACK);
3185 	if (ret) {
3186 		dev_err(smmu->dev, "failed to enable event queue\n");
3187 		return ret;
3188 	}
3189 
3190 	/* PRI queue */
3191 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3192 		writeq_relaxed(smmu->priq.q.q_base,
3193 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3194 		writel_relaxed(smmu->priq.q.llq.prod,
3195 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3196 		writel_relaxed(smmu->priq.q.llq.cons,
3197 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3198 
3199 		enables |= CR0_PRIQEN;
3200 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3201 					      ARM_SMMU_CR0ACK);
3202 		if (ret) {
3203 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3204 			return ret;
3205 		}
3206 	}
3207 
	/* ATS checking, only when the feature was detected */
3208 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3209 		enables |= CR0_ATSCHK;
3210 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3211 					      ARM_SMMU_CR0ACK);
3212 		if (ret) {
3213 			dev_err(smmu->dev, "failed to enable ATS check\n");
3214 			return ret;
3215 		}
3216 	}
3217 
3218 	ret = arm_smmu_setup_irqs(smmu);
3219 	if (ret) {
3220 		dev_err(smmu->dev, "failed to setup irqs\n");
3221 		return ret;
3222 	}
3223 
	/* In a kdump kernel the final CR0 value omits the event and PRI queues */
3224 	if (is_kdump_kernel())
3225 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3226 
3227 	/* Enable the SMMU interface, or ensure bypass */
3228 	if (!bypass || disable_bypass) {
3229 		enables |= CR0_SMMUEN;
3230 	} else {
3231 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3232 		if (ret)
3233 			return ret;
3234 	}
3235 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3236 				      ARM_SMMU_CR0ACK);
3237 	if (ret) {
3238 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3239 		return ret;
3240 	}
3241 
3242 	return 0;
3243 }
3244 
/*
 * Read the SMMU ID registers (IDR0, IDR1, IDR3, IDR5) and record what the
 * hardware supports in @smmu: feature flags, the MSI polling option,
 * ASID/VMID/SID/SSID widths, queue size limits, the maximum number of
 * outstanding stalls, supported page sizes and input/output address sizes.
 * The page sizes are also merged into arm_smmu_ops.pgsize_bitmap and the
 * device DMA mask is set from the output address size.
 *
 * Returns 0 on success, or -ENXIO when the hardware reports an unsupported
 * translation table endianness, no translation stage, no AArch64 table
 * format, an embedded implementation, or a command queue that is too small.
 */
3245 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3246 {
3247 	u32 reg;
	/* Coherency as already recorded in smmu->features before this call */
3248 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3249 
3250 	/* IDR0 */
3251 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3252 
3253 	/* 2-level structures */
3254 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3255 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3256 
3257 	if (reg & IDR0_CD2L)
3258 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3259 
3260 	/*
3261 	 * Translation table endianness.
3262 	 * We currently require the same endianness as the CPU, but this
3263 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3264 	 */
3265 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3266 	case IDR0_TTENDIAN_MIXED:
3267 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3268 		break;
3269 #ifdef __BIG_ENDIAN
3270 	case IDR0_TTENDIAN_BE:
3271 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3272 		break;
3273 #else
3274 	case IDR0_TTENDIAN_LE:
3275 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3276 		break;
3277 #endif
3278 	default:
3279 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3280 		return -ENXIO;
3281 	}
3282 
3283 	/* Boolean feature flags (PRI and ATS also need their kernel config) */
3284 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3285 		smmu->features |= ARM_SMMU_FEAT_PRI;
3286 
3287 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3288 		smmu->features |= ARM_SMMU_FEAT_ATS;
3289 
3290 	if (reg & IDR0_SEV)
3291 		smmu->features |= ARM_SMMU_FEAT_SEV;
3292 
	/* MSI polling is only opted into for coherent SMMUs, unless disabled */
3293 	if (reg & IDR0_MSI) {
3294 		smmu->features |= ARM_SMMU_FEAT_MSI;
3295 		if (coherent && !disable_msipolling)
3296 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3297 	}
3298 
	/* E2H is only set on top of HYP, and only if the CPUs have the VHE cap */
3299 	if (reg & IDR0_HYP) {
3300 		smmu->features |= ARM_SMMU_FEAT_HYP;
3301 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3302 			smmu->features |= ARM_SMMU_FEAT_E2H;
3303 	}
3304 
3305 	/*
3306 	 * The coherency feature as set by FW is used in preference to the ID
3307 	 * register, but warn on mismatch.
3308 	 */
3309 	if (!!(reg & IDR0_COHACC) != coherent)
3310 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3311 			 coherent ? "true" : "false");
3312 
	/*
	 * FORCE sets STALL_FORCE and, through the fallthrough, STALLS as well;
	 * any other stall model value sets neither flag.
	 */
3313 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3314 	case IDR0_STALL_MODEL_FORCE:
3315 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3316 		fallthrough;
3317 	case IDR0_STALL_MODEL_STALL:
3318 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3319 	}
3320 
3321 	if (reg & IDR0_S1P)
3322 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3323 
3324 	if (reg & IDR0_S2P)
3325 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3326 
3327 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3328 		dev_err(smmu->dev, "no translation support!\n");
3329 		return -ENXIO;
3330 	}
3331 
3332 	/* We only support the AArch64 table format at present */
3333 	switch (FIELD_GET(IDR0_TTF, reg)) {
3334 	case IDR0_TTF_AARCH32_64:
		/* Starting value only; raised to at least oas further down */
3335 		smmu->ias = 40;
3336 		fallthrough;
3337 	case IDR0_TTF_AARCH64:
3338 		break;
3339 	default:
3340 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3341 		return -ENXIO;
3342 	}
3343 
3344 	/* ASID/VMID sizes */
3345 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3346 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3347 
3348 	/* IDR1 */
3349 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3350 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3351 		dev_err(smmu->dev, "embedded implementation not supported\n");
3352 		return -ENXIO;
3353 	}
3354 
3355 	/* Queue sizes, capped to ensure natural alignment */
3356 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3357 					     FIELD_GET(IDR1_CMDQS, reg));
3358 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3359 		/*
3360 		 * We don't support splitting up batches, so one batch of
3361 		 * commands plus an extra sync needs to fit inside the command
3362 		 * queue. There's also no way we can handle the weird alignment
3363 		 * restrictions on the base pointer for a unit-length queue.
3364 		 */
3365 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3366 			CMDQ_BATCH_ENTRIES);
3367 		return -ENXIO;
3368 	}
3369 
3370 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3371 					     FIELD_GET(IDR1_EVTQS, reg));
3372 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3373 					     FIELD_GET(IDR1_PRIQS, reg));
3374 
3375 	/* SID/SSID sizes */
3376 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3377 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3378 
3379 	/*
3380 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3381 	 * table, use a linear table instead.
3382 	 */
3383 	if (smmu->sid_bits <= STRTAB_SPLIT)
3384 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3385 
3386 	/* IDR3 */
3387 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3388 	if (FIELD_GET(IDR3_RIL, reg))
3389 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3390 
3391 	/* IDR5 */
3392 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3393 
3394 	/* Maximum number of outstanding stalls */
3395 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3396 
3397 	/* Page sizes: each granule contributes its page and block sizes */
3398 	if (reg & IDR5_GRAN64K)
3399 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3400 	if (reg & IDR5_GRAN16K)
3401 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3402 	if (reg & IDR5_GRAN4K)
3403 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3404 
3405 	/* Input address size */
3406 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3407 		smmu->features |= ARM_SMMU_FEAT_VAX;
3408 
3409 	/* Output address size; unknown encodings are treated as 48-bit */
3410 	switch (FIELD_GET(IDR5_OAS, reg)) {
3411 	case IDR5_OAS_32_BIT:
3412 		smmu->oas = 32;
3413 		break;
3414 	case IDR5_OAS_36_BIT:
3415 		smmu->oas = 36;
3416 		break;
3417 	case IDR5_OAS_40_BIT:
3418 		smmu->oas = 40;
3419 		break;
3420 	case IDR5_OAS_42_BIT:
3421 		smmu->oas = 42;
3422 		break;
3423 	case IDR5_OAS_44_BIT:
3424 		smmu->oas = 44;
3425 		break;
3426 	case IDR5_OAS_52_BIT:
3427 		smmu->oas = 52;
3428 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3429 		break;
3430 	default:
3431 		dev_info(smmu->dev,
3432 			"unknown output address size. Truncating to 48-bit\n");
3433 		fallthrough;
3434 	case IDR5_OAS_48_BIT:
3435 		smmu->oas = 48;
3436 	}
3437 
	/*
	 * -1UL is treated as "not populated yet": the first SMMU overwrites the
	 * shared bitmap, later ones OR their page sizes in.
	 * NOTE(review): presumably -1UL is the static initial value of
	 * arm_smmu_ops.pgsize_bitmap - confirm at its definition.
	 */
3438 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3439 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3440 	else
3441 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3442 
3443 	/* Set the DMA mask for our table walker */
3444 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3445 		dev_warn(smmu->dev,
3446 			 "failed to set DMA mask for table walker\n");
3447 
	/* The input address size is never smaller than the output size */
3448 	smmu->ias = max(smmu->ias, smmu->oas);
3449 
3450 	if (arm_smmu_sva_supported(smmu))
3451 		smmu->features |= ARM_SMMU_FEAT_SVA;
3452 
3453 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3454 		 smmu->ias, smmu->oas, smmu->features);
3455 	return 0;
3456 }
3457 
3458 #ifdef CONFIG_ACPI
3459 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3460 {
3461 	switch (model) {
3462 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3463 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3464 		break;
3465 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3466 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3467 		break;
3468 	}
3469 
3470 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3471 }
3472 
3473 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3474 				      struct arm_smmu_device *smmu)
3475 {
3476 	struct acpi_iort_smmu_v3 *iort_smmu;
3477 	struct device *dev = smmu->dev;
3478 	struct acpi_iort_node *node;
3479 
3480 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3481 
3482 	/* Retrieve SMMUv3 specific data */
3483 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3484 
3485 	acpi_smmu_get_options(iort_smmu->model, smmu);
3486 
3487 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3488 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3489 
3490 	return 0;
3491 }
3492 #else
/*
 * Stub used when CONFIG_ACPI is not set: there is no ACPI description to
 * probe, so report -ENODEV (the probe routine aborts on that value).
 */
3493 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3494 					     struct arm_smmu_device *smmu)
3495 {
3496 	return -ENODEV;
3497 }
3498 #endif
3499 
3500 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3501 				    struct arm_smmu_device *smmu)
3502 {
3503 	struct device *dev = &pdev->dev;
3504 	u32 cells;
3505 	int ret = -EINVAL;
3506 
3507 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3508 		dev_err(dev, "missing #iommu-cells property\n");
3509 	else if (cells != 1)
3510 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3511 	else
3512 		ret = 0;
3513 
3514 	parse_driver_options(smmu);
3515 
3516 	if (of_dma_is_coherent(dev->of_node))
3517 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3518 
3519 	return ret;
3520 }
3521 
3522 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3523 {
3524 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3525 		return SZ_64K;
3526 	else
3527 		return SZ_128K;
3528 }
3529 
/*
 * Install @ops as the IOMMU ops of the PCI (CONFIG_PCI), AMBA
 * (CONFIG_ARM_AMBA) and platform buses, in that order. A bus whose
 * iommu_ops already equals @ops is skipped.
 *
 * If a later bus fails, the buses earlier in the sequence have their ops set
 * back to NULL - this happens whether or not this call was the one that
 * installed them.
 *
 * Returns 0 on success or the error from the failing bus_set_iommu() call.
 */
3530 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3531 {
3532 	int err;
3533 
3534 #ifdef CONFIG_PCI
3535 	if (pci_bus_type.iommu_ops != ops) {
3536 		err = bus_set_iommu(&pci_bus_type, ops);
		/* First bus in the sequence: nothing to unwind yet */
3537 		if (err)
3538 			return err;
3539 	}
3540 #endif
3541 #ifdef CONFIG_ARM_AMBA
3542 	if (amba_bustype.iommu_ops != ops) {
3543 		err = bus_set_iommu(&amba_bustype, ops);
3544 		if (err)
3545 			goto err_reset_pci_ops;
3546 	}
3547 #endif
3548 	if (platform_bus_type.iommu_ops != ops) {
3549 		err = bus_set_iommu(&platform_bus_type, ops);
3550 		if (err)
3551 			goto err_reset_amba_ops;
3552 	}
3553 
3554 	return 0;
3555 
	/* Unwind in reverse order; each reset is compiled in only with its bus */
3556 err_reset_amba_ops:
3557 #ifdef CONFIG_ARM_AMBA
3558 	bus_set_iommu(&amba_bustype, NULL);
3559 #endif
	/* The label has no user without CONFIG_ARM_AMBA, hence __maybe_unused */
3560 err_reset_pci_ops: __maybe_unused;
3561 #ifdef CONFIG_PCI
3562 	bus_set_iommu(&pci_bus_type, NULL);
3563 #endif
3564 	return err;
3565 }
3566 
3567 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3568 				      resource_size_t size)
3569 {
3570 	struct resource res = DEFINE_RES_MEM(start, size);
3571 
3572 	return devm_ioremap_resource(dev, &res);
3573 }
3574 
3575 static int arm_smmu_device_probe(struct platform_device *pdev)
3576 {
3577 	int irq, ret;
3578 	struct resource *res;
3579 	resource_size_t ioaddr;
3580 	struct arm_smmu_device *smmu;
3581 	struct device *dev = &pdev->dev;
3582 	bool bypass;
3583 
3584 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3585 	if (!smmu) {
3586 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3587 		return -ENOMEM;
3588 	}
3589 	smmu->dev = dev;
3590 
3591 	if (dev->of_node) {
3592 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3593 	} else {
3594 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3595 		if (ret == -ENODEV)
3596 			return ret;
3597 	}
3598 
3599 	/* Set bypass mode according to firmware probing result */
3600 	bypass = !!ret;
3601 
3602 	/* Base address */
3603 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3604 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3605 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3606 		return -EINVAL;
3607 	}
3608 	ioaddr = res->start;
3609 
3610 	/*
3611 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3612 	 * the PMCG registers which are reserved by the PMU driver.
3613 	 */
3614 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3615 	if (IS_ERR(smmu->base))
3616 		return PTR_ERR(smmu->base);
3617 
3618 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3619 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3620 					       ARM_SMMU_REG_SZ);
3621 		if (IS_ERR(smmu->page1))
3622 			return PTR_ERR(smmu->page1);
3623 	} else {
3624 		smmu->page1 = smmu->base;
3625 	}
3626 
3627 	/* Interrupt lines */
3628 
3629 	irq = platform_get_irq_byname_optional(pdev, "combined");
3630 	if (irq > 0)
3631 		smmu->combined_irq = irq;
3632 	else {
3633 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3634 		if (irq > 0)
3635 			smmu->evtq.q.irq = irq;
3636 
3637 		irq = platform_get_irq_byname_optional(pdev, "priq");
3638 		if (irq > 0)
3639 			smmu->priq.q.irq = irq;
3640 
3641 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3642 		if (irq > 0)
3643 			smmu->gerr_irq = irq;
3644 	}
3645 	/* Probe the h/w */
3646 	ret = arm_smmu_device_hw_probe(smmu);
3647 	if (ret)
3648 		return ret;
3649 
3650 	/* Initialise in-memory data structures */
3651 	ret = arm_smmu_init_structures(smmu);
3652 	if (ret)
3653 		return ret;
3654 
3655 	/* Record our private device structure */
3656 	platform_set_drvdata(pdev, smmu);
3657 
3658 	/* Reset the device */
3659 	ret = arm_smmu_device_reset(smmu, bypass);
3660 	if (ret)
3661 		return ret;
3662 
3663 	/* And we're up. Go go go! */
3664 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3665 				     "smmu3.%pa", &ioaddr);
3666 	if (ret)
3667 		return ret;
3668 
3669 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3670 	if (ret) {
3671 		dev_err(dev, "Failed to register iommu\n");
3672 		return ret;
3673 	}
3674 
3675 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3676 }
3677 
3678 static int arm_smmu_device_remove(struct platform_device *pdev)
3679 {
3680 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3681 
3682 	arm_smmu_set_bus_ops(NULL);
3683 	iommu_device_unregister(&smmu->iommu);
3684 	iommu_device_sysfs_remove(&smmu->iommu);
3685 	arm_smmu_device_disable(smmu);
3686 
3687 	return 0;
3688 }
3689 
/*
 * Platform driver shutdown hook: performs exactly the same teardown as
 * arm_smmu_device_remove() and discards its return value.
 */
3690 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3691 {
3692 	arm_smmu_device_remove(pdev);
3693 }
3694 
/*
 * Device-tree match table: the driver binds to nodes compatible with
 * "arm,smmu-v3". The empty entry terminates the table.
 */
3695 static const struct of_device_id arm_smmu_of_match[] = {
3696 	{ .compatible = "arm,smmu-v3", },
3697 	{ },
3698 };
3699 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3700 
/*
 * Module exit helper passed to module_driver(): calls
 * arm_smmu_sva_notifier_synchronize() and then unregisters the platform
 * driver.
 * NOTE(review): presumably the synchronize call waits for outstanding SVA
 * notifier work before the module text goes away - confirm in the SVA code.
 */
3701 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3702 {
3703 	arm_smmu_sva_notifier_synchronize();
3704 	platform_driver_unregister(drv);
3705 }
3706 
/*
 * Platform driver description: matched by name "arm-smmu-v3" or through
 * arm_smmu_of_match, with probe/remove/shutdown callbacks defined in this
 * file.
 */
3707 static struct platform_driver arm_smmu_driver = {
3708 	.driver	= {
3709 		.name			= "arm-smmu-v3",
3710 		.of_match_table		= arm_smmu_of_match,
3711 		.suppress_bind_attrs	= true,
3712 	},
3713 	.probe	= arm_smmu_device_probe,
3714 	.remove	= arm_smmu_device_remove,
3715 	.shutdown = arm_smmu_device_shutdown,
3716 };
/*
 * Register with platform_driver_register() on module init; unregistration
 * goes through arm_smmu_driver_unregister() instead of the plain helper.
 */
3717 module_driver(arm_smmu_driver, platform_driver_register,
3718 	      arm_smmu_driver_unregister);
3719 
3720 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3721 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3722 MODULE_ALIAS("platform:arm-smmu-v3");
3723 MODULE_LICENSE("GPL v2");
3724