xref: /openbmc/linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision b1a792601f264df7172a728f1a83a05b6b399dfb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
45 
/* Row selector for the per-MSI register table arm_smmu_msi_cfg[]. */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX = 0,	/* row of ARM_SMMU_EVTQ_IRQ_CFG* */
	GERROR_MSI_INDEX,	/* row of ARM_SMMU_GERROR_IRQ_CFG* */
	PRIQ_MSI_INDEX,		/* row of ARM_SMMU_PRIQ_IRQ_CFG* */
	ARM_SMMU_MAX_MSIS,	/* number of rows; must stay last */
};
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
/*
 * Global ASID bookkeeping: an allocating XArray and a mutex. Neither is
 * referenced in this part of the file.
 * NOTE(review): presumably the mutex serialises ASID allocation against
 * the XArray -- confirm against the users of these symbols.
 */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 /*
80  * Special value used by SVA when a process dies, to quiesce a CD without
81  * disabling it.
82  */
83 struct arm_smmu_ctx_desc quiet_cd = { 0 };
84 
85 static struct arm_smmu_option_prop arm_smmu_options[] = {
86 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88 	{ 0, NULL},
89 };
90 
91 static void parse_driver_options(struct arm_smmu_device *smmu)
92 {
93 	int i = 0;
94 
95 	do {
96 		if (of_property_read_bool(smmu->dev->of_node,
97 						arm_smmu_options[i].prop)) {
98 			smmu->options |= arm_smmu_options[i].opt;
99 			dev_notice(smmu->dev, "option %s\n",
100 				arm_smmu_options[i].prop);
101 		}
102 	} while (arm_smmu_options[++i].opt);
103 }
104 
105 /* Low-level queue manipulation functions */
106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
107 {
108 	u32 space, prod, cons;
109 
110 	prod = Q_IDX(q, q->prod);
111 	cons = Q_IDX(q, q->cons);
112 
113 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
114 		space = (1 << q->max_n_shift) - (prod - cons);
115 	else
116 		space = cons - prod;
117 
118 	return space >= n;
119 }
120 
121 static bool queue_full(struct arm_smmu_ll_queue *q)
122 {
123 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
124 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
125 }
126 
127 static bool queue_empty(struct arm_smmu_ll_queue *q)
128 {
129 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
130 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
131 }
132 
133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
134 {
135 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
137 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
138 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
139 }
140 
141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
142 {
143 	/*
144 	 * Ensure that all CPU accesses (reads and writes) to the queue
145 	 * are complete before we update the cons pointer.
146 	 */
147 	__iomb();
148 	writel_relaxed(q->llq.cons, q->cons_reg);
149 }
150 
151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
152 {
153 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
154 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
155 }
156 
157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
158 {
159 	u32 prod;
160 	int ret = 0;
161 
162 	/*
163 	 * We can't use the _relaxed() variant here, as we must prevent
164 	 * speculative reads of the queue before we have determined that
165 	 * prod has indeed moved.
166 	 */
167 	prod = readl(q->prod_reg);
168 
169 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
170 		ret = -EOVERFLOW;
171 
172 	q->llq.prod = prod;
173 	return ret;
174 }
175 
176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
177 {
178 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
179 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
180 }
181 
182 static void queue_poll_init(struct arm_smmu_device *smmu,
183 			    struct arm_smmu_queue_poll *qp)
184 {
185 	qp->delay = 1;
186 	qp->spin_cnt = 0;
187 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
188 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
189 }
190 
191 static int queue_poll(struct arm_smmu_queue_poll *qp)
192 {
193 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
194 		return -ETIMEDOUT;
195 
196 	if (qp->wfe) {
197 		wfe();
198 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
199 		cpu_relax();
200 	} else {
201 		udelay(qp->delay);
202 		qp->delay *= 2;
203 		qp->spin_cnt = 0;
204 	}
205 
206 	return 0;
207 }
208 
209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
210 {
211 	int i;
212 
213 	for (i = 0; i < n_dwords; ++i)
214 		*dst++ = cpu_to_le64(*src++);
215 }
216 
217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = le64_to_cpu(*src++);
223 }
224 
225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
226 {
227 	if (queue_empty(&q->llq))
228 		return -EAGAIN;
229 
230 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
231 	queue_inc_cons(&q->llq);
232 	queue_sync_cons_out(q);
233 	return 0;
234 }
235 
236 /* High-level queue accessors */
237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
238 {
239 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
240 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
241 
242 	switch (ent->opcode) {
243 	case CMDQ_OP_TLBI_EL2_ALL:
244 	case CMDQ_OP_TLBI_NSNH_ALL:
245 		break;
246 	case CMDQ_OP_PREFETCH_CFG:
247 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
248 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
249 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
250 		break;
251 	case CMDQ_OP_CFGI_CD:
252 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
253 		fallthrough;
254 	case CMDQ_OP_CFGI_STE:
255 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
256 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
257 		break;
258 	case CMDQ_OP_CFGI_CD_ALL:
259 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
260 		break;
261 	case CMDQ_OP_CFGI_ALL:
262 		/* Cover the entire SID range */
263 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
264 		break;
265 	case CMDQ_OP_TLBI_NH_VA:
266 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
267 		fallthrough;
268 	case CMDQ_OP_TLBI_EL2_VA:
269 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
270 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
271 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
272 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
273 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
274 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
275 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
276 		break;
277 	case CMDQ_OP_TLBI_S2_IPA:
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
280 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
283 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
284 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
285 		break;
286 	case CMDQ_OP_TLBI_NH_ASID:
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
288 		fallthrough;
289 	case CMDQ_OP_TLBI_S12_VMALL:
290 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
291 		break;
292 	case CMDQ_OP_TLBI_EL2_ASID:
293 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
294 		break;
295 	case CMDQ_OP_ATC_INV:
296 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
297 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
298 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
299 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
300 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
301 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
302 		break;
303 	case CMDQ_OP_PRI_RESP:
304 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
306 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
307 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
308 		switch (ent->pri.resp) {
309 		case PRI_RESP_DENY:
310 		case PRI_RESP_FAIL:
311 		case PRI_RESP_SUCC:
312 			break;
313 		default:
314 			return -EINVAL;
315 		}
316 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
317 		break;
318 	case CMDQ_OP_CMD_SYNC:
319 		if (ent->sync.msiaddr) {
320 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
321 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
322 		} else {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
324 		}
325 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
326 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
327 		break;
328 	default:
329 		return -ENOENT;
330 	}
331 
332 	return 0;
333 }
334 
335 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
336 					 u32 prod)
337 {
338 	struct arm_smmu_queue *q = &smmu->cmdq.q;
339 	struct arm_smmu_cmdq_ent ent = {
340 		.opcode = CMDQ_OP_CMD_SYNC,
341 	};
342 
343 	/*
344 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
345 	 * payload, so the write will zero the entire command on that platform.
346 	 */
347 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
348 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
349 				   q->ent_dwords * 8;
350 	}
351 
352 	arm_smmu_cmdq_build_cmd(cmd, &ent);
353 }
354 
355 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
356 {
357 	static const char *cerror_str[] = {
358 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
359 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
360 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
361 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
362 	};
363 
364 	int i;
365 	u64 cmd[CMDQ_ENT_DWORDS];
366 	struct arm_smmu_queue *q = &smmu->cmdq.q;
367 	u32 cons = readl_relaxed(q->cons_reg);
368 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
369 	struct arm_smmu_cmdq_ent cmd_sync = {
370 		.opcode = CMDQ_OP_CMD_SYNC,
371 	};
372 
373 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
374 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
375 
376 	switch (idx) {
377 	case CMDQ_ERR_CERROR_ABT_IDX:
378 		dev_err(smmu->dev, "retrying command fetch\n");
379 	case CMDQ_ERR_CERROR_NONE_IDX:
380 		return;
381 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
382 		/*
383 		 * ATC Invalidation Completion timeout. CONS is still pointing
384 		 * at the CMD_SYNC. Attempt to complete other pending commands
385 		 * by repeating the CMD_SYNC, though we might well end up back
386 		 * here since the ATC invalidation may still be pending.
387 		 */
388 		return;
389 	case CMDQ_ERR_CERROR_ILL_IDX:
390 	default:
391 		break;
392 	}
393 
394 	/*
395 	 * We may have concurrent producers, so we need to be careful
396 	 * not to touch any of the shadow cmdq state.
397 	 */
398 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
399 	dev_err(smmu->dev, "skipping command in error state:\n");
400 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
401 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
402 
403 	/* Convert the erroneous command into a CMD_SYNC */
404 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
405 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
406 		return;
407 	}
408 
409 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
410 }
411 
412 /*
413  * Command queue locking.
414  * This is a form of bastardised rwlock with the following major changes:
415  *
416  * - The only LOCK routines are exclusive_trylock() and shared_lock().
417  *   Neither have barrier semantics, and instead provide only a control
418  *   dependency.
419  *
420  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
421  *   fails if the caller appears to be the last lock holder (yes, this is
422  *   racy). All successful UNLOCK routines have RELEASE semantics.
423  */
424 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
425 {
426 	int val;
427 
428 	/*
429 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
430 	 * lock counter. When held in exclusive state, the lock counter is set
431 	 * to INT_MIN so these increments won't hurt as the value will remain
432 	 * negative.
433 	 */
434 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
435 		return;
436 
437 	do {
438 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
439 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
440 }
441 
442 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
443 {
444 	(void)atomic_dec_return_release(&cmdq->lock);
445 }
446 
447 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
448 {
449 	if (atomic_read(&cmdq->lock) == 1)
450 		return false;
451 
452 	arm_smmu_cmdq_shared_unlock(cmdq);
453 	return true;
454 }
455 
/*
 * Try to take the cmdq lock exclusively with local interrupts disabled.
 *
 * Interrupts are disabled (state saved in @flags) and the lock counter is
 * moved from 0 to INT_MIN with a relaxed cmpxchg. On failure interrupts are
 * restored. Evaluates to true if the lock was taken.
 *
 * @cmdq is parenthesised so that any pointer expression may be passed.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&(cmdq)->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
465 
/*
 * Release an exclusively held cmdq lock (counter reset to 0 with RELEASE
 * ordering) and restore the interrupt state saved in @flags.
 *
 * @cmdq is parenthesised so that any pointer expression may be passed.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&(cmdq)->lock, 0);				\
	local_irq_restore(flags);					\
})
471 
472 
473 /*
474  * Command queue insertion.
475  * This is made fiddly by our attempts to achieve some sort of scalability
476  * since there is one queue shared amongst all of the CPUs in the system.  If
477  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
478  * then you'll *love* this monstrosity.
479  *
480  * The basic idea is to split the queue up into ranges of commands that are
481  * owned by a given CPU; the owner may not have written all of the commands
482  * itself, but is responsible for advancing the hardware prod pointer when
483  * the time comes. The algorithm is roughly:
484  *
485  * 	1. Allocate some space in the queue. At this point we also discover
486  *	   whether the head of the queue is currently owned by another CPU,
487  *	   or whether we are the owner.
488  *
489  *	2. Write our commands into our allocated slots in the queue.
490  *
491  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
492  *
493  *	4. If we are an owner:
494  *		a. Wait for the previous owner to finish.
495  *		b. Mark the queue head as unowned, which tells us the range
496  *		   that we are responsible for publishing.
497  *		c. Wait for all commands in our owned range to become valid.
498  *		d. Advance the hardware prod pointer.
499  *		e. Tell the next owner we've finished.
500  *
501  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
502  *	   owner), then we need to stick around until it has completed:
503  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
504  *		   to clear the first 4 bytes.
505  *		b. Otherwise, we spin waiting for the hardware cons pointer to
506  *		   advance past our command.
507  *
508  * The devil is in the details, particularly the use of locking for handling
509  * SYNC completion and freeing up space in the queue before we think that it is
510  * full.
511  */
512 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
513 					       u32 sprod, u32 eprod, bool set)
514 {
515 	u32 swidx, sbidx, ewidx, ebidx;
516 	struct arm_smmu_ll_queue llq = {
517 		.max_n_shift	= cmdq->q.llq.max_n_shift,
518 		.prod		= sprod,
519 	};
520 
521 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
522 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
523 
524 	while (llq.prod != eprod) {
525 		unsigned long mask;
526 		atomic_long_t *ptr;
527 		u32 limit = BITS_PER_LONG;
528 
529 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
530 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
531 
532 		ptr = &cmdq->valid_map[swidx];
533 
534 		if ((swidx == ewidx) && (sbidx < ebidx))
535 			limit = ebidx;
536 
537 		mask = GENMASK(limit - 1, sbidx);
538 
539 		/*
540 		 * The valid bit is the inverse of the wrap bit. This means
541 		 * that a zero-initialised queue is invalid and, after marking
542 		 * all entries as valid, they become invalid again when we
543 		 * wrap.
544 		 */
545 		if (set) {
546 			atomic_long_xor(mask, ptr);
547 		} else { /* Poll */
548 			unsigned long valid;
549 
550 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
551 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
552 		}
553 
554 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
555 	}
556 }
557 
558 /* Mark all entries in the range [sprod, eprod) as valid */
559 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
560 					u32 sprod, u32 eprod)
561 {
562 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
563 }
564 
565 /* Wait for all entries in the range [sprod, eprod) to become valid */
566 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
567 					 u32 sprod, u32 eprod)
568 {
569 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
570 }
571 
572 /* Wait for the command queue to become non-full */
573 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
574 					     struct arm_smmu_ll_queue *llq)
575 {
576 	unsigned long flags;
577 	struct arm_smmu_queue_poll qp;
578 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
579 	int ret = 0;
580 
581 	/*
582 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
583 	 * that fails, spin until somebody else updates it for us.
584 	 */
585 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
586 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
587 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
588 		llq->val = READ_ONCE(cmdq->q.llq.val);
589 		return 0;
590 	}
591 
592 	queue_poll_init(smmu, &qp);
593 	do {
594 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
595 		if (!queue_full(llq))
596 			break;
597 
598 		ret = queue_poll(&qp);
599 	} while (!ret);
600 
601 	return ret;
602 }
603 
604 /*
605  * Wait until the SMMU signals a CMD_SYNC completion MSI.
606  * Must be called with the cmdq lock held in some capacity.
607  */
608 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
609 					  struct arm_smmu_ll_queue *llq)
610 {
611 	int ret = 0;
612 	struct arm_smmu_queue_poll qp;
613 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
614 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
615 
616 	queue_poll_init(smmu, &qp);
617 
618 	/*
619 	 * The MSI won't generate an event, since it's being written back
620 	 * into the command queue.
621 	 */
622 	qp.wfe = false;
623 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
624 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
625 	return ret;
626 }
627 
628 /*
629  * Wait until the SMMU cons index passes llq->prod.
630  * Must be called with the cmdq lock held in some capacity.
631  */
632 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
633 					       struct arm_smmu_ll_queue *llq)
634 {
635 	struct arm_smmu_queue_poll qp;
636 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
637 	u32 prod = llq->prod;
638 	int ret = 0;
639 
640 	queue_poll_init(smmu, &qp);
641 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
642 	do {
643 		if (queue_consumed(llq, prod))
644 			break;
645 
646 		ret = queue_poll(&qp);
647 
648 		/*
649 		 * This needs to be a readl() so that our subsequent call
650 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
651 		 *
652 		 * Specifically, we need to ensure that we observe all
653 		 * shared_lock()s by other CMD_SYNCs that share our owner,
654 		 * so that a failing call to tryunlock() means that we're
655 		 * the last one out and therefore we can safely advance
656 		 * cmdq->q.llq.cons. Roughly speaking:
657 		 *
658 		 * CPU 0		CPU1			CPU2 (us)
659 		 *
660 		 * if (sync)
661 		 * 	shared_lock();
662 		 *
663 		 * dma_wmb();
664 		 * set_valid_map();
665 		 *
666 		 * 			if (owner) {
667 		 *				poll_valid_map();
668 		 *				<control dependency>
669 		 *				writel(prod_reg);
670 		 *
671 		 *						readl(cons_reg);
672 		 *						tryunlock();
673 		 *
674 		 * Requires us to see CPU 0's shared_lock() acquisition.
675 		 */
676 		llq->cons = readl(cmdq->q.cons_reg);
677 	} while (!ret);
678 
679 	return ret;
680 }
681 
682 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
683 					 struct arm_smmu_ll_queue *llq)
684 {
685 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
686 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
687 
688 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
689 }
690 
691 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
692 					u32 prod, int n)
693 {
694 	int i;
695 	struct arm_smmu_ll_queue llq = {
696 		.max_n_shift	= cmdq->q.llq.max_n_shift,
697 		.prod		= prod,
698 	};
699 
700 	for (i = 0; i < n; ++i) {
701 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
702 
703 		prod = queue_inc_prod_n(&llq, i);
704 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
705 	}
706 }
707 
708 /*
709  * This is the actual insertion function, and provides the following
710  * ordering guarantees to callers:
711  *
712  * - There is a dma_wmb() before publishing any commands to the queue.
713  *   This can be relied upon to order prior writes to data structures
714  *   in memory (such as a CD or an STE) before the command.
715  *
716  * - On completion of a CMD_SYNC, there is a control dependency.
717  *   This can be relied upon to order subsequent writes to memory (e.g.
718  *   freeing an IOVA) after completion of the CMD_SYNC.
719  *
720  * - Command insertion is totally ordered, so if two CPUs each race to
721  *   insert their own list of commands then all of the commands from one
722  *   CPU will appear before any of the commands from the other CPU.
723  */
724 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
725 				       u64 *cmds, int n, bool sync)
726 {
727 	u64 cmd_sync[CMDQ_ENT_DWORDS];
728 	u32 prod;
729 	unsigned long flags;
730 	bool owner;
731 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
732 	struct arm_smmu_ll_queue llq = {
733 		.max_n_shift = cmdq->q.llq.max_n_shift,
734 	}, head = llq;
735 	int ret = 0;
736 
737 	/* 1. Allocate some space in the queue */
738 	local_irq_save(flags);
739 	llq.val = READ_ONCE(cmdq->q.llq.val);
740 	do {
741 		u64 old;
742 
743 		while (!queue_has_space(&llq, n + sync)) {
744 			local_irq_restore(flags);
745 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
746 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
747 			local_irq_save(flags);
748 		}
749 
750 		head.cons = llq.cons;
751 		head.prod = queue_inc_prod_n(&llq, n + sync) |
752 					     CMDQ_PROD_OWNED_FLAG;
753 
754 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
755 		if (old == llq.val)
756 			break;
757 
758 		llq.val = old;
759 	} while (1);
760 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
761 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
762 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
763 
764 	/*
765 	 * 2. Write our commands into the queue
766 	 * Dependency ordering from the cmpxchg() loop above.
767 	 */
768 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
769 	if (sync) {
770 		prod = queue_inc_prod_n(&llq, n);
771 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
772 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
773 
774 		/*
775 		 * In order to determine completion of our CMD_SYNC, we must
776 		 * ensure that the queue can't wrap twice without us noticing.
777 		 * We achieve that by taking the cmdq lock as shared before
778 		 * marking our slot as valid.
779 		 */
780 		arm_smmu_cmdq_shared_lock(cmdq);
781 	}
782 
783 	/* 3. Mark our slots as valid, ensuring commands are visible first */
784 	dma_wmb();
785 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
786 
787 	/* 4. If we are the owner, take control of the SMMU hardware */
788 	if (owner) {
789 		/* a. Wait for previous owner to finish */
790 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
791 
792 		/* b. Stop gathering work by clearing the owned flag */
793 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
794 						   &cmdq->q.llq.atomic.prod);
795 		prod &= ~CMDQ_PROD_OWNED_FLAG;
796 
797 		/*
798 		 * c. Wait for any gathered work to be written to the queue.
799 		 * Note that we read our own entries so that we have the control
800 		 * dependency required by (d).
801 		 */
802 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
803 
804 		/*
805 		 * d. Advance the hardware prod pointer
806 		 * Control dependency ordering from the entries becoming valid.
807 		 */
808 		writel_relaxed(prod, cmdq->q.prod_reg);
809 
810 		/*
811 		 * e. Tell the next owner we're done
812 		 * Make sure we've updated the hardware first, so that we don't
813 		 * race to update prod and potentially move it backwards.
814 		 */
815 		atomic_set_release(&cmdq->owner_prod, prod);
816 	}
817 
818 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
819 	if (sync) {
820 		llq.prod = queue_inc_prod_n(&llq, n);
821 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
822 		if (ret) {
823 			dev_err_ratelimited(smmu->dev,
824 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
825 					    llq.prod,
826 					    readl_relaxed(cmdq->q.prod_reg),
827 					    readl_relaxed(cmdq->q.cons_reg));
828 		}
829 
830 		/*
831 		 * Try to unlock the cmdq lock. This will fail if we're the last
832 		 * reader, in which case we can safely update cmdq->q.llq.cons
833 		 */
834 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
835 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
836 			arm_smmu_cmdq_shared_unlock(cmdq);
837 		}
838 	}
839 
840 	local_irq_restore(flags);
841 	return ret;
842 }
843 
844 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
845 				   struct arm_smmu_cmdq_ent *ent)
846 {
847 	u64 cmd[CMDQ_ENT_DWORDS];
848 
849 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
850 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
851 			 ent->opcode);
852 		return -EINVAL;
853 	}
854 
855 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
856 }
857 
858 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
859 {
860 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
861 }
862 
863 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
864 				    struct arm_smmu_cmdq_batch *cmds,
865 				    struct arm_smmu_cmdq_ent *cmd)
866 {
867 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
868 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
869 		cmds->num = 0;
870 	}
871 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
872 	cmds->num++;
873 }
874 
875 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
876 				      struct arm_smmu_cmdq_batch *cmds)
877 {
878 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
879 }
880 
881 /* Context descriptor manipulation functions */
882 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
883 {
884 	struct arm_smmu_cmdq_ent cmd = {
885 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
886 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
887 		.tlbi.asid = asid,
888 	};
889 
890 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
891 	arm_smmu_cmdq_issue_sync(smmu);
892 }
893 
894 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
895 			     int ssid, bool leaf)
896 {
897 	size_t i;
898 	unsigned long flags;
899 	struct arm_smmu_master *master;
900 	struct arm_smmu_cmdq_batch cmds = {};
901 	struct arm_smmu_device *smmu = smmu_domain->smmu;
902 	struct arm_smmu_cmdq_ent cmd = {
903 		.opcode	= CMDQ_OP_CFGI_CD,
904 		.cfgi	= {
905 			.ssid	= ssid,
906 			.leaf	= leaf,
907 		},
908 	};
909 
910 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
911 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
912 		for (i = 0; i < master->num_sids; i++) {
913 			cmd.cfgi.sid = master->sids[i];
914 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
915 		}
916 	}
917 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
918 
919 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
920 }
921 
922 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
923 					struct arm_smmu_l1_ctx_desc *l1_desc)
924 {
925 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
926 
927 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
928 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
929 	if (!l1_desc->l2ptr) {
930 		dev_warn(smmu->dev,
931 			 "failed to allocate context descriptor table\n");
932 		return -ENOMEM;
933 	}
934 	return 0;
935 }
936 
937 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
938 				      struct arm_smmu_l1_ctx_desc *l1_desc)
939 {
940 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
941 		  CTXDESC_L1_DESC_V;
942 
943 	/* See comment in arm_smmu_write_ctx_desc() */
944 	WRITE_ONCE(*dst, cpu_to_le64(val));
945 }
946 
947 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
948 				   u32 ssid)
949 {
950 	__le64 *l1ptr;
951 	unsigned int idx;
952 	struct arm_smmu_l1_ctx_desc *l1_desc;
953 	struct arm_smmu_device *smmu = smmu_domain->smmu;
954 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
955 
956 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
957 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
958 
959 	idx = ssid >> CTXDESC_SPLIT;
960 	l1_desc = &cdcfg->l1_desc[idx];
961 	if (!l1_desc->l2ptr) {
962 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
963 			return NULL;
964 
965 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
966 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
967 		/* An invalid L1CD can be cached */
968 		arm_smmu_sync_cd(smmu_domain, ssid, false);
969 	}
970 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
971 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
972 }
973 
974 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
975 			    struct arm_smmu_ctx_desc *cd)
976 {
977 	/*
978 	 * This function handles the following cases:
979 	 *
980 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
981 	 * (2) Install a secondary CD, for SID+SSID traffic.
982 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
983 	 *     CD, then invalidate the old entry and mappings.
984 	 * (4) Quiesce the context without clearing the valid bit. Disable
985 	 *     translation, and ignore any translation fault.
986 	 * (5) Remove a secondary CD.
987 	 */
988 	u64 val;
989 	bool cd_live;
990 	__le64 *cdptr;
991 	struct arm_smmu_device *smmu = smmu_domain->smmu;
992 
993 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
994 		return -E2BIG;
995 
996 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
997 	if (!cdptr)
998 		return -ENOMEM;
999 
1000 	val = le64_to_cpu(cdptr[0]);
1001 	cd_live = !!(val & CTXDESC_CD_0_V);
1002 
1003 	if (!cd) { /* (5) */
1004 		val = 0;
1005 	} else if (cd == &quiet_cd) { /* (4) */
1006 		val |= CTXDESC_CD_0_TCR_EPD0;
1007 	} else if (cd_live) { /* (3) */
1008 		val &= ~CTXDESC_CD_0_ASID;
1009 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1010 		/*
1011 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1012 		 * this substream's traffic
1013 		 */
1014 	} else { /* (1) and (2) */
1015 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1016 		cdptr[2] = 0;
1017 		cdptr[3] = cpu_to_le64(cd->mair);
1018 
1019 		/*
1020 		 * STE is live, and the SMMU might read dwords of this CD in any
1021 		 * order. Ensure that it observes valid values before reading
1022 		 * V=1.
1023 		 */
1024 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1025 
1026 		val = cd->tcr |
1027 #ifdef __BIG_ENDIAN
1028 			CTXDESC_CD_0_ENDI |
1029 #endif
1030 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1031 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1032 			CTXDESC_CD_0_AA64 |
1033 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1034 			CTXDESC_CD_0_V;
1035 
1036 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1037 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1038 			val |= CTXDESC_CD_0_S;
1039 	}
1040 
1041 	/*
1042 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1043 	 * "Configuration structures and configuration invalidation completion"
1044 	 *
1045 	 *   The size of single-copy atomic reads made by the SMMU is
1046 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1047 	 *   field within an aligned 64-bit span of a structure can be altered
1048 	 *   without first making the structure invalid.
1049 	 */
1050 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1051 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1052 	return 0;
1053 }
1054 
1055 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1056 {
1057 	int ret;
1058 	size_t l1size;
1059 	size_t max_contexts;
1060 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1061 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1062 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1063 
1064 	max_contexts = 1 << cfg->s1cdmax;
1065 
1066 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1067 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1068 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1069 		cdcfg->num_l1_ents = max_contexts;
1070 
1071 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1072 	} else {
1073 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1074 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1075 						  CTXDESC_L2_ENTRIES);
1076 
1077 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1078 					      sizeof(*cdcfg->l1_desc),
1079 					      GFP_KERNEL);
1080 		if (!cdcfg->l1_desc)
1081 			return -ENOMEM;
1082 
1083 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1084 	}
1085 
1086 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1087 					   GFP_KERNEL);
1088 	if (!cdcfg->cdtab) {
1089 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1090 		ret = -ENOMEM;
1091 		goto err_free_l1;
1092 	}
1093 
1094 	return 0;
1095 
1096 err_free_l1:
1097 	if (cdcfg->l1_desc) {
1098 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1099 		cdcfg->l1_desc = NULL;
1100 	}
1101 	return ret;
1102 }
1103 
1104 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1105 {
1106 	int i;
1107 	size_t size, l1size;
1108 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1109 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1110 
1111 	if (cdcfg->l1_desc) {
1112 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1113 
1114 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1115 			if (!cdcfg->l1_desc[i].l2ptr)
1116 				continue;
1117 
1118 			dmam_free_coherent(smmu->dev, size,
1119 					   cdcfg->l1_desc[i].l2ptr,
1120 					   cdcfg->l1_desc[i].l2ptr_dma);
1121 		}
1122 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1123 		cdcfg->l1_desc = NULL;
1124 
1125 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1126 	} else {
1127 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1128 	}
1129 
1130 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1131 	cdcfg->cdtab_dma = 0;
1132 	cdcfg->cdtab = NULL;
1133 }
1134 
1135 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1136 {
1137 	bool free;
1138 	struct arm_smmu_ctx_desc *old_cd;
1139 
1140 	if (!cd->asid)
1141 		return false;
1142 
1143 	free = refcount_dec_and_test(&cd->refs);
1144 	if (free) {
1145 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1146 		WARN_ON(old_cd != cd);
1147 	}
1148 	return free;
1149 }
1150 
1151 /* Stream table manipulation functions */
1152 static void
1153 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1154 {
1155 	u64 val = 0;
1156 
1157 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1158 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1159 
1160 	/* See comment in arm_smmu_write_ctx_desc() */
1161 	WRITE_ONCE(*dst, cpu_to_le64(val));
1162 }
1163 
1164 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1165 {
1166 	struct arm_smmu_cmdq_ent cmd = {
1167 		.opcode	= CMDQ_OP_CFGI_STE,
1168 		.cfgi	= {
1169 			.sid	= sid,
1170 			.leaf	= true,
1171 		},
1172 	};
1173 
1174 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1175 	arm_smmu_cmdq_issue_sync(smmu);
1176 }
1177 
1178 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1179 				      __le64 *dst)
1180 {
1181 	/*
1182 	 * This is hideously complicated, but we only really care about
1183 	 * three cases at the moment:
1184 	 *
1185 	 * 1. Invalid (all zero) -> bypass/fault (init)
1186 	 * 2. Bypass/fault -> translation/bypass (attach)
1187 	 * 3. Translation/bypass -> bypass/fault (detach)
1188 	 *
1189 	 * Given that we can't update the STE atomically and the SMMU
1190 	 * doesn't read the thing in a defined order, that leaves us
1191 	 * with the following maintenance requirements:
1192 	 *
1193 	 * 1. Update Config, return (init time STEs aren't live)
1194 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1195 	 * 3. Update Config, sync
1196 	 */
1197 	u64 val = le64_to_cpu(dst[0]);
1198 	bool ste_live = false;
1199 	struct arm_smmu_device *smmu = NULL;
1200 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1201 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1202 	struct arm_smmu_domain *smmu_domain = NULL;
1203 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1204 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1205 		.prefetch	= {
1206 			.sid	= sid,
1207 		},
1208 	};
1209 
1210 	if (master) {
1211 		smmu_domain = master->domain;
1212 		smmu = master->smmu;
1213 	}
1214 
1215 	if (smmu_domain) {
1216 		switch (smmu_domain->stage) {
1217 		case ARM_SMMU_DOMAIN_S1:
1218 			s1_cfg = &smmu_domain->s1_cfg;
1219 			break;
1220 		case ARM_SMMU_DOMAIN_S2:
1221 		case ARM_SMMU_DOMAIN_NESTED:
1222 			s2_cfg = &smmu_domain->s2_cfg;
1223 			break;
1224 		default:
1225 			break;
1226 		}
1227 	}
1228 
1229 	if (val & STRTAB_STE_0_V) {
1230 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1231 		case STRTAB_STE_0_CFG_BYPASS:
1232 			break;
1233 		case STRTAB_STE_0_CFG_S1_TRANS:
1234 		case STRTAB_STE_0_CFG_S2_TRANS:
1235 			ste_live = true;
1236 			break;
1237 		case STRTAB_STE_0_CFG_ABORT:
1238 			BUG_ON(!disable_bypass);
1239 			break;
1240 		default:
1241 			BUG(); /* STE corruption */
1242 		}
1243 	}
1244 
1245 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1246 	val = STRTAB_STE_0_V;
1247 
1248 	/* Bypass/fault */
1249 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1250 		if (!smmu_domain && disable_bypass)
1251 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1252 		else
1253 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1254 
1255 		dst[0] = cpu_to_le64(val);
1256 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1257 						STRTAB_STE_1_SHCFG_INCOMING));
1258 		dst[2] = 0; /* Nuke the VMID */
1259 		/*
1260 		 * The SMMU can perform negative caching, so we must sync
1261 		 * the STE regardless of whether the old value was live.
1262 		 */
1263 		if (smmu)
1264 			arm_smmu_sync_ste_for_sid(smmu, sid);
1265 		return;
1266 	}
1267 
1268 	if (s1_cfg) {
1269 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1270 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1271 
1272 		BUG_ON(ste_live);
1273 		dst[1] = cpu_to_le64(
1274 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1275 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1276 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1277 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1278 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1279 
1280 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1281 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1282 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1283 
1284 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1285 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1286 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1287 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1288 	}
1289 
1290 	if (s2_cfg) {
1291 		BUG_ON(ste_live);
1292 		dst[2] = cpu_to_le64(
1293 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1294 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1295 #ifdef __BIG_ENDIAN
1296 			 STRTAB_STE_2_S2ENDI |
1297 #endif
1298 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1299 			 STRTAB_STE_2_S2R);
1300 
1301 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1302 
1303 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1304 	}
1305 
1306 	if (master->ats_enabled)
1307 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1308 						 STRTAB_STE_1_EATS_TRANS));
1309 
1310 	arm_smmu_sync_ste_for_sid(smmu, sid);
1311 	/* See comment in arm_smmu_write_ctx_desc() */
1312 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1313 	arm_smmu_sync_ste_for_sid(smmu, sid);
1314 
1315 	/* It's likely that we'll want to use the new STE soon */
1316 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1317 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1318 }
1319 
1320 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1321 {
1322 	unsigned int i;
1323 
1324 	for (i = 0; i < nent; ++i) {
1325 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1326 		strtab += STRTAB_STE_DWORDS;
1327 	}
1328 }
1329 
1330 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1331 {
1332 	size_t size;
1333 	void *strtab;
1334 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1335 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1336 
1337 	if (desc->l2ptr)
1338 		return 0;
1339 
1340 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1341 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1342 
1343 	desc->span = STRTAB_SPLIT + 1;
1344 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1345 					  GFP_KERNEL);
1346 	if (!desc->l2ptr) {
1347 		dev_err(smmu->dev,
1348 			"failed to allocate l2 stream table for SID %u\n",
1349 			sid);
1350 		return -ENOMEM;
1351 	}
1352 
1353 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1354 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1355 	return 0;
1356 }
1357 
1358 /* IRQ and event handlers */
1359 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1360 {
1361 	int i;
1362 	struct arm_smmu_device *smmu = dev;
1363 	struct arm_smmu_queue *q = &smmu->evtq.q;
1364 	struct arm_smmu_ll_queue *llq = &q->llq;
1365 	u64 evt[EVTQ_ENT_DWORDS];
1366 
1367 	do {
1368 		while (!queue_remove_raw(q, evt)) {
1369 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1370 
1371 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1372 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1373 				dev_info(smmu->dev, "\t0x%016llx\n",
1374 					 (unsigned long long)evt[i]);
1375 
1376 		}
1377 
1378 		/*
1379 		 * Not much we can do on overflow, so scream and pretend we're
1380 		 * trying harder.
1381 		 */
1382 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1383 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1384 	} while (!queue_empty(llq));
1385 
1386 	/* Sync our overflow flag, as we believe we're up to speed */
1387 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1388 		    Q_IDX(llq, llq->cons);
1389 	return IRQ_HANDLED;
1390 }
1391 
1392 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1393 {
1394 	u32 sid, ssid;
1395 	u16 grpid;
1396 	bool ssv, last;
1397 
1398 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1399 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1400 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1401 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1402 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1403 
1404 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1405 	dev_info(smmu->dev,
1406 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1407 		 sid, ssid, grpid, last ? "L" : "",
1408 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1409 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1410 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1411 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1412 		 evt[1] & PRIQ_1_ADDR_MASK);
1413 
1414 	if (last) {
1415 		struct arm_smmu_cmdq_ent cmd = {
1416 			.opcode			= CMDQ_OP_PRI_RESP,
1417 			.substream_valid	= ssv,
1418 			.pri			= {
1419 				.sid	= sid,
1420 				.ssid	= ssid,
1421 				.grpid	= grpid,
1422 				.resp	= PRI_RESP_DENY,
1423 			},
1424 		};
1425 
1426 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1427 	}
1428 }
1429 
1430 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1431 {
1432 	struct arm_smmu_device *smmu = dev;
1433 	struct arm_smmu_queue *q = &smmu->priq.q;
1434 	struct arm_smmu_ll_queue *llq = &q->llq;
1435 	u64 evt[PRIQ_ENT_DWORDS];
1436 
1437 	do {
1438 		while (!queue_remove_raw(q, evt))
1439 			arm_smmu_handle_ppr(smmu, evt);
1440 
1441 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1442 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1443 	} while (!queue_empty(llq));
1444 
1445 	/* Sync our overflow flag, as we believe we're up to speed */
1446 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1447 		      Q_IDX(llq, llq->cons);
1448 	queue_sync_cons_out(q);
1449 	return IRQ_HANDLED;
1450 }
1451 
1452 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1453 
1454 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1455 {
1456 	u32 gerror, gerrorn, active;
1457 	struct arm_smmu_device *smmu = dev;
1458 
1459 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1460 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1461 
1462 	active = gerror ^ gerrorn;
1463 	if (!(active & GERROR_ERR_MASK))
1464 		return IRQ_NONE; /* No errors pending */
1465 
1466 	dev_warn(smmu->dev,
1467 		 "unexpected global error reported (0x%08x), this could be serious\n",
1468 		 active);
1469 
1470 	if (active & GERROR_SFM_ERR) {
1471 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1472 		arm_smmu_device_disable(smmu);
1473 	}
1474 
1475 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1476 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1477 
1478 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1479 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1480 
1481 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1482 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1483 
1484 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1485 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1486 
1487 	if (active & GERROR_PRIQ_ABT_ERR)
1488 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1489 
1490 	if (active & GERROR_EVTQ_ABT_ERR)
1491 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1492 
1493 	if (active & GERROR_CMDQ_ERR)
1494 		arm_smmu_cmdq_skip_err(smmu);
1495 
1496 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1497 	return IRQ_HANDLED;
1498 }
1499 
1500 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1501 {
1502 	struct arm_smmu_device *smmu = dev;
1503 
1504 	arm_smmu_evtq_thread(irq, dev);
1505 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1506 		arm_smmu_priq_thread(irq, dev);
1507 
1508 	return IRQ_HANDLED;
1509 }
1510 
1511 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1512 {
1513 	arm_smmu_gerror_handler(irq, dev);
1514 	return IRQ_WAKE_THREAD;
1515 }
1516 
1517 static void
1518 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1519 			struct arm_smmu_cmdq_ent *cmd)
1520 {
1521 	size_t log2_span;
1522 	size_t span_mask;
1523 	/* ATC invalidates are always on 4096-bytes pages */
1524 	size_t inval_grain_shift = 12;
1525 	unsigned long page_start, page_end;
1526 
1527 	/*
1528 	 * ATS and PASID:
1529 	 *
1530 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1531 	 * prefix. In that case all ATC entries within the address range are
1532 	 * invalidated, including those that were requested with a PASID! There
1533 	 * is no way to invalidate only entries without PASID.
1534 	 *
1535 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1536 	 * traffic), translation requests without PASID create ATC entries
1537 	 * without PASID, which must be invalidated with substream_valid clear.
1538 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1539 	 * ATC entries within the address range.
1540 	 */
1541 	*cmd = (struct arm_smmu_cmdq_ent) {
1542 		.opcode			= CMDQ_OP_ATC_INV,
1543 		.substream_valid	= !!ssid,
1544 		.atc.ssid		= ssid,
1545 	};
1546 
1547 	if (!size) {
1548 		cmd->atc.size = ATC_INV_SIZE_ALL;
1549 		return;
1550 	}
1551 
1552 	page_start	= iova >> inval_grain_shift;
1553 	page_end	= (iova + size - 1) >> inval_grain_shift;
1554 
1555 	/*
1556 	 * In an ATS Invalidate Request, the address must be aligned on the
1557 	 * range size, which must be a power of two number of page sizes. We
1558 	 * thus have to choose between grossly over-invalidating the region, or
1559 	 * splitting the invalidation into multiple commands. For simplicity
1560 	 * we'll go with the first solution, but should refine it in the future
1561 	 * if multiple commands are shown to be more efficient.
1562 	 *
1563 	 * Find the smallest power of two that covers the range. The most
1564 	 * significant differing bit between the start and end addresses,
1565 	 * fls(start ^ end), indicates the required span. For example:
1566 	 *
1567 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1568 	 *		x = 0b1000 ^ 0b1011 = 0b11
1569 	 *		span = 1 << fls(x) = 4
1570 	 *
1571 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1572 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1573 	 *		span = 1 << fls(x) = 16
1574 	 */
1575 	log2_span	= fls_long(page_start ^ page_end);
1576 	span_mask	= (1ULL << log2_span) - 1;
1577 
1578 	page_start	&= ~span_mask;
1579 
1580 	cmd->atc.addr	= page_start << inval_grain_shift;
1581 	cmd->atc.size	= log2_span;
1582 }
1583 
1584 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1585 {
1586 	int i;
1587 	struct arm_smmu_cmdq_ent cmd;
1588 
1589 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1590 
1591 	for (i = 0; i < master->num_sids; i++) {
1592 		cmd.atc.sid = master->sids[i];
1593 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1594 	}
1595 
1596 	return arm_smmu_cmdq_issue_sync(master->smmu);
1597 }
1598 
1599 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1600 			    unsigned long iova, size_t size)
1601 {
1602 	int i;
1603 	unsigned long flags;
1604 	struct arm_smmu_cmdq_ent cmd;
1605 	struct arm_smmu_master *master;
1606 	struct arm_smmu_cmdq_batch cmds = {};
1607 
1608 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1609 		return 0;
1610 
1611 	/*
1612 	 * Ensure that we've completed prior invalidation of the main TLBs
1613 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1614 	 * arm_smmu_enable_ats():
1615 	 *
1616 	 *	// unmap()			// arm_smmu_enable_ats()
1617 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1618 	 *	smp_mb();			[...]
1619 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1620 	 *
1621 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1622 	 * ATS was enabled at the PCI device before completion of the TLBI.
1623 	 */
1624 	smp_mb();
1625 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1626 		return 0;
1627 
1628 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1629 
1630 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1631 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1632 		if (!master->ats_enabled)
1633 			continue;
1634 
1635 		for (i = 0; i < master->num_sids; i++) {
1636 			cmd.atc.sid = master->sids[i];
1637 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1638 		}
1639 	}
1640 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1641 
1642 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1643 }
1644 
1645 /* IO_PGTABLE API */
1646 static void arm_smmu_tlb_inv_context(void *cookie)
1647 {
1648 	struct arm_smmu_domain *smmu_domain = cookie;
1649 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1650 	struct arm_smmu_cmdq_ent cmd;
1651 
1652 	/*
1653 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1654 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1655 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1656 	 * insertion to guarantee those are observed before the TLBI. Do be
1657 	 * careful, 007.
1658 	 */
1659 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1660 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1661 	} else {
1662 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1663 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1664 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1665 		arm_smmu_cmdq_issue_sync(smmu);
1666 	}
1667 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1668 }
1669 
1670 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1671 				     unsigned long iova, size_t size,
1672 				     size_t granule,
1673 				     struct arm_smmu_domain *smmu_domain)
1674 {
1675 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1676 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1677 	size_t inv_range = granule;
1678 	struct arm_smmu_cmdq_batch cmds = {};
1679 
1680 	if (!size)
1681 		return;
1682 
1683 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1684 		/* Get the leaf page size */
1685 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1686 
1687 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1688 		cmd->tlbi.tg = (tg - 10) / 2;
1689 
1690 		/* Determine what level the granule is at */
1691 		cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1692 
1693 		num_pages = size >> tg;
1694 	}
1695 
1696 	while (iova < end) {
1697 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1698 			/*
1699 			 * On each iteration of the loop, the range is 5 bits
1700 			 * worth of the aligned size remaining.
1701 			 * The range in pages is:
1702 			 *
1703 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1704 			 */
1705 			unsigned long scale, num;
1706 
1707 			/* Determine the power of 2 multiple number of pages */
1708 			scale = __ffs(num_pages);
1709 			cmd->tlbi.scale = scale;
1710 
1711 			/* Determine how many chunks of 2^scale size we have */
1712 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1713 			cmd->tlbi.num = num - 1;
1714 
1715 			/* range is num * 2^scale * pgsize */
1716 			inv_range = num << (scale + tg);
1717 
1718 			/* Clear out the lower order bits for the next iteration */
1719 			num_pages -= num << scale;
1720 		}
1721 
1722 		cmd->tlbi.addr = iova;
1723 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1724 		iova += inv_range;
1725 	}
1726 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1727 }
1728 
1729 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1730 					  size_t granule, bool leaf,
1731 					  struct arm_smmu_domain *smmu_domain)
1732 {
1733 	struct arm_smmu_cmdq_ent cmd = {
1734 		.tlbi = {
1735 			.leaf	= leaf,
1736 		},
1737 	};
1738 
1739 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1740 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1741 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1742 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1743 	} else {
1744 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1745 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1746 	}
1747 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1748 
1749 	/*
1750 	 * Unfortunately, this can't be leaf-only since we may have
1751 	 * zapped an entire table.
1752 	 */
1753 	arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1754 }
1755 
1756 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1757 				 size_t granule, bool leaf,
1758 				 struct arm_smmu_domain *smmu_domain)
1759 {
1760 	struct arm_smmu_cmdq_ent cmd = {
1761 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1762 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1763 		.tlbi = {
1764 			.asid	= asid,
1765 			.leaf	= leaf,
1766 		},
1767 	};
1768 
1769 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1770 }
1771 
1772 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1773 					 unsigned long iova, size_t granule,
1774 					 void *cookie)
1775 {
1776 	struct arm_smmu_domain *smmu_domain = cookie;
1777 	struct iommu_domain *domain = &smmu_domain->domain;
1778 
1779 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1780 }
1781 
1782 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1783 				  size_t granule, void *cookie)
1784 {
1785 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1786 }
1787 
1788 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1789 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1790 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1791 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1792 };
1793 
1794 /* IOMMU API */
1795 static bool arm_smmu_capable(enum iommu_cap cap)
1796 {
1797 	switch (cap) {
1798 	case IOMMU_CAP_CACHE_COHERENCY:
1799 		return true;
1800 	case IOMMU_CAP_NOEXEC:
1801 		return true;
1802 	default:
1803 		return false;
1804 	}
1805 }
1806 
1807 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1808 {
1809 	struct arm_smmu_domain *smmu_domain;
1810 
1811 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1812 	    type != IOMMU_DOMAIN_DMA &&
1813 	    type != IOMMU_DOMAIN_IDENTITY)
1814 		return NULL;
1815 
1816 	/*
1817 	 * Allocate the domain and initialise some of its data structures.
1818 	 * We can't really do anything meaningful until we've added a
1819 	 * master.
1820 	 */
1821 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1822 	if (!smmu_domain)
1823 		return NULL;
1824 
1825 	if (type == IOMMU_DOMAIN_DMA &&
1826 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1827 		kfree(smmu_domain);
1828 		return NULL;
1829 	}
1830 
1831 	mutex_init(&smmu_domain->init_mutex);
1832 	INIT_LIST_HEAD(&smmu_domain->devices);
1833 	spin_lock_init(&smmu_domain->devices_lock);
1834 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
1835 
1836 	return &smmu_domain->domain;
1837 }
1838 
1839 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1840 {
1841 	int idx, size = 1 << span;
1842 
1843 	do {
1844 		idx = find_first_zero_bit(map, size);
1845 		if (idx == size)
1846 			return -ENOSPC;
1847 	} while (test_and_set_bit(idx, map));
1848 
1849 	return idx;
1850 }
1851 
/* Release bit @idx of @map, the counterpart of arm_smmu_bitmap_alloc(). */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	int bit = idx;

	clear_bit(bit, map);
}
1856 
1857 static void arm_smmu_domain_free(struct iommu_domain *domain)
1858 {
1859 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1860 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1861 
1862 	iommu_put_dma_cookie(domain);
1863 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1864 
1865 	/* Free the CD and ASID, if we allocated them */
1866 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1867 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1868 
1869 		/* Prevent SVA from touching the CD while we're freeing it */
1870 		mutex_lock(&arm_smmu_asid_lock);
1871 		if (cfg->cdcfg.cdtab)
1872 			arm_smmu_free_cd_tables(smmu_domain);
1873 		arm_smmu_free_asid(&cfg->cd);
1874 		mutex_unlock(&arm_smmu_asid_lock);
1875 	} else {
1876 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1877 		if (cfg->vmid)
1878 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1879 	}
1880 
1881 	kfree(smmu_domain);
1882 }
1883 
1884 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1885 				       struct arm_smmu_master *master,
1886 				       struct io_pgtable_cfg *pgtbl_cfg)
1887 {
1888 	int ret;
1889 	u32 asid;
1890 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1891 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1892 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1893 
1894 	refcount_set(&cfg->cd.refs, 1);
1895 
1896 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1897 	mutex_lock(&arm_smmu_asid_lock);
1898 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1899 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1900 	if (ret)
1901 		goto out_unlock;
1902 
1903 	cfg->s1cdmax = master->ssid_bits;
1904 
1905 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1906 	if (ret)
1907 		goto out_free_asid;
1908 
1909 	cfg->cd.asid	= (u16)asid;
1910 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1911 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1912 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1913 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1914 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1915 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1916 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1917 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1918 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1919 
1920 	/*
1921 	 * Note that this will end up calling arm_smmu_sync_cd() before
1922 	 * the master has been added to the devices list for this domain.
1923 	 * This isn't an issue because the STE hasn't been installed yet.
1924 	 */
1925 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1926 	if (ret)
1927 		goto out_free_cd_tables;
1928 
1929 	mutex_unlock(&arm_smmu_asid_lock);
1930 	return 0;
1931 
1932 out_free_cd_tables:
1933 	arm_smmu_free_cd_tables(smmu_domain);
1934 out_free_asid:
1935 	arm_smmu_free_asid(&cfg->cd);
1936 out_unlock:
1937 	mutex_unlock(&arm_smmu_asid_lock);
1938 	return ret;
1939 }
1940 
1941 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1942 				       struct arm_smmu_master *master,
1943 				       struct io_pgtable_cfg *pgtbl_cfg)
1944 {
1945 	int vmid;
1946 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1947 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1948 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1949 
1950 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1951 	if (vmid < 0)
1952 		return vmid;
1953 
1954 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1955 	cfg->vmid	= (u16)vmid;
1956 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1957 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1958 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1959 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1960 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1961 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1962 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1963 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1964 	return 0;
1965 }
1966 
1967 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1968 				    struct arm_smmu_master *master)
1969 {
1970 	int ret;
1971 	unsigned long ias, oas;
1972 	enum io_pgtable_fmt fmt;
1973 	struct io_pgtable_cfg pgtbl_cfg;
1974 	struct io_pgtable_ops *pgtbl_ops;
1975 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1976 				 struct arm_smmu_master *,
1977 				 struct io_pgtable_cfg *);
1978 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1979 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1980 
1981 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1982 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1983 		return 0;
1984 	}
1985 
1986 	/* Restrict the stage to what we can actually support */
1987 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1988 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1989 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1990 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1991 
1992 	switch (smmu_domain->stage) {
1993 	case ARM_SMMU_DOMAIN_S1:
1994 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1995 		ias = min_t(unsigned long, ias, VA_BITS);
1996 		oas = smmu->ias;
1997 		fmt = ARM_64_LPAE_S1;
1998 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1999 		break;
2000 	case ARM_SMMU_DOMAIN_NESTED:
2001 	case ARM_SMMU_DOMAIN_S2:
2002 		ias = smmu->ias;
2003 		oas = smmu->oas;
2004 		fmt = ARM_64_LPAE_S2;
2005 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2006 		break;
2007 	default:
2008 		return -EINVAL;
2009 	}
2010 
2011 	pgtbl_cfg = (struct io_pgtable_cfg) {
2012 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2013 		.ias		= ias,
2014 		.oas		= oas,
2015 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2016 		.tlb		= &arm_smmu_flush_ops,
2017 		.iommu_dev	= smmu->dev,
2018 	};
2019 
2020 	if (smmu_domain->non_strict)
2021 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2022 
2023 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2024 	if (!pgtbl_ops)
2025 		return -ENOMEM;
2026 
2027 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2028 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2029 	domain->geometry.force_aperture = true;
2030 
2031 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2032 	if (ret < 0) {
2033 		free_io_pgtable_ops(pgtbl_ops);
2034 		return ret;
2035 	}
2036 
2037 	smmu_domain->pgtbl_ops = pgtbl_ops;
2038 	return 0;
2039 }
2040 
2041 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2042 {
2043 	__le64 *step;
2044 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2045 
2046 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2047 		struct arm_smmu_strtab_l1_desc *l1_desc;
2048 		int idx;
2049 
2050 		/* Two-level walk */
2051 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2052 		l1_desc = &cfg->l1_desc[idx];
2053 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2054 		step = &l1_desc->l2ptr[idx];
2055 	} else {
2056 		/* Simple linear lookup */
2057 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2058 	}
2059 
2060 	return step;
2061 }
2062 
2063 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2064 {
2065 	int i, j;
2066 	struct arm_smmu_device *smmu = master->smmu;
2067 
2068 	for (i = 0; i < master->num_sids; ++i) {
2069 		u32 sid = master->sids[i];
2070 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2071 
2072 		/* Bridged PCI devices may end up with duplicated IDs */
2073 		for (j = 0; j < i; j++)
2074 			if (master->sids[j] == sid)
2075 				break;
2076 		if (j < i)
2077 			continue;
2078 
2079 		arm_smmu_write_strtab_ent(master, sid, step);
2080 	}
2081 }
2082 
2083 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2084 {
2085 	struct device *dev = master->dev;
2086 	struct arm_smmu_device *smmu = master->smmu;
2087 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2088 
2089 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2090 		return false;
2091 
2092 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2093 		return false;
2094 
2095 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2096 }
2097 
2098 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2099 {
2100 	size_t stu;
2101 	struct pci_dev *pdev;
2102 	struct arm_smmu_device *smmu = master->smmu;
2103 	struct arm_smmu_domain *smmu_domain = master->domain;
2104 
2105 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2106 	if (!master->ats_enabled)
2107 		return;
2108 
2109 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2110 	stu = __ffs(smmu->pgsize_bitmap);
2111 	pdev = to_pci_dev(master->dev);
2112 
2113 	atomic_inc(&smmu_domain->nr_ats_masters);
2114 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2115 	if (pci_enable_ats(pdev, stu))
2116 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2117 }
2118 
2119 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2120 {
2121 	struct arm_smmu_domain *smmu_domain = master->domain;
2122 
2123 	if (!master->ats_enabled)
2124 		return;
2125 
2126 	pci_disable_ats(to_pci_dev(master->dev));
2127 	/*
2128 	 * Ensure ATS is disabled at the endpoint before we issue the
2129 	 * ATC invalidation via the SMMU.
2130 	 */
2131 	wmb();
2132 	arm_smmu_atc_inv_master(master);
2133 	atomic_dec(&smmu_domain->nr_ats_masters);
2134 }
2135 
2136 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2137 {
2138 	int ret;
2139 	int features;
2140 	int num_pasids;
2141 	struct pci_dev *pdev;
2142 
2143 	if (!dev_is_pci(master->dev))
2144 		return -ENODEV;
2145 
2146 	pdev = to_pci_dev(master->dev);
2147 
2148 	features = pci_pasid_features(pdev);
2149 	if (features < 0)
2150 		return features;
2151 
2152 	num_pasids = pci_max_pasids(pdev);
2153 	if (num_pasids <= 0)
2154 		return num_pasids;
2155 
2156 	ret = pci_enable_pasid(pdev, features);
2157 	if (ret) {
2158 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2159 		return ret;
2160 	}
2161 
2162 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2163 				  master->smmu->ssid_bits);
2164 	return 0;
2165 }
2166 
2167 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2168 {
2169 	struct pci_dev *pdev;
2170 
2171 	if (!dev_is_pci(master->dev))
2172 		return;
2173 
2174 	pdev = to_pci_dev(master->dev);
2175 
2176 	if (!pdev->pasid_enabled)
2177 		return;
2178 
2179 	master->ssid_bits = 0;
2180 	pci_disable_pasid(pdev);
2181 }
2182 
2183 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2184 {
2185 	unsigned long flags;
2186 	struct arm_smmu_domain *smmu_domain = master->domain;
2187 
2188 	if (!smmu_domain)
2189 		return;
2190 
2191 	arm_smmu_disable_ats(master);
2192 
2193 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2194 	list_del(&master->domain_head);
2195 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2196 
2197 	master->domain = NULL;
2198 	master->ats_enabled = false;
2199 	arm_smmu_install_ste_for_dev(master);
2200 }
2201 
2202 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2203 {
2204 	int ret = 0;
2205 	unsigned long flags;
2206 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2207 	struct arm_smmu_device *smmu;
2208 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2209 	struct arm_smmu_master *master;
2210 
2211 	if (!fwspec)
2212 		return -ENOENT;
2213 
2214 	master = dev_iommu_priv_get(dev);
2215 	smmu = master->smmu;
2216 
2217 	/*
2218 	 * Checking that SVA is disabled ensures that this device isn't bound to
2219 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2220 	 * be removed concurrently since we're holding the group mutex.
2221 	 */
2222 	if (arm_smmu_master_sva_enabled(master)) {
2223 		dev_err(dev, "cannot attach - SVA enabled\n");
2224 		return -EBUSY;
2225 	}
2226 
2227 	arm_smmu_detach_dev(master);
2228 
2229 	mutex_lock(&smmu_domain->init_mutex);
2230 
2231 	if (!smmu_domain->smmu) {
2232 		smmu_domain->smmu = smmu;
2233 		ret = arm_smmu_domain_finalise(domain, master);
2234 		if (ret) {
2235 			smmu_domain->smmu = NULL;
2236 			goto out_unlock;
2237 		}
2238 	} else if (smmu_domain->smmu != smmu) {
2239 		dev_err(dev,
2240 			"cannot attach to SMMU %s (upstream of %s)\n",
2241 			dev_name(smmu_domain->smmu->dev),
2242 			dev_name(smmu->dev));
2243 		ret = -ENXIO;
2244 		goto out_unlock;
2245 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2246 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2247 		dev_err(dev,
2248 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2249 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2250 		ret = -EINVAL;
2251 		goto out_unlock;
2252 	}
2253 
2254 	master->domain = smmu_domain;
2255 
2256 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2257 		master->ats_enabled = arm_smmu_ats_supported(master);
2258 
2259 	arm_smmu_install_ste_for_dev(master);
2260 
2261 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2262 	list_add(&master->domain_head, &smmu_domain->devices);
2263 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2264 
2265 	arm_smmu_enable_ats(master);
2266 
2267 out_unlock:
2268 	mutex_unlock(&smmu_domain->init_mutex);
2269 	return ret;
2270 }
2271 
2272 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2273 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2274 {
2275 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2276 
2277 	if (!ops)
2278 		return -ENODEV;
2279 
2280 	return ops->map(ops, iova, paddr, size, prot, gfp);
2281 }
2282 
2283 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2284 			     size_t size, struct iommu_iotlb_gather *gather)
2285 {
2286 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2287 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2288 
2289 	if (!ops)
2290 		return 0;
2291 
2292 	return ops->unmap(ops, iova, size, gather);
2293 }
2294 
2295 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2296 {
2297 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2298 
2299 	if (smmu_domain->smmu)
2300 		arm_smmu_tlb_inv_context(smmu_domain);
2301 }
2302 
2303 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2304 				struct iommu_iotlb_gather *gather)
2305 {
2306 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2307 
2308 	arm_smmu_tlb_inv_range_domain(gather->start,
2309 				      gather->end - gather->start + 1,
2310 				      gather->pgsize, true, smmu_domain);
2311 }
2312 
2313 static phys_addr_t
2314 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2315 {
2316 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2317 
2318 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2319 		return iova;
2320 
2321 	if (!ops)
2322 		return 0;
2323 
2324 	return ops->iova_to_phys(ops, iova);
2325 }
2326 
2327 static struct platform_driver arm_smmu_driver;
2328 
2329 static
2330 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2331 {
2332 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2333 							  fwnode);
2334 	put_device(dev);
2335 	return dev ? dev_get_drvdata(dev) : NULL;
2336 }
2337 
2338 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2339 {
2340 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2341 
2342 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2343 		limit *= 1UL << STRTAB_SPLIT;
2344 
2345 	return sid < limit;
2346 }
2347 
2348 static struct iommu_ops arm_smmu_ops;
2349 
2350 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2351 {
2352 	int i, ret;
2353 	struct arm_smmu_device *smmu;
2354 	struct arm_smmu_master *master;
2355 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2356 
2357 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2358 		return ERR_PTR(-ENODEV);
2359 
2360 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2361 		return ERR_PTR(-EBUSY);
2362 
2363 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2364 	if (!smmu)
2365 		return ERR_PTR(-ENODEV);
2366 
2367 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2368 	if (!master)
2369 		return ERR_PTR(-ENOMEM);
2370 
2371 	master->dev = dev;
2372 	master->smmu = smmu;
2373 	master->sids = fwspec->ids;
2374 	master->num_sids = fwspec->num_ids;
2375 	INIT_LIST_HEAD(&master->bonds);
2376 	dev_iommu_priv_set(dev, master);
2377 
2378 	/* Check the SIDs are in range of the SMMU and our stream table */
2379 	for (i = 0; i < master->num_sids; i++) {
2380 		u32 sid = master->sids[i];
2381 
2382 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2383 			ret = -ERANGE;
2384 			goto err_free_master;
2385 		}
2386 
2387 		/* Ensure l2 strtab is initialised */
2388 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2389 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2390 			if (ret)
2391 				goto err_free_master;
2392 		}
2393 	}
2394 
2395 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2396 
2397 	/*
2398 	 * Note that PASID must be enabled before, and disabled after ATS:
2399 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2400 	 *
2401 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2402 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2403 	 *   are changed.
2404 	 */
2405 	arm_smmu_enable_pasid(master);
2406 
2407 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2408 		master->ssid_bits = min_t(u8, master->ssid_bits,
2409 					  CTXDESC_LINEAR_CDMAX);
2410 
2411 	return &smmu->iommu;
2412 
2413 err_free_master:
2414 	kfree(master);
2415 	dev_iommu_priv_set(dev, NULL);
2416 	return ERR_PTR(ret);
2417 }
2418 
2419 static void arm_smmu_release_device(struct device *dev)
2420 {
2421 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2422 	struct arm_smmu_master *master;
2423 
2424 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2425 		return;
2426 
2427 	master = dev_iommu_priv_get(dev);
2428 	WARN_ON(arm_smmu_master_sva_enabled(master));
2429 	arm_smmu_detach_dev(master);
2430 	arm_smmu_disable_pasid(master);
2431 	kfree(master);
2432 	iommu_fwspec_free(dev);
2433 }
2434 
/*
 * iommu_ops::device_group callback.
 *
 * We don't support devices sharing stream IDs other than PCI RID
 * aliases, since the necessary ID-to-device lookup becomes rather
 * impractical given a potential sparse 32-bit stream ID space. PCI
 * devices therefore use the PCI grouping helper and everything else gets
 * a generic group.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	return dev_is_pci(dev) ? pci_device_group(dev) :
				 generic_device_group(dev);
}
2451 
2452 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2453 				    enum iommu_attr attr, void *data)
2454 {
2455 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2456 
2457 	switch (domain->type) {
2458 	case IOMMU_DOMAIN_UNMANAGED:
2459 		switch (attr) {
2460 		case DOMAIN_ATTR_NESTING:
2461 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2462 			return 0;
2463 		default:
2464 			return -ENODEV;
2465 		}
2466 		break;
2467 	case IOMMU_DOMAIN_DMA:
2468 		switch (attr) {
2469 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2470 			*(int *)data = smmu_domain->non_strict;
2471 			return 0;
2472 		default:
2473 			return -ENODEV;
2474 		}
2475 		break;
2476 	default:
2477 		return -EINVAL;
2478 	}
2479 }
2480 
2481 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2482 				    enum iommu_attr attr, void *data)
2483 {
2484 	int ret = 0;
2485 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2486 
2487 	mutex_lock(&smmu_domain->init_mutex);
2488 
2489 	switch (domain->type) {
2490 	case IOMMU_DOMAIN_UNMANAGED:
2491 		switch (attr) {
2492 		case DOMAIN_ATTR_NESTING:
2493 			if (smmu_domain->smmu) {
2494 				ret = -EPERM;
2495 				goto out_unlock;
2496 			}
2497 
2498 			if (*(int *)data)
2499 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2500 			else
2501 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2502 			break;
2503 		default:
2504 			ret = -ENODEV;
2505 		}
2506 		break;
2507 	case IOMMU_DOMAIN_DMA:
2508 		switch(attr) {
2509 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2510 			smmu_domain->non_strict = *(int *)data;
2511 			break;
2512 		default:
2513 			ret = -ENODEV;
2514 		}
2515 		break;
2516 	default:
2517 		ret = -EINVAL;
2518 	}
2519 
2520 out_unlock:
2521 	mutex_unlock(&smmu_domain->init_mutex);
2522 	return ret;
2523 }
2524 
2525 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2526 {
2527 	return iommu_fwspec_add_ids(dev, args->args, 1);
2528 }
2529 
2530 static void arm_smmu_get_resv_regions(struct device *dev,
2531 				      struct list_head *head)
2532 {
2533 	struct iommu_resv_region *region;
2534 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2535 
2536 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2537 					 prot, IOMMU_RESV_SW_MSI);
2538 	if (!region)
2539 		return;
2540 
2541 	list_add_tail(&region->list, head);
2542 
2543 	iommu_dma_get_resv_regions(dev, head);
2544 }
2545 
2546 static bool arm_smmu_dev_has_feature(struct device *dev,
2547 				     enum iommu_dev_features feat)
2548 {
2549 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2550 
2551 	if (!master)
2552 		return false;
2553 
2554 	switch (feat) {
2555 	case IOMMU_DEV_FEAT_SVA:
2556 		return arm_smmu_master_sva_supported(master);
2557 	default:
2558 		return false;
2559 	}
2560 }
2561 
2562 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2563 					 enum iommu_dev_features feat)
2564 {
2565 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2566 
2567 	if (!master)
2568 		return false;
2569 
2570 	switch (feat) {
2571 	case IOMMU_DEV_FEAT_SVA:
2572 		return arm_smmu_master_sva_enabled(master);
2573 	default:
2574 		return false;
2575 	}
2576 }
2577 
2578 static int arm_smmu_dev_enable_feature(struct device *dev,
2579 				       enum iommu_dev_features feat)
2580 {
2581 	if (!arm_smmu_dev_has_feature(dev, feat))
2582 		return -ENODEV;
2583 
2584 	if (arm_smmu_dev_feature_enabled(dev, feat))
2585 		return -EBUSY;
2586 
2587 	switch (feat) {
2588 	case IOMMU_DEV_FEAT_SVA:
2589 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2590 	default:
2591 		return -EINVAL;
2592 	}
2593 }
2594 
2595 static int arm_smmu_dev_disable_feature(struct device *dev,
2596 					enum iommu_dev_features feat)
2597 {
2598 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2599 		return -EINVAL;
2600 
2601 	switch (feat) {
2602 	case IOMMU_DEV_FEAT_SVA:
2603 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2604 	default:
2605 		return -EINVAL;
2606 	}
2607 }
2608 
2609 static struct iommu_ops arm_smmu_ops = {
2610 	.capable		= arm_smmu_capable,
2611 	.domain_alloc		= arm_smmu_domain_alloc,
2612 	.domain_free		= arm_smmu_domain_free,
2613 	.attach_dev		= arm_smmu_attach_dev,
2614 	.map			= arm_smmu_map,
2615 	.unmap			= arm_smmu_unmap,
2616 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2617 	.iotlb_sync		= arm_smmu_iotlb_sync,
2618 	.iova_to_phys		= arm_smmu_iova_to_phys,
2619 	.probe_device		= arm_smmu_probe_device,
2620 	.release_device		= arm_smmu_release_device,
2621 	.device_group		= arm_smmu_device_group,
2622 	.domain_get_attr	= arm_smmu_domain_get_attr,
2623 	.domain_set_attr	= arm_smmu_domain_set_attr,
2624 	.of_xlate		= arm_smmu_of_xlate,
2625 	.get_resv_regions	= arm_smmu_get_resv_regions,
2626 	.put_resv_regions	= generic_iommu_put_resv_regions,
2627 	.dev_has_feat		= arm_smmu_dev_has_feature,
2628 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2629 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2630 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2631 	.sva_bind		= arm_smmu_sva_bind,
2632 	.sva_unbind		= arm_smmu_sva_unbind,
2633 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
2634 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2635 };
2636 
2637 /* Probing and initialisation functions */
2638 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2639 				   struct arm_smmu_queue *q,
2640 				   void __iomem *page,
2641 				   unsigned long prod_off,
2642 				   unsigned long cons_off,
2643 				   size_t dwords, const char *name)
2644 {
2645 	size_t qsz;
2646 
2647 	do {
2648 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2649 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2650 					      GFP_KERNEL);
2651 		if (q->base || qsz < PAGE_SIZE)
2652 			break;
2653 
2654 		q->llq.max_n_shift--;
2655 	} while (1);
2656 
2657 	if (!q->base) {
2658 		dev_err(smmu->dev,
2659 			"failed to allocate queue (0x%zx bytes) for %s\n",
2660 			qsz, name);
2661 		return -ENOMEM;
2662 	}
2663 
2664 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2665 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2666 			 1 << q->llq.max_n_shift, name);
2667 	}
2668 
2669 	q->prod_reg	= page + prod_off;
2670 	q->cons_reg	= page + cons_off;
2671 	q->ent_dwords	= dwords;
2672 
2673 	q->q_base  = Q_BASE_RWA;
2674 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2675 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2676 
2677 	q->llq.prod = q->llq.cons = 0;
2678 	return 0;
2679 }
2680 
/* devm action: release the command-queue bitmap passed in @data. */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free((unsigned long *)data);
}
2686 
2687 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2688 {
2689 	int ret = 0;
2690 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2691 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2692 	atomic_long_t *bitmap;
2693 
2694 	atomic_set(&cmdq->owner_prod, 0);
2695 	atomic_set(&cmdq->lock, 0);
2696 
2697 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2698 	if (!bitmap) {
2699 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2700 		ret = -ENOMEM;
2701 	} else {
2702 		cmdq->valid_map = bitmap;
2703 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2704 	}
2705 
2706 	return ret;
2707 }
2708 
2709 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2710 {
2711 	int ret;
2712 
2713 	/* cmdq */
2714 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2715 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2716 				      CMDQ_ENT_DWORDS, "cmdq");
2717 	if (ret)
2718 		return ret;
2719 
2720 	ret = arm_smmu_cmdq_init(smmu);
2721 	if (ret)
2722 		return ret;
2723 
2724 	/* evtq */
2725 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2726 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2727 				      EVTQ_ENT_DWORDS, "evtq");
2728 	if (ret)
2729 		return ret;
2730 
2731 	/* priq */
2732 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2733 		return 0;
2734 
2735 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2736 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2737 				       PRIQ_ENT_DWORDS, "priq");
2738 }
2739 
2740 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2741 {
2742 	unsigned int i;
2743 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2744 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2745 	void *strtab = smmu->strtab_cfg.strtab;
2746 
2747 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2748 	if (!cfg->l1_desc) {
2749 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2750 		return -ENOMEM;
2751 	}
2752 
2753 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2754 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2755 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2756 	}
2757 
2758 	return 0;
2759 }
2760 
2761 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2762 {
2763 	void *strtab;
2764 	u64 reg;
2765 	u32 size, l1size;
2766 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2767 
2768 	/* Calculate the L1 size, capped to the SIDSIZE. */
2769 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2770 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2771 	cfg->num_l1_ents = 1 << size;
2772 
2773 	size += STRTAB_SPLIT;
2774 	if (size < smmu->sid_bits)
2775 		dev_warn(smmu->dev,
2776 			 "2-level strtab only covers %u/%u bits of SID\n",
2777 			 size, smmu->sid_bits);
2778 
2779 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2780 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2781 				     GFP_KERNEL);
2782 	if (!strtab) {
2783 		dev_err(smmu->dev,
2784 			"failed to allocate l1 stream table (%u bytes)\n",
2785 			l1size);
2786 		return -ENOMEM;
2787 	}
2788 	cfg->strtab = strtab;
2789 
2790 	/* Configure strtab_base_cfg for 2 levels */
2791 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2792 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2793 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2794 	cfg->strtab_base_cfg = reg;
2795 
2796 	return arm_smmu_init_l1_strtab(smmu);
2797 }
2798 
2799 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2800 {
2801 	void *strtab;
2802 	u64 reg;
2803 	u32 size;
2804 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2805 
2806 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2807 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2808 				     GFP_KERNEL);
2809 	if (!strtab) {
2810 		dev_err(smmu->dev,
2811 			"failed to allocate linear stream table (%u bytes)\n",
2812 			size);
2813 		return -ENOMEM;
2814 	}
2815 	cfg->strtab = strtab;
2816 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2817 
2818 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2819 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2820 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2821 	cfg->strtab_base_cfg = reg;
2822 
2823 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2824 	return 0;
2825 }
2826 
2827 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2828 {
2829 	u64 reg;
2830 	int ret;
2831 
2832 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2833 		ret = arm_smmu_init_strtab_2lvl(smmu);
2834 	else
2835 		ret = arm_smmu_init_strtab_linear(smmu);
2836 
2837 	if (ret)
2838 		return ret;
2839 
2840 	/* Set the strtab base address */
2841 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2842 	reg |= STRTAB_BASE_RA;
2843 	smmu->strtab_cfg.strtab_base = reg;
2844 
2845 	/* Allocate the first VMID for stage-2 bypass STEs */
2846 	set_bit(0, smmu->vmid_map);
2847 	return 0;
2848 }
2849 
/*
 * Allocate the in-memory structures the SMMU needs: first the queues,
 * then the stream table. Returns the first error encountered, else 0.
 */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret = arm_smmu_init_queues(smmu);

	return ret ? ret : arm_smmu_init_strtab(smmu);
}
2860 
2861 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2862 				   unsigned int reg_off, unsigned int ack_off)
2863 {
2864 	u32 reg;
2865 
2866 	writel_relaxed(val, smmu->base + reg_off);
2867 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2868 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2869 }
2870 
2871 /* GBPA is "special" */
2872 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2873 {
2874 	int ret;
2875 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2876 
2877 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2878 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2879 	if (ret)
2880 		return ret;
2881 
2882 	reg &= ~clr;
2883 	reg |= set;
2884 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2885 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2886 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2887 
2888 	if (ret)
2889 		dev_err(smmu->dev, "GBPA not responding to update\n");
2890 	return ret;
2891 }
2892 
/* devm action: free the platform MSIs of the device passed in @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs((struct device *)data);
}
2898 
2899 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2900 {
2901 	phys_addr_t doorbell;
2902 	struct device *dev = msi_desc_to_dev(desc);
2903 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2904 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2905 
2906 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2907 	doorbell &= MSI_CFG0_ADDR_MASK;
2908 
2909 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2910 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2911 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2912 }
2913 
2914 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2915 {
2916 	struct msi_desc *desc;
2917 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2918 	struct device *dev = smmu->dev;
2919 
2920 	/* Clear the MSI address regs */
2921 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2922 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2923 
2924 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2925 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2926 	else
2927 		nvec--;
2928 
2929 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2930 		return;
2931 
2932 	if (!dev->msi_domain) {
2933 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2934 		return;
2935 	}
2936 
2937 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2938 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2939 	if (ret) {
2940 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2941 		return;
2942 	}
2943 
2944 	for_each_msi_entry(desc, dev) {
2945 		switch (desc->platform.msi_index) {
2946 		case EVTQ_MSI_INDEX:
2947 			smmu->evtq.q.irq = desc->irq;
2948 			break;
2949 		case GERROR_MSI_INDEX:
2950 			smmu->gerr_irq = desc->irq;
2951 			break;
2952 		case PRIQ_MSI_INDEX:
2953 			smmu->priq.q.irq = desc->irq;
2954 			break;
2955 		default:	/* Unknown */
2956 			continue;
2957 		}
2958 	}
2959 
2960 	/* Add callback to free MSIs on teardown */
2961 	devm_add_action(dev, arm_smmu_free_msis, dev);
2962 }
2963 
2964 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2965 {
2966 	int irq, ret;
2967 
2968 	arm_smmu_setup_msis(smmu);
2969 
2970 	/* Request interrupt lines */
2971 	irq = smmu->evtq.q.irq;
2972 	if (irq) {
2973 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2974 						arm_smmu_evtq_thread,
2975 						IRQF_ONESHOT,
2976 						"arm-smmu-v3-evtq", smmu);
2977 		if (ret < 0)
2978 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2979 	} else {
2980 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2981 	}
2982 
2983 	irq = smmu->gerr_irq;
2984 	if (irq) {
2985 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2986 				       0, "arm-smmu-v3-gerror", smmu);
2987 		if (ret < 0)
2988 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2989 	} else {
2990 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2991 	}
2992 
2993 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2994 		irq = smmu->priq.q.irq;
2995 		if (irq) {
2996 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2997 							arm_smmu_priq_thread,
2998 							IRQF_ONESHOT,
2999 							"arm-smmu-v3-priq",
3000 							smmu);
3001 			if (ret < 0)
3002 				dev_warn(smmu->dev,
3003 					 "failed to enable priq irq\n");
3004 		} else {
3005 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3006 		}
3007 	}
3008 }
3009 
3010 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3011 {
3012 	int ret, irq;
3013 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3014 
3015 	/* Disable IRQs first */
3016 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3017 				      ARM_SMMU_IRQ_CTRLACK);
3018 	if (ret) {
3019 		dev_err(smmu->dev, "failed to disable irqs\n");
3020 		return ret;
3021 	}
3022 
3023 	irq = smmu->combined_irq;
3024 	if (irq) {
3025 		/*
3026 		 * Cavium ThunderX2 implementation doesn't support unique irq
3027 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3028 		 */
3029 		ret = devm_request_threaded_irq(smmu->dev, irq,
3030 					arm_smmu_combined_irq_handler,
3031 					arm_smmu_combined_irq_thread,
3032 					IRQF_ONESHOT,
3033 					"arm-smmu-v3-combined-irq", smmu);
3034 		if (ret < 0)
3035 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3036 	} else
3037 		arm_smmu_setup_unique_irqs(smmu);
3038 
3039 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3040 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3041 
3042 	/* Enable interrupt generation on the SMMU */
3043 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3044 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3045 	if (ret)
3046 		dev_warn(smmu->dev, "failed to enable irqs\n");
3047 
3048 	return 0;
3049 }
3050 
3051 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3052 {
3053 	int ret;
3054 
3055 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3056 	if (ret)
3057 		dev_err(smmu->dev, "failed to clear cr0\n");
3058 
3059 	return ret;
3060 }
3061 
/*
 * Reset the SMMU and bring it up step by step.
 *
 * The sequence is: clear CR0, program CR1/CR2, the stream table and the
 * command queue registers, enable the command queue, invalidate cached
 * configuration and TLB entries, enable the event queue, the PRI queue (if
 * ARM_SMMU_FEAT_PRI) and ATS checking (if ARM_SMMU_FEAT_ATS), set up the
 * interrupts and finally write the accumulated enable bits to CR0.
 *
 * @smmu:   device to reset
 * @bypass: when true and the disable_bypass module parameter is false,
 *          CR0_SMMUEN is left clear and arm_smmu_update_gbpa() is called
 *          instead
 *
 * Returns 0 on success, or the error code of the first step that failed.
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	/*
	 * NOTE(review): only cmd.opcode is assigned below; the other fields
	 * stay uninitialised - confirm the command builder ignores them for
	 * the opcodes used here.
	 */
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		/* presumably (set, clear) argument order, i.e. sets GBPA_ABORT - TODO confirm */
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID;

	if (smmu->features & ARM_SMMU_FEAT_E2H)
		reg |= CR2_E2H;

	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* 'enables' accumulates the CR0 bits; each stage rewrites CR0 with it */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue (PROD/CONS are accessed through the page1 mapping) */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
		writel_relaxed(smmu->priq.q.llq.cons,
			       smmu->page1 + ARM_SMMU_PRIQ_CONS);

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	/* ATS checking, only when the hardware advertises ATS */
	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/* In a kdump kernel, drop the event and PRI queue enables from the final CR0 value */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3198 
/*
 * Read the ID registers (IDR0, IDR1, IDR3 and IDR5) and record what the
 * hardware supports in @smmu: feature flags, ASID/VMID/SID/SSID widths,
 * queue size limits, supported page sizes and input/output address sizes.
 *
 * As side effects, the page sizes are merged into
 * arm_smmu_ops.pgsize_bitmap and the DMA mask of smmu->dev is set from the
 * output address size.
 *
 * Returns 0 on success, or -ENXIO when the hardware reports a configuration
 * this driver rejects.
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	/* Coherency as already recorded in smmu->features before this call */
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI polling is only opted into for coherent SMMUs, unless disabled by module parameter */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	/* Forced stalling implies stall support, hence the fallthrough */
	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	/* At least one translation stage must be implemented */
	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		/* Provisional value; raised to at least the OAS further down */
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/*
	 * Merge this instance's page sizes into the ops shared by all
	 * instances; -1UL is treated as "not yet set by any instance".
	 */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	/* The input address size is never smaller than the output address size */
	smmu->ias = max(smmu->ias, smmu->oas);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
3411 
3412 #ifdef CONFIG_ACPI
3413 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3414 {
3415 	switch (model) {
3416 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3417 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3418 		break;
3419 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3420 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3421 		break;
3422 	}
3423 
3424 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3425 }
3426 
3427 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3428 				      struct arm_smmu_device *smmu)
3429 {
3430 	struct acpi_iort_smmu_v3 *iort_smmu;
3431 	struct device *dev = smmu->dev;
3432 	struct acpi_iort_node *node;
3433 
3434 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3435 
3436 	/* Retrieve SMMUv3 specific data */
3437 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3438 
3439 	acpi_smmu_get_options(iort_smmu->model, smmu);
3440 
3441 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3442 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3443 
3444 	return 0;
3445 }
3446 #else
/* Stub used when CONFIG_ACPI is not set: ACPI probing always fails with -ENODEV. */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
3452 #endif
3453 
3454 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3455 				    struct arm_smmu_device *smmu)
3456 {
3457 	struct device *dev = &pdev->dev;
3458 	u32 cells;
3459 	int ret = -EINVAL;
3460 
3461 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3462 		dev_err(dev, "missing #iommu-cells property\n");
3463 	else if (cells != 1)
3464 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3465 	else
3466 		ret = 0;
3467 
3468 	parse_driver_options(smmu);
3469 
3470 	if (of_dma_is_coherent(dev->of_node))
3471 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3472 
3473 	return ret;
3474 }
3475 
3476 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3477 {
3478 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3479 		return SZ_64K;
3480 	else
3481 		return SZ_128K;
3482 }
3483 
/*
 * Install @ops as the IOMMU ops of the PCI (if CONFIG_PCI), AMBA (if
 * CONFIG_ARM_AMBA) and platform buses. A bus that already has @ops
 * installed is skipped.
 *
 * On failure the buses handled earlier in the sequence are reset to NULL
 * ops and the error from bus_set_iommu() is returned; returns 0 on success.
 */
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
	/* Only jumped to when CONFIG_ARM_AMBA is set, hence __maybe_unused */
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}
3520 
/*
 * Map @size bytes of MMIO starting at @start by building a temporary memory
 * resource and passing it to devm_ioremap_resource(). The return value is
 * whatever devm_ioremap_resource() returns; callers check it with IS_ERR().
 */
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}
3528 
3529 static int arm_smmu_device_probe(struct platform_device *pdev)
3530 {
3531 	int irq, ret;
3532 	struct resource *res;
3533 	resource_size_t ioaddr;
3534 	struct arm_smmu_device *smmu;
3535 	struct device *dev = &pdev->dev;
3536 	bool bypass;
3537 
3538 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3539 	if (!smmu) {
3540 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3541 		return -ENOMEM;
3542 	}
3543 	smmu->dev = dev;
3544 
3545 	if (dev->of_node) {
3546 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3547 	} else {
3548 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3549 		if (ret == -ENODEV)
3550 			return ret;
3551 	}
3552 
3553 	/* Set bypass mode according to firmware probing result */
3554 	bypass = !!ret;
3555 
3556 	/* Base address */
3557 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3558 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3559 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3560 		return -EINVAL;
3561 	}
3562 	ioaddr = res->start;
3563 
3564 	/*
3565 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3566 	 * the PMCG registers which are reserved by the PMU driver.
3567 	 */
3568 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3569 	if (IS_ERR(smmu->base))
3570 		return PTR_ERR(smmu->base);
3571 
3572 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3573 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3574 					       ARM_SMMU_REG_SZ);
3575 		if (IS_ERR(smmu->page1))
3576 			return PTR_ERR(smmu->page1);
3577 	} else {
3578 		smmu->page1 = smmu->base;
3579 	}
3580 
3581 	/* Interrupt lines */
3582 
3583 	irq = platform_get_irq_byname_optional(pdev, "combined");
3584 	if (irq > 0)
3585 		smmu->combined_irq = irq;
3586 	else {
3587 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3588 		if (irq > 0)
3589 			smmu->evtq.q.irq = irq;
3590 
3591 		irq = platform_get_irq_byname_optional(pdev, "priq");
3592 		if (irq > 0)
3593 			smmu->priq.q.irq = irq;
3594 
3595 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3596 		if (irq > 0)
3597 			smmu->gerr_irq = irq;
3598 	}
3599 	/* Probe the h/w */
3600 	ret = arm_smmu_device_hw_probe(smmu);
3601 	if (ret)
3602 		return ret;
3603 
3604 	/* Initialise in-memory data structures */
3605 	ret = arm_smmu_init_structures(smmu);
3606 	if (ret)
3607 		return ret;
3608 
3609 	/* Record our private device structure */
3610 	platform_set_drvdata(pdev, smmu);
3611 
3612 	/* Reset the device */
3613 	ret = arm_smmu_device_reset(smmu, bypass);
3614 	if (ret)
3615 		return ret;
3616 
3617 	/* And we're up. Go go go! */
3618 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3619 				     "smmu3.%pa", &ioaddr);
3620 	if (ret)
3621 		return ret;
3622 
3623 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3624 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3625 
3626 	ret = iommu_device_register(&smmu->iommu);
3627 	if (ret) {
3628 		dev_err(dev, "Failed to register iommu\n");
3629 		return ret;
3630 	}
3631 
3632 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3633 }
3634 
/*
 * Platform driver remove callback: clear the bus IOMMU ops, unregister the
 * device from the IOMMU core, remove its sysfs entry and disable the SMMU
 * by clearing CR0. Always returns 0.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);

	return 0;
}
3646 
/* Platform driver shutdown callback: performs the same teardown as remove. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
3651 
/* Device-tree compatible strings handled by this driver. */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },	/* sentinel */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3657 
/*
 * Module exit helper passed to module_driver(): calls
 * arm_smmu_sva_notifier_synchronize() before unregistering the platform
 * driver.
 */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}
3663 
/*
 * Platform driver definition. Registration uses the stock
 * platform_driver_register(); unregistration goes through
 * arm_smmu_driver_unregister().
 */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);
3676 
3677 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3678 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3679 MODULE_ALIAS("platform:arm-smmu-v3");
3680 MODULE_LICENSE("GPL v2");
3681