xref: /openbmc/linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 5ed132db5ad4f58156ae9d28219396b6f764a9cb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = 1;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
/*
 * Module parameter (mode 0444), off by default. When set, MSI-based polling
 * for CMD_SYNC completion is disabled, as stated in the description below.
 */
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");
45 
/*
 * Index of each MSI source handled by the driver. The values are used as
 * row designators for arm_smmu_msi_cfg[].
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,		/* event queue */
	GERROR_MSI_INDEX,	/* global error */
	PRIQ_MSI_INDEX,		/* PRI queue */
	ARM_SMMU_MAX_MSIS,	/* number of entries; sizes arm_smmu_msi_cfg[] */
};
52 
/*
 * Per-MSI configuration register table, indexed by enum arm_smmu_msi_index.
 * Each row holds the IRQ_CFG0, IRQ_CFG1 and IRQ_CFG2 constants for that
 * interrupt source, in that order.
 * NOTE(review): presumably register offsets from the SMMU base; the users of
 * this table are outside this part of the file -- confirm there.
 */
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};
70 
/*
 * Pairs a driver option flag with the firmware (device-tree) property name
 * that enables it; see arm_smmu_options[] and parse_driver_options().
 */
struct arm_smmu_option_prop {
	u32 opt;		/* ARM_SMMU_OPT_* flag OR-ed into smmu->options */
	const char *prop;	/* boolean property name looked up on the OF node */
};
75 
/*
 * Global (non-static) XArray and mutex.
 * NOTE(review): going by the names, these back ASID allocation and its
 * serialisation; their users are outside this part of the file -- confirm.
 */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
78 
/*
 * Table of quirk options that can be enabled through device-tree properties.
 * Terminated by an entry whose .opt is 0; parse_driver_options() relies on
 * that sentinel to stop iterating.
 */
static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};
84 
85 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86 						 struct arm_smmu_device *smmu)
87 {
88 	if (offset > SZ_64K)
89 		return smmu->page1 + offset - SZ_64K;
90 
91 	return smmu->base + offset;
92 }
93 
/*
 * Convert a generic iommu_domain pointer into the arm_smmu_domain that embeds
 * it as its 'domain' member.
 */
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}
98 
99 static void parse_driver_options(struct arm_smmu_device *smmu)
100 {
101 	int i = 0;
102 
103 	do {
104 		if (of_property_read_bool(smmu->dev->of_node,
105 						arm_smmu_options[i].prop)) {
106 			smmu->options |= arm_smmu_options[i].opt;
107 			dev_notice(smmu->dev, "option %s\n",
108 				arm_smmu_options[i].prop);
109 		}
110 	} while (arm_smmu_options[++i].opt);
111 }
112 
113 /* Low-level queue manipulation functions */
114 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115 {
116 	u32 space, prod, cons;
117 
118 	prod = Q_IDX(q, q->prod);
119 	cons = Q_IDX(q, q->cons);
120 
121 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122 		space = (1 << q->max_n_shift) - (prod - cons);
123 	else
124 		space = cons - prod;
125 
126 	return space >= n;
127 }
128 
129 static bool queue_full(struct arm_smmu_ll_queue *q)
130 {
131 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133 }
134 
135 static bool queue_empty(struct arm_smmu_ll_queue *q)
136 {
137 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139 }
140 
141 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142 {
143 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147 }
148 
/*
 * Publish the shadow consumer pointer (q->llq.cons) to the hardware cons
 * register of @q.
 */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	/* Relaxed write: the required ordering is supplied by the barrier above. */
	writel_relaxed(q->llq.cons, q->cons_reg);
}
158 
159 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160 {
161 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163 }
164 
165 static int queue_sync_prod_in(struct arm_smmu_queue *q)
166 {
167 	u32 prod;
168 	int ret = 0;
169 
170 	/*
171 	 * We can't use the _relaxed() variant here, as we must prevent
172 	 * speculative reads of the queue before we have determined that
173 	 * prod has indeed moved.
174 	 */
175 	prod = readl(q->prod_reg);
176 
177 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
178 		ret = -EOVERFLOW;
179 
180 	q->llq.prod = prod;
181 	return ret;
182 }
183 
184 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
185 {
186 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
187 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
188 }
189 
190 static void queue_poll_init(struct arm_smmu_device *smmu,
191 			    struct arm_smmu_queue_poll *qp)
192 {
193 	qp->delay = 1;
194 	qp->spin_cnt = 0;
195 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
196 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
197 }
198 
199 static int queue_poll(struct arm_smmu_queue_poll *qp)
200 {
201 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
202 		return -ETIMEDOUT;
203 
204 	if (qp->wfe) {
205 		wfe();
206 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
207 		cpu_relax();
208 	} else {
209 		udelay(qp->delay);
210 		qp->delay *= 2;
211 		qp->spin_cnt = 0;
212 	}
213 
214 	return 0;
215 }
216 
217 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = cpu_to_le64(*src++);
223 }
224 
225 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
226 {
227 	int i;
228 
229 	for (i = 0; i < n_dwords; ++i)
230 		*dst++ = le64_to_cpu(*src++);
231 }
232 
233 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
234 {
235 	if (queue_empty(&q->llq))
236 		return -EAGAIN;
237 
238 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
239 	queue_inc_cons(&q->llq);
240 	queue_sync_cons_out(q);
241 	return 0;
242 }
243 
244 /* High-level queue accessors */
/*
 * Encode the command described by @ent into the raw command buffer @cmd.
 *
 * @cmd: output buffer of (1 << CMDQ_ENT_SZ_SHIFT) bytes; it is zeroed first.
 * @ent: command description; ent->opcode selects which fields are packed.
 *
 * Returns 0 on success, -EINVAL for a CMDQ_OP_PRI_RESP with an unrecognised
 * response code, or -ENOENT for an opcode this function does not handle.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	/* These commands carry no fields beyond the opcode. */
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
		break;
	case CMDQ_OP_CFGI_CD:
		/* CFGI_CD adds the SSID, then shares SID/LEAF with CFGI_STE. */
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		/* Same layout as TLBI_NH_VA, minus the ASID and with the IPA mask. */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		/* TLBI_NH_ASID adds the ASID, then shares the VMID with S12_VMALL. */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		/* Only the three known response codes are accepted. */
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		/*
		 * A non-zero MSI address selects IRQ (MSI write) completion
		 * signalling; otherwise completion is signalled with SEV.
		 */
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
337 
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339 					 u32 prod)
340 {
341 	struct arm_smmu_queue *q = &smmu->cmdq.q;
342 	struct arm_smmu_cmdq_ent ent = {
343 		.opcode = CMDQ_OP_CMD_SYNC,
344 	};
345 
346 	/*
347 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 	 * payload, so the write will zero the entire command on that platform.
349 	 */
350 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352 				   q->ent_dwords * 8;
353 	}
354 
355 	arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357 
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360 	static const char *cerror_str[] = {
361 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
362 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
363 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
364 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
365 	};
366 
367 	int i;
368 	u64 cmd[CMDQ_ENT_DWORDS];
369 	struct arm_smmu_queue *q = &smmu->cmdq.q;
370 	u32 cons = readl_relaxed(q->cons_reg);
371 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 	struct arm_smmu_cmdq_ent cmd_sync = {
373 		.opcode = CMDQ_OP_CMD_SYNC,
374 	};
375 
376 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378 
379 	switch (idx) {
380 	case CMDQ_ERR_CERROR_ABT_IDX:
381 		dev_err(smmu->dev, "retrying command fetch\n");
382 	case CMDQ_ERR_CERROR_NONE_IDX:
383 		return;
384 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
385 		/*
386 		 * ATC Invalidation Completion timeout. CONS is still pointing
387 		 * at the CMD_SYNC. Attempt to complete other pending commands
388 		 * by repeating the CMD_SYNC, though we might well end up back
389 		 * here since the ATC invalidation may still be pending.
390 		 */
391 		return;
392 	case CMDQ_ERR_CERROR_ILL_IDX:
393 	default:
394 		break;
395 	}
396 
397 	/*
398 	 * We may have concurrent producers, so we need to be careful
399 	 * not to touch any of the shadow cmdq state.
400 	 */
401 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402 	dev_err(smmu->dev, "skipping command in error state:\n");
403 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
405 
406 	/* Convert the erroneous command into a CMD_SYNC */
407 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
409 		return;
410 	}
411 
412 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414 
415 /*
416  * Command queue locking.
417  * This is a form of bastardised rwlock with the following major changes:
418  *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither has barrier semantics; each instead provides only a control
 *   dependency.
422  *
423  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424  *   fails if the caller appears to be the last lock holder (yes, this is
425  *   racy). All successful UNLOCK routines have RELEASE semantics.
426  */
/*
 * Take the cmdq lock in shared mode by incrementing the lock counter.
 * Waits for the counter to become non-negative if the increment found it
 * negative (i.e. held exclusively). All atomics used here are relaxed.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	/*
	 * Slow path: wait until the counter is observed non-negative, then try
	 * to bump it; retry if it changed in between.
	 */
	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
444 
/*
 * Drop a shared hold on the cmdq lock by decrementing the counter with
 * RELEASE semantics. The returned value is deliberately discarded.
 */
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}
449 
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
451 {
452 	if (atomic_read(&cmdq->lock) == 1)
453 		return false;
454 
455 	arm_smmu_cmdq_shared_unlock(cmdq);
456 	return true;
457 }
458 
/*
 * Try to take the cmdq lock exclusively with local interrupts disabled.
 *
 * Interrupts are saved/disabled into @flags, then the lock counter is moved
 * from 0 to INT_MIN with a relaxed cmpxchg. Evaluates to true on success
 * (interrupts stay disabled) and to false on failure (interrupts restored).
 * @cmdq is parenthesised so that any pointer expression can be passed.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&(cmdq)->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
468 
/*
 * Release an exclusively-held cmdq lock by resetting the counter to 0 with
 * RELEASE semantics, then restore the interrupt state saved in @flags.
 * @cmdq is parenthesised so that any pointer expression can be passed.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&(cmdq)->lock, 0);				\
	local_irq_restore(flags);					\
})
474 
475 
476 /*
477  * Command queue insertion.
478  * This is made fiddly by our attempts to achieve some sort of scalability
479  * since there is one queue shared amongst all of the CPUs in the system.  If
480  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481  * then you'll *love* this monstrosity.
482  *
483  * The basic idea is to split the queue up into ranges of commands that are
484  * owned by a given CPU; the owner may not have written all of the commands
485  * itself, but is responsible for advancing the hardware prod pointer when
486  * the time comes. The algorithm is roughly:
487  *
488  * 	1. Allocate some space in the queue. At this point we also discover
489  *	   whether the head of the queue is currently owned by another CPU,
490  *	   or whether we are the owner.
491  *
492  *	2. Write our commands into our allocated slots in the queue.
493  *
494  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
495  *
496  *	4. If we are an owner:
497  *		a. Wait for the previous owner to finish.
498  *		b. Mark the queue head as unowned, which tells us the range
499  *		   that we are responsible for publishing.
500  *		c. Wait for all commands in our owned range to become valid.
501  *		d. Advance the hardware prod pointer.
502  *		e. Tell the next owner we've finished.
503  *
504  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
505  *	   owner), then we need to stick around until it has completed:
506  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507  *		   to clear the first 4 bytes.
508  *		b. Otherwise, we spin waiting for the hardware cons pointer to
509  *		   advance past our command.
510  *
511  * The devil is in the details, particularly the use of locking for handling
512  * SYNC completion and freeing up space in the queue before we think that it is
513  * full.
514  */
/*
 * Walk the valid bitmap for the queue range [sprod, eprod), one bitmap word
 * at a time, and either toggle the bits (@set == true) or wait for them to
 * reach the "valid" state for the current lap (@set == false).
 *
 * @cmdq:  command queue owning valid_map
 * @sprod: producer value of the first entry in the range
 * @eprod: producer value one past the last entry in the range
 * @set:   true to mark entries valid, false to poll until they are valid
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	/* Local cursor; only max_n_shift and prod are needed by the helpers. */
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	/* Word and bit position of the (exclusive) end of the range. */
	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		/* Word and bit position of the current start of the range. */
		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		/* Stop short of the word's end if the range ends inside it. */
		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			/*
			 * Expected pattern: all ones under the mask when the
			 * wrap bit is clear, all zeroes when it is set (the
			 * addition overflows ULONG_MAX to 0).
			 */
			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		/* Advance past the bits handled in this word. */
		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}
560 
/*
 * Mark all entries in the range [sprod, eprod) as valid.
 * Thin wrapper around __arm_smmu_cmdq_poll_set_valid_map() in "set" mode.
 */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
567 
/*
 * Wait for all entries in the range [sprod, eprod) to become valid.
 * Thin wrapper around __arm_smmu_cmdq_poll_set_valid_map() in "poll" mode.
 */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
574 
/*
 * Wait for the command queue to become non-full.
 *
 * @smmu: SMMU whose command queue is polled
 * @llq:  caller's snapshot of the queue state; refreshed from the shared
 *        shadow state before returning
 *
 * Returns 0 on success, or the error from queue_poll() (-ETIMEDOUT) if the
 * queue is still full when the poll deadline expires.
 */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		/* Refresh the shared shadow cons straight from hardware. */
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	/* Somebody else holds the lock: re-sample the shadow state until space shows up. */
	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}
606 
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 *
 * Polls the first 32-bit word of the command at llq->prod until it reads as
 * zero or the poll times out. On success llq->cons is set to the position
 * just after the CMD_SYNC; on timeout it is set to llq->prod.
 *
 * Returns 0 on completion, or the error from queue_poll() on timeout.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	/* Loop ends when the word is zero or queue_poll() reports an error. */
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
630 
/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 *
 * Starts from the shared shadow queue state and then repeatedly re-reads the
 * hardware cons register into llq->cons until the entry at the original
 * llq->prod has been consumed or the poll times out.
 *
 * Returns 0 once consumed, or the error from queue_poll() on timeout.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	/* Remember the target before llq->val is overwritten below. */
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 * 	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 * 			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}
684 
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686 					 struct arm_smmu_ll_queue *llq)
687 {
688 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
690 
691 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
692 }
693 
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
695 					u32 prod, int n)
696 {
697 	int i;
698 	struct arm_smmu_ll_queue llq = {
699 		.max_n_shift	= cmdq->q.llq.max_n_shift,
700 		.prod		= prod,
701 	};
702 
703 	for (i = 0; i < n; ++i) {
704 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
705 
706 		prod = queue_inc_prod_n(&llq, i);
707 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
708 	}
709 }
710 
/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 *
 * @smmu: SMMU whose command queue receives the commands
 * @cmds: @n pre-built commands, CMDQ_ENT_DWORDS 64-bit words each; not
 *        dereferenced when @n is 0
 * @n:    number of commands in @cmds
 * @sync: if true, a CMD_SYNC is appended and waited for
 *
 * Runs with local interrupts disabled from allocation until return (they are
 * briefly re-enabled while waiting for queue space).
 *
 * Returns 0, or the error from arm_smmu_cmdq_poll_until_sync() if the
 * CMD_SYNC did not complete in time.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		/* Wait (with interrupts enabled) until n (+1 for the sync) slots fit. */
		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		/* New head: prod advanced past our slots, with the OWNED flag set. */
		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		/* Lost the race: retry from the value somebody else installed. */
		llq.val = old;
	} while (1);
	/* We are the owner if the flag was clear in the value we replaced. */
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		/* The CMD_SYNC goes into the slot right after our n commands. */
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		/* Point llq.prod at the CMD_SYNC slot for the poll routines. */
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}
846 
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848 				   struct arm_smmu_cmdq_ent *ent)
849 {
850 	u64 cmd[CMDQ_ENT_DWORDS];
851 
852 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
854 			 ent->opcode);
855 		return -EINVAL;
856 	}
857 
858 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
859 }
860 
/*
 * Insert a lone CMD_SYNC (no preceding commands) and wait for it to
 * complete. Returns the result of arm_smmu_cmdq_issue_cmdlist().
 */
static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}
865 
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867 				    struct arm_smmu_cmdq_batch *cmds,
868 				    struct arm_smmu_cmdq_ent *cmd)
869 {
870 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
871 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
872 		cmds->num = 0;
873 	}
874 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
875 	cmds->num++;
876 }
877 
/*
 * Insert every command accumulated in @cmds followed by a CMD_SYNC, and wait
 * for the sync to complete. Returns the result of
 * arm_smmu_cmdq_issue_cmdlist().
 */
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
883 
884 /* Context descriptor manipulation functions */
885 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
886 {
887 	struct arm_smmu_cmdq_ent cmd = {
888 		.opcode = CMDQ_OP_TLBI_NH_ASID,
889 		.tlbi.asid = asid,
890 	};
891 
892 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
893 	arm_smmu_cmdq_issue_sync(smmu);
894 }
895 
896 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
897 			     int ssid, bool leaf)
898 {
899 	size_t i;
900 	unsigned long flags;
901 	struct arm_smmu_master *master;
902 	struct arm_smmu_cmdq_batch cmds = {};
903 	struct arm_smmu_device *smmu = smmu_domain->smmu;
904 	struct arm_smmu_cmdq_ent cmd = {
905 		.opcode	= CMDQ_OP_CFGI_CD,
906 		.cfgi	= {
907 			.ssid	= ssid,
908 			.leaf	= leaf,
909 		},
910 	};
911 
912 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
913 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
914 		for (i = 0; i < master->num_sids; i++) {
915 			cmd.cfgi.sid = master->sids[i];
916 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
917 		}
918 	}
919 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
920 
921 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
922 }
923 
924 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
925 					struct arm_smmu_l1_ctx_desc *l1_desc)
926 {
927 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
928 
929 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
930 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
931 	if (!l1_desc->l2ptr) {
932 		dev_warn(smmu->dev,
933 			 "failed to allocate context descriptor table\n");
934 		return -ENOMEM;
935 	}
936 	return 0;
937 }
938 
939 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
940 				      struct arm_smmu_l1_ctx_desc *l1_desc)
941 {
942 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
943 		  CTXDESC_L1_DESC_V;
944 
945 	/* See comment in arm_smmu_write_ctx_desc() */
946 	WRITE_ONCE(*dst, cpu_to_le64(val));
947 }
948 
949 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
950 				   u32 ssid)
951 {
952 	__le64 *l1ptr;
953 	unsigned int idx;
954 	struct arm_smmu_l1_ctx_desc *l1_desc;
955 	struct arm_smmu_device *smmu = smmu_domain->smmu;
956 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
957 
958 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
959 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
960 
961 	idx = ssid >> CTXDESC_SPLIT;
962 	l1_desc = &cdcfg->l1_desc[idx];
963 	if (!l1_desc->l2ptr) {
964 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
965 			return NULL;
966 
967 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
968 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
969 		/* An invalid L1CD can be cached */
970 		arm_smmu_sync_cd(smmu_domain, ssid, false);
971 	}
972 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
973 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
974 }
975 
976 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
977 			    struct arm_smmu_ctx_desc *cd)
978 {
979 	/*
980 	 * This function handles the following cases:
981 	 *
982 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
983 	 * (2) Install a secondary CD, for SID+SSID traffic.
984 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
985 	 *     CD, then invalidate the old entry and mappings.
986 	 * (4) Remove a secondary CD.
987 	 */
988 	u64 val;
989 	bool cd_live;
990 	__le64 *cdptr;
991 	struct arm_smmu_device *smmu = smmu_domain->smmu;
992 
993 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
994 		return -E2BIG;
995 
996 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
997 	if (!cdptr)
998 		return -ENOMEM;
999 
1000 	val = le64_to_cpu(cdptr[0]);
1001 	cd_live = !!(val & CTXDESC_CD_0_V);
1002 
1003 	if (!cd) { /* (4) */
1004 		val = 0;
1005 	} else if (cd_live) { /* (3) */
1006 		val &= ~CTXDESC_CD_0_ASID;
1007 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1008 		/*
1009 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1010 		 * this substream's traffic
1011 		 */
1012 	} else { /* (1) and (2) */
1013 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1014 		cdptr[2] = 0;
1015 		cdptr[3] = cpu_to_le64(cd->mair);
1016 
1017 		/*
1018 		 * STE is live, and the SMMU might read dwords of this CD in any
1019 		 * order. Ensure that it observes valid values before reading
1020 		 * V=1.
1021 		 */
1022 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1023 
1024 		val = cd->tcr |
1025 #ifdef __BIG_ENDIAN
1026 			CTXDESC_CD_0_ENDI |
1027 #endif
1028 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1029 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1030 			CTXDESC_CD_0_AA64 |
1031 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1032 			CTXDESC_CD_0_V;
1033 
1034 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1035 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1036 			val |= CTXDESC_CD_0_S;
1037 	}
1038 
1039 	/*
1040 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1041 	 * "Configuration structures and configuration invalidation completion"
1042 	 *
1043 	 *   The size of single-copy atomic reads made by the SMMU is
1044 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1045 	 *   field within an aligned 64-bit span of a structure can be altered
1046 	 *   without first making the structure invalid.
1047 	 */
1048 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1049 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1050 	return 0;
1051 }
1052 
1053 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054 {
1055 	int ret;
1056 	size_t l1size;
1057 	size_t max_contexts;
1058 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1059 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061 
1062 	max_contexts = 1 << cfg->s1cdmax;
1063 
1064 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1066 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067 		cdcfg->num_l1_ents = max_contexts;
1068 
1069 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070 	} else {
1071 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073 						  CTXDESC_L2_ENTRIES);
1074 
1075 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076 					      sizeof(*cdcfg->l1_desc),
1077 					      GFP_KERNEL);
1078 		if (!cdcfg->l1_desc)
1079 			return -ENOMEM;
1080 
1081 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082 	}
1083 
1084 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085 					   GFP_KERNEL);
1086 	if (!cdcfg->cdtab) {
1087 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088 		ret = -ENOMEM;
1089 		goto err_free_l1;
1090 	}
1091 
1092 	return 0;
1093 
1094 err_free_l1:
1095 	if (cdcfg->l1_desc) {
1096 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1097 		cdcfg->l1_desc = NULL;
1098 	}
1099 	return ret;
1100 }
1101 
1102 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103 {
1104 	int i;
1105 	size_t size, l1size;
1106 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1107 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108 
1109 	if (cdcfg->l1_desc) {
1110 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111 
1112 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113 			if (!cdcfg->l1_desc[i].l2ptr)
1114 				continue;
1115 
1116 			dmam_free_coherent(smmu->dev, size,
1117 					   cdcfg->l1_desc[i].l2ptr,
1118 					   cdcfg->l1_desc[i].l2ptr_dma);
1119 		}
1120 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1121 		cdcfg->l1_desc = NULL;
1122 
1123 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124 	} else {
1125 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126 	}
1127 
1128 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129 	cdcfg->cdtab_dma = 0;
1130 	cdcfg->cdtab = NULL;
1131 }
1132 
1133 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134 {
1135 	bool free;
1136 	struct arm_smmu_ctx_desc *old_cd;
1137 
1138 	if (!cd->asid)
1139 		return false;
1140 
1141 	free = refcount_dec_and_test(&cd->refs);
1142 	if (free) {
1143 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144 		WARN_ON(old_cd != cd);
1145 	}
1146 	return free;
1147 }
1148 
1149 /* Stream table manipulation functions */
1150 static void
1151 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152 {
1153 	u64 val = 0;
1154 
1155 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157 
1158 	/* See comment in arm_smmu_write_ctx_desc() */
1159 	WRITE_ONCE(*dst, cpu_to_le64(val));
1160 }
1161 
1162 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163 {
1164 	struct arm_smmu_cmdq_ent cmd = {
1165 		.opcode	= CMDQ_OP_CFGI_STE,
1166 		.cfgi	= {
1167 			.sid	= sid,
1168 			.leaf	= true,
1169 		},
1170 	};
1171 
1172 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173 	arm_smmu_cmdq_issue_sync(smmu);
1174 }
1175 
1176 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177 				      __le64 *dst)
1178 {
1179 	/*
1180 	 * This is hideously complicated, but we only really care about
1181 	 * three cases at the moment:
1182 	 *
1183 	 * 1. Invalid (all zero) -> bypass/fault (init)
1184 	 * 2. Bypass/fault -> translation/bypass (attach)
1185 	 * 3. Translation/bypass -> bypass/fault (detach)
1186 	 *
1187 	 * Given that we can't update the STE atomically and the SMMU
1188 	 * doesn't read the thing in a defined order, that leaves us
1189 	 * with the following maintenance requirements:
1190 	 *
1191 	 * 1. Update Config, return (init time STEs aren't live)
1192 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193 	 * 3. Update Config, sync
1194 	 */
1195 	u64 val = le64_to_cpu(dst[0]);
1196 	bool ste_live = false;
1197 	struct arm_smmu_device *smmu = NULL;
1198 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200 	struct arm_smmu_domain *smmu_domain = NULL;
1201 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1202 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1203 		.prefetch	= {
1204 			.sid	= sid,
1205 		},
1206 	};
1207 
1208 	if (master) {
1209 		smmu_domain = master->domain;
1210 		smmu = master->smmu;
1211 	}
1212 
1213 	if (smmu_domain) {
1214 		switch (smmu_domain->stage) {
1215 		case ARM_SMMU_DOMAIN_S1:
1216 			s1_cfg = &smmu_domain->s1_cfg;
1217 			break;
1218 		case ARM_SMMU_DOMAIN_S2:
1219 		case ARM_SMMU_DOMAIN_NESTED:
1220 			s2_cfg = &smmu_domain->s2_cfg;
1221 			break;
1222 		default:
1223 			break;
1224 		}
1225 	}
1226 
1227 	if (val & STRTAB_STE_0_V) {
1228 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229 		case STRTAB_STE_0_CFG_BYPASS:
1230 			break;
1231 		case STRTAB_STE_0_CFG_S1_TRANS:
1232 		case STRTAB_STE_0_CFG_S2_TRANS:
1233 			ste_live = true;
1234 			break;
1235 		case STRTAB_STE_0_CFG_ABORT:
1236 			BUG_ON(!disable_bypass);
1237 			break;
1238 		default:
1239 			BUG(); /* STE corruption */
1240 		}
1241 	}
1242 
1243 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1244 	val = STRTAB_STE_0_V;
1245 
1246 	/* Bypass/fault */
1247 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248 		if (!smmu_domain && disable_bypass)
1249 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250 		else
1251 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252 
1253 		dst[0] = cpu_to_le64(val);
1254 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255 						STRTAB_STE_1_SHCFG_INCOMING));
1256 		dst[2] = 0; /* Nuke the VMID */
1257 		/*
1258 		 * The SMMU can perform negative caching, so we must sync
1259 		 * the STE regardless of whether the old value was live.
1260 		 */
1261 		if (smmu)
1262 			arm_smmu_sync_ste_for_sid(smmu, sid);
1263 		return;
1264 	}
1265 
1266 	if (s1_cfg) {
1267 		BUG_ON(ste_live);
1268 		dst[1] = cpu_to_le64(
1269 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274 
1275 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278 
1279 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283 	}
1284 
1285 	if (s2_cfg) {
1286 		BUG_ON(ste_live);
1287 		dst[2] = cpu_to_le64(
1288 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290 #ifdef __BIG_ENDIAN
1291 			 STRTAB_STE_2_S2ENDI |
1292 #endif
1293 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294 			 STRTAB_STE_2_S2R);
1295 
1296 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297 
1298 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299 	}
1300 
1301 	if (master->ats_enabled)
1302 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303 						 STRTAB_STE_1_EATS_TRANS));
1304 
1305 	arm_smmu_sync_ste_for_sid(smmu, sid);
1306 	/* See comment in arm_smmu_write_ctx_desc() */
1307 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1308 	arm_smmu_sync_ste_for_sid(smmu, sid);
1309 
1310 	/* It's likely that we'll want to use the new STE soon */
1311 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313 }
1314 
1315 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316 {
1317 	unsigned int i;
1318 
1319 	for (i = 0; i < nent; ++i) {
1320 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321 		strtab += STRTAB_STE_DWORDS;
1322 	}
1323 }
1324 
1325 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326 {
1327 	size_t size;
1328 	void *strtab;
1329 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331 
1332 	if (desc->l2ptr)
1333 		return 0;
1334 
1335 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337 
1338 	desc->span = STRTAB_SPLIT + 1;
1339 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340 					  GFP_KERNEL);
1341 	if (!desc->l2ptr) {
1342 		dev_err(smmu->dev,
1343 			"failed to allocate l2 stream table for SID %u\n",
1344 			sid);
1345 		return -ENOMEM;
1346 	}
1347 
1348 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1350 	return 0;
1351 }
1352 
1353 /* IRQ and event handlers */
1354 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355 {
1356 	int i;
1357 	struct arm_smmu_device *smmu = dev;
1358 	struct arm_smmu_queue *q = &smmu->evtq.q;
1359 	struct arm_smmu_ll_queue *llq = &q->llq;
1360 	u64 evt[EVTQ_ENT_DWORDS];
1361 
1362 	do {
1363 		while (!queue_remove_raw(q, evt)) {
1364 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365 
1366 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368 				dev_info(smmu->dev, "\t0x%016llx\n",
1369 					 (unsigned long long)evt[i]);
1370 
1371 		}
1372 
1373 		/*
1374 		 * Not much we can do on overflow, so scream and pretend we're
1375 		 * trying harder.
1376 		 */
1377 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1378 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1379 	} while (!queue_empty(llq));
1380 
1381 	/* Sync our overflow flag, as we believe we're up to speed */
1382 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1383 		    Q_IDX(llq, llq->cons);
1384 	return IRQ_HANDLED;
1385 }
1386 
1387 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1388 {
1389 	u32 sid, ssid;
1390 	u16 grpid;
1391 	bool ssv, last;
1392 
1393 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1394 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1395 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1396 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1397 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1398 
1399 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1400 	dev_info(smmu->dev,
1401 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1402 		 sid, ssid, grpid, last ? "L" : "",
1403 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1404 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1405 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1406 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1407 		 evt[1] & PRIQ_1_ADDR_MASK);
1408 
1409 	if (last) {
1410 		struct arm_smmu_cmdq_ent cmd = {
1411 			.opcode			= CMDQ_OP_PRI_RESP,
1412 			.substream_valid	= ssv,
1413 			.pri			= {
1414 				.sid	= sid,
1415 				.ssid	= ssid,
1416 				.grpid	= grpid,
1417 				.resp	= PRI_RESP_DENY,
1418 			},
1419 		};
1420 
1421 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1422 	}
1423 }
1424 
1425 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1426 {
1427 	struct arm_smmu_device *smmu = dev;
1428 	struct arm_smmu_queue *q = &smmu->priq.q;
1429 	struct arm_smmu_ll_queue *llq = &q->llq;
1430 	u64 evt[PRIQ_ENT_DWORDS];
1431 
1432 	do {
1433 		while (!queue_remove_raw(q, evt))
1434 			arm_smmu_handle_ppr(smmu, evt);
1435 
1436 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1437 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1438 	} while (!queue_empty(llq));
1439 
1440 	/* Sync our overflow flag, as we believe we're up to speed */
1441 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1442 		      Q_IDX(llq, llq->cons);
1443 	queue_sync_cons_out(q);
1444 	return IRQ_HANDLED;
1445 }
1446 
1447 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1448 
1449 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1450 {
1451 	u32 gerror, gerrorn, active;
1452 	struct arm_smmu_device *smmu = dev;
1453 
1454 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1455 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1456 
1457 	active = gerror ^ gerrorn;
1458 	if (!(active & GERROR_ERR_MASK))
1459 		return IRQ_NONE; /* No errors pending */
1460 
1461 	dev_warn(smmu->dev,
1462 		 "unexpected global error reported (0x%08x), this could be serious\n",
1463 		 active);
1464 
1465 	if (active & GERROR_SFM_ERR) {
1466 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1467 		arm_smmu_device_disable(smmu);
1468 	}
1469 
1470 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1471 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1472 
1473 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1474 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1475 
1476 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1477 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1478 
1479 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1480 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1481 
1482 	if (active & GERROR_PRIQ_ABT_ERR)
1483 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1484 
1485 	if (active & GERROR_EVTQ_ABT_ERR)
1486 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1487 
1488 	if (active & GERROR_CMDQ_ERR)
1489 		arm_smmu_cmdq_skip_err(smmu);
1490 
1491 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1492 	return IRQ_HANDLED;
1493 }
1494 
1495 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1496 {
1497 	struct arm_smmu_device *smmu = dev;
1498 
1499 	arm_smmu_evtq_thread(irq, dev);
1500 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1501 		arm_smmu_priq_thread(irq, dev);
1502 
1503 	return IRQ_HANDLED;
1504 }
1505 
1506 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1507 {
1508 	arm_smmu_gerror_handler(irq, dev);
1509 	return IRQ_WAKE_THREAD;
1510 }
1511 
1512 static void
1513 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1514 			struct arm_smmu_cmdq_ent *cmd)
1515 {
1516 	size_t log2_span;
1517 	size_t span_mask;
1518 	/* ATC invalidates are always on 4096-bytes pages */
1519 	size_t inval_grain_shift = 12;
1520 	unsigned long page_start, page_end;
1521 
1522 	*cmd = (struct arm_smmu_cmdq_ent) {
1523 		.opcode			= CMDQ_OP_ATC_INV,
1524 		.substream_valid	= !!ssid,
1525 		.atc.ssid		= ssid,
1526 	};
1527 
1528 	if (!size) {
1529 		cmd->atc.size = ATC_INV_SIZE_ALL;
1530 		return;
1531 	}
1532 
1533 	page_start	= iova >> inval_grain_shift;
1534 	page_end	= (iova + size - 1) >> inval_grain_shift;
1535 
1536 	/*
1537 	 * In an ATS Invalidate Request, the address must be aligned on the
1538 	 * range size, which must be a power of two number of page sizes. We
1539 	 * thus have to choose between grossly over-invalidating the region, or
1540 	 * splitting the invalidation into multiple commands. For simplicity
1541 	 * we'll go with the first solution, but should refine it in the future
1542 	 * if multiple commands are shown to be more efficient.
1543 	 *
1544 	 * Find the smallest power of two that covers the range. The most
1545 	 * significant differing bit between the start and end addresses,
1546 	 * fls(start ^ end), indicates the required span. For example:
1547 	 *
1548 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1549 	 *		x = 0b1000 ^ 0b1011 = 0b11
1550 	 *		span = 1 << fls(x) = 4
1551 	 *
1552 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1553 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1554 	 *		span = 1 << fls(x) = 16
1555 	 */
1556 	log2_span	= fls_long(page_start ^ page_end);
1557 	span_mask	= (1ULL << log2_span) - 1;
1558 
1559 	page_start	&= ~span_mask;
1560 
1561 	cmd->atc.addr	= page_start << inval_grain_shift;
1562 	cmd->atc.size	= log2_span;
1563 }
1564 
1565 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1566 {
1567 	int i;
1568 	struct arm_smmu_cmdq_ent cmd;
1569 
1570 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1571 
1572 	for (i = 0; i < master->num_sids; i++) {
1573 		cmd.atc.sid = master->sids[i];
1574 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1575 	}
1576 
1577 	return arm_smmu_cmdq_issue_sync(master->smmu);
1578 }
1579 
1580 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1581 				   int ssid, unsigned long iova, size_t size)
1582 {
1583 	int i;
1584 	unsigned long flags;
1585 	struct arm_smmu_cmdq_ent cmd;
1586 	struct arm_smmu_master *master;
1587 	struct arm_smmu_cmdq_batch cmds = {};
1588 
1589 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1590 		return 0;
1591 
1592 	/*
1593 	 * Ensure that we've completed prior invalidation of the main TLBs
1594 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1595 	 * arm_smmu_enable_ats():
1596 	 *
1597 	 *	// unmap()			// arm_smmu_enable_ats()
1598 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1599 	 *	smp_mb();			[...]
1600 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1601 	 *
1602 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1603 	 * ATS was enabled at the PCI device before completion of the TLBI.
1604 	 */
1605 	smp_mb();
1606 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1607 		return 0;
1608 
1609 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1610 
1611 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1612 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1613 		if (!master->ats_enabled)
1614 			continue;
1615 
1616 		for (i = 0; i < master->num_sids; i++) {
1617 			cmd.atc.sid = master->sids[i];
1618 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1619 		}
1620 	}
1621 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1622 
1623 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1624 }
1625 
1626 /* IO_PGTABLE API */
1627 static void arm_smmu_tlb_inv_context(void *cookie)
1628 {
1629 	struct arm_smmu_domain *smmu_domain = cookie;
1630 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1631 	struct arm_smmu_cmdq_ent cmd;
1632 
1633 	/*
1634 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1635 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1636 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1637 	 * insertion to guarantee those are observed before the TLBI. Do be
1638 	 * careful, 007.
1639 	 */
1640 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1641 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1642 	} else {
1643 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1644 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1645 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1646 		arm_smmu_cmdq_issue_sync(smmu);
1647 	}
1648 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1649 }
1650 
1651 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1652 				   size_t granule, bool leaf,
1653 				   struct arm_smmu_domain *smmu_domain)
1654 {
1655 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1656 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1657 	size_t inv_range = granule;
1658 	struct arm_smmu_cmdq_batch cmds = {};
1659 	struct arm_smmu_cmdq_ent cmd = {
1660 		.tlbi = {
1661 			.leaf	= leaf,
1662 		},
1663 	};
1664 
1665 	if (!size)
1666 		return;
1667 
1668 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1669 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1670 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1671 	} else {
1672 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1673 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1674 	}
1675 
1676 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1677 		/* Get the leaf page size */
1678 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1679 
1680 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1681 		cmd.tlbi.tg = (tg - 10) / 2;
1682 
1683 		/* Determine what level the granule is at */
1684 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1685 
1686 		num_pages = size >> tg;
1687 	}
1688 
1689 	while (iova < end) {
1690 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1691 			/*
1692 			 * On each iteration of the loop, the range is 5 bits
1693 			 * worth of the aligned size remaining.
1694 			 * The range in pages is:
1695 			 *
1696 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1697 			 */
1698 			unsigned long scale, num;
1699 
1700 			/* Determine the power of 2 multiple number of pages */
1701 			scale = __ffs(num_pages);
1702 			cmd.tlbi.scale = scale;
1703 
1704 			/* Determine how many chunks of 2^scale size we have */
1705 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1706 			cmd.tlbi.num = num - 1;
1707 
1708 			/* range is num * 2^scale * pgsize */
1709 			inv_range = num << (scale + tg);
1710 
1711 			/* Clear out the lower order bits for the next iteration */
1712 			num_pages -= num << scale;
1713 		}
1714 
1715 		cmd.tlbi.addr = iova;
1716 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1717 		iova += inv_range;
1718 	}
1719 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1720 
1721 	/*
1722 	 * Unfortunately, this can't be leaf-only since we may have
1723 	 * zapped an entire table.
1724 	 */
1725 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1726 }
1727 
1728 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1729 					 unsigned long iova, size_t granule,
1730 					 void *cookie)
1731 {
1732 	struct arm_smmu_domain *smmu_domain = cookie;
1733 	struct iommu_domain *domain = &smmu_domain->domain;
1734 
1735 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1736 }
1737 
1738 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1739 				  size_t granule, void *cookie)
1740 {
1741 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1742 }
1743 
1744 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
1745 				  size_t granule, void *cookie)
1746 {
1747 	arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
1748 }
1749 
1750 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1751 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1752 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1753 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
1754 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1755 };
1756 
1757 /* IOMMU API */
1758 static bool arm_smmu_capable(enum iommu_cap cap)
1759 {
1760 	switch (cap) {
1761 	case IOMMU_CAP_CACHE_COHERENCY:
1762 		return true;
1763 	case IOMMU_CAP_NOEXEC:
1764 		return true;
1765 	default:
1766 		return false;
1767 	}
1768 }
1769 
1770 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1771 {
1772 	struct arm_smmu_domain *smmu_domain;
1773 
1774 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1775 	    type != IOMMU_DOMAIN_DMA &&
1776 	    type != IOMMU_DOMAIN_IDENTITY)
1777 		return NULL;
1778 
1779 	/*
1780 	 * Allocate the domain and initialise some of its data structures.
1781 	 * We can't really do anything meaningful until we've added a
1782 	 * master.
1783 	 */
1784 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1785 	if (!smmu_domain)
1786 		return NULL;
1787 
1788 	if (type == IOMMU_DOMAIN_DMA &&
1789 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1790 		kfree(smmu_domain);
1791 		return NULL;
1792 	}
1793 
1794 	mutex_init(&smmu_domain->init_mutex);
1795 	INIT_LIST_HEAD(&smmu_domain->devices);
1796 	spin_lock_init(&smmu_domain->devices_lock);
1797 
1798 	return &smmu_domain->domain;
1799 }
1800 
1801 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1802 {
1803 	int idx, size = 1 << span;
1804 
1805 	do {
1806 		idx = find_first_zero_bit(map, size);
1807 		if (idx == size)
1808 			return -ENOSPC;
1809 	} while (test_and_set_bit(idx, map));
1810 
1811 	return idx;
1812 }
1813 
/* Give bit @idx of @map back to the pool by clearing it. */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
1818 
1819 static void arm_smmu_domain_free(struct iommu_domain *domain)
1820 {
1821 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1822 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1823 
1824 	iommu_put_dma_cookie(domain);
1825 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1826 
1827 	/* Free the CD and ASID, if we allocated them */
1828 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1829 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1830 
1831 		/* Prevent SVA from touching the CD while we're freeing it */
1832 		mutex_lock(&arm_smmu_asid_lock);
1833 		if (cfg->cdcfg.cdtab)
1834 			arm_smmu_free_cd_tables(smmu_domain);
1835 		arm_smmu_free_asid(&cfg->cd);
1836 		mutex_unlock(&arm_smmu_asid_lock);
1837 	} else {
1838 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1839 		if (cfg->vmid)
1840 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1841 	}
1842 
1843 	kfree(smmu_domain);
1844 }
1845 
1846 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1847 				       struct arm_smmu_master *master,
1848 				       struct io_pgtable_cfg *pgtbl_cfg)
1849 {
1850 	int ret;
1851 	u32 asid;
1852 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1853 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1854 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1855 
1856 	refcount_set(&cfg->cd.refs, 1);
1857 
1858 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1859 	mutex_lock(&arm_smmu_asid_lock);
1860 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1861 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1862 	if (ret)
1863 		goto out_unlock;
1864 
1865 	cfg->s1cdmax = master->ssid_bits;
1866 
1867 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1868 	if (ret)
1869 		goto out_free_asid;
1870 
1871 	cfg->cd.asid	= (u16)asid;
1872 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1873 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1874 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1875 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1876 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1877 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1878 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1879 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1880 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1881 
1882 	/*
1883 	 * Note that this will end up calling arm_smmu_sync_cd() before
1884 	 * the master has been added to the devices list for this domain.
1885 	 * This isn't an issue because the STE hasn't been installed yet.
1886 	 */
1887 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1888 	if (ret)
1889 		goto out_free_cd_tables;
1890 
1891 	mutex_unlock(&arm_smmu_asid_lock);
1892 	return 0;
1893 
1894 out_free_cd_tables:
1895 	arm_smmu_free_cd_tables(smmu_domain);
1896 out_free_asid:
1897 	arm_smmu_free_asid(&cfg->cd);
1898 out_unlock:
1899 	mutex_unlock(&arm_smmu_asid_lock);
1900 	return ret;
1901 }
1902 
1903 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1904 				       struct arm_smmu_master *master,
1905 				       struct io_pgtable_cfg *pgtbl_cfg)
1906 {
1907 	int vmid;
1908 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1909 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1910 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1911 
1912 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1913 	if (vmid < 0)
1914 		return vmid;
1915 
1916 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1917 	cfg->vmid	= (u16)vmid;
1918 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1919 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1920 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1921 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1922 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1923 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1924 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1925 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1926 	return 0;
1927 }
1928 
1929 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1930 				    struct arm_smmu_master *master)
1931 {
1932 	int ret;
1933 	unsigned long ias, oas;
1934 	enum io_pgtable_fmt fmt;
1935 	struct io_pgtable_cfg pgtbl_cfg;
1936 	struct io_pgtable_ops *pgtbl_ops;
1937 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1938 				 struct arm_smmu_master *,
1939 				 struct io_pgtable_cfg *);
1940 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1941 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1942 
1943 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1944 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1945 		return 0;
1946 	}
1947 
1948 	/* Restrict the stage to what we can actually support */
1949 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1950 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1951 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1952 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1953 
1954 	switch (smmu_domain->stage) {
1955 	case ARM_SMMU_DOMAIN_S1:
1956 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1957 		ias = min_t(unsigned long, ias, VA_BITS);
1958 		oas = smmu->ias;
1959 		fmt = ARM_64_LPAE_S1;
1960 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1961 		break;
1962 	case ARM_SMMU_DOMAIN_NESTED:
1963 	case ARM_SMMU_DOMAIN_S2:
1964 		ias = smmu->ias;
1965 		oas = smmu->oas;
1966 		fmt = ARM_64_LPAE_S2;
1967 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1968 		break;
1969 	default:
1970 		return -EINVAL;
1971 	}
1972 
1973 	pgtbl_cfg = (struct io_pgtable_cfg) {
1974 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1975 		.ias		= ias,
1976 		.oas		= oas,
1977 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1978 		.tlb		= &arm_smmu_flush_ops,
1979 		.iommu_dev	= smmu->dev,
1980 	};
1981 
1982 	if (smmu_domain->non_strict)
1983 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1984 
1985 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1986 	if (!pgtbl_ops)
1987 		return -ENOMEM;
1988 
1989 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1990 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1991 	domain->geometry.force_aperture = true;
1992 
1993 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1994 	if (ret < 0) {
1995 		free_io_pgtable_ops(pgtbl_ops);
1996 		return ret;
1997 	}
1998 
1999 	smmu_domain->pgtbl_ops = pgtbl_ops;
2000 	return 0;
2001 }
2002 
2003 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2004 {
2005 	__le64 *step;
2006 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2007 
2008 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2009 		struct arm_smmu_strtab_l1_desc *l1_desc;
2010 		int idx;
2011 
2012 		/* Two-level walk */
2013 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2014 		l1_desc = &cfg->l1_desc[idx];
2015 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2016 		step = &l1_desc->l2ptr[idx];
2017 	} else {
2018 		/* Simple linear lookup */
2019 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2020 	}
2021 
2022 	return step;
2023 }
2024 
2025 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2026 {
2027 	int i, j;
2028 	struct arm_smmu_device *smmu = master->smmu;
2029 
2030 	for (i = 0; i < master->num_sids; ++i) {
2031 		u32 sid = master->sids[i];
2032 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2033 
2034 		/* Bridged PCI devices may end up with duplicated IDs */
2035 		for (j = 0; j < i; j++)
2036 			if (master->sids[j] == sid)
2037 				break;
2038 		if (j < i)
2039 			continue;
2040 
2041 		arm_smmu_write_strtab_ent(master, sid, step);
2042 	}
2043 }
2044 
2045 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2046 {
2047 	struct device *dev = master->dev;
2048 	struct arm_smmu_device *smmu = master->smmu;
2049 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2050 
2051 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2052 		return false;
2053 
2054 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2055 		return false;
2056 
2057 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2058 }
2059 
2060 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2061 {
2062 	size_t stu;
2063 	struct pci_dev *pdev;
2064 	struct arm_smmu_device *smmu = master->smmu;
2065 	struct arm_smmu_domain *smmu_domain = master->domain;
2066 
2067 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2068 	if (!master->ats_enabled)
2069 		return;
2070 
2071 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2072 	stu = __ffs(smmu->pgsize_bitmap);
2073 	pdev = to_pci_dev(master->dev);
2074 
2075 	atomic_inc(&smmu_domain->nr_ats_masters);
2076 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2077 	if (pci_enable_ats(pdev, stu))
2078 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2079 }
2080 
2081 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2082 {
2083 	struct arm_smmu_domain *smmu_domain = master->domain;
2084 
2085 	if (!master->ats_enabled)
2086 		return;
2087 
2088 	pci_disable_ats(to_pci_dev(master->dev));
2089 	/*
2090 	 * Ensure ATS is disabled at the endpoint before we issue the
2091 	 * ATC invalidation via the SMMU.
2092 	 */
2093 	wmb();
2094 	arm_smmu_atc_inv_master(master);
2095 	atomic_dec(&smmu_domain->nr_ats_masters);
2096 }
2097 
2098 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2099 {
2100 	int ret;
2101 	int features;
2102 	int num_pasids;
2103 	struct pci_dev *pdev;
2104 
2105 	if (!dev_is_pci(master->dev))
2106 		return -ENODEV;
2107 
2108 	pdev = to_pci_dev(master->dev);
2109 
2110 	features = pci_pasid_features(pdev);
2111 	if (features < 0)
2112 		return features;
2113 
2114 	num_pasids = pci_max_pasids(pdev);
2115 	if (num_pasids <= 0)
2116 		return num_pasids;
2117 
2118 	ret = pci_enable_pasid(pdev, features);
2119 	if (ret) {
2120 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2121 		return ret;
2122 	}
2123 
2124 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2125 				  master->smmu->ssid_bits);
2126 	return 0;
2127 }
2128 
2129 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2130 {
2131 	struct pci_dev *pdev;
2132 
2133 	if (!dev_is_pci(master->dev))
2134 		return;
2135 
2136 	pdev = to_pci_dev(master->dev);
2137 
2138 	if (!pdev->pasid_enabled)
2139 		return;
2140 
2141 	master->ssid_bits = 0;
2142 	pci_disable_pasid(pdev);
2143 }
2144 
2145 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2146 {
2147 	unsigned long flags;
2148 	struct arm_smmu_domain *smmu_domain = master->domain;
2149 
2150 	if (!smmu_domain)
2151 		return;
2152 
2153 	arm_smmu_disable_ats(master);
2154 
2155 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2156 	list_del(&master->domain_head);
2157 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2158 
2159 	master->domain = NULL;
2160 	master->ats_enabled = false;
2161 	arm_smmu_install_ste_for_dev(master);
2162 }
2163 
2164 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2165 {
2166 	int ret = 0;
2167 	unsigned long flags;
2168 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2169 	struct arm_smmu_device *smmu;
2170 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2171 	struct arm_smmu_master *master;
2172 
2173 	if (!fwspec)
2174 		return -ENOENT;
2175 
2176 	master = dev_iommu_priv_get(dev);
2177 	smmu = master->smmu;
2178 
2179 	/*
2180 	 * Checking that SVA is disabled ensures that this device isn't bound to
2181 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2182 	 * be removed concurrently since we're holding the group mutex.
2183 	 */
2184 	if (arm_smmu_master_sva_enabled(master)) {
2185 		dev_err(dev, "cannot attach - SVA enabled\n");
2186 		return -EBUSY;
2187 	}
2188 
2189 	arm_smmu_detach_dev(master);
2190 
2191 	mutex_lock(&smmu_domain->init_mutex);
2192 
2193 	if (!smmu_domain->smmu) {
2194 		smmu_domain->smmu = smmu;
2195 		ret = arm_smmu_domain_finalise(domain, master);
2196 		if (ret) {
2197 			smmu_domain->smmu = NULL;
2198 			goto out_unlock;
2199 		}
2200 	} else if (smmu_domain->smmu != smmu) {
2201 		dev_err(dev,
2202 			"cannot attach to SMMU %s (upstream of %s)\n",
2203 			dev_name(smmu_domain->smmu->dev),
2204 			dev_name(smmu->dev));
2205 		ret = -ENXIO;
2206 		goto out_unlock;
2207 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2208 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2209 		dev_err(dev,
2210 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2211 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2212 		ret = -EINVAL;
2213 		goto out_unlock;
2214 	}
2215 
2216 	master->domain = smmu_domain;
2217 
2218 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2219 		master->ats_enabled = arm_smmu_ats_supported(master);
2220 
2221 	arm_smmu_install_ste_for_dev(master);
2222 
2223 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2224 	list_add(&master->domain_head, &smmu_domain->devices);
2225 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2226 
2227 	arm_smmu_enable_ats(master);
2228 
2229 out_unlock:
2230 	mutex_unlock(&smmu_domain->init_mutex);
2231 	return ret;
2232 }
2233 
2234 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2235 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2236 {
2237 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2238 
2239 	if (!ops)
2240 		return -ENODEV;
2241 
2242 	return ops->map(ops, iova, paddr, size, prot, gfp);
2243 }
2244 
2245 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2246 			     size_t size, struct iommu_iotlb_gather *gather)
2247 {
2248 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2249 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2250 
2251 	if (!ops)
2252 		return 0;
2253 
2254 	return ops->unmap(ops, iova, size, gather);
2255 }
2256 
2257 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2258 {
2259 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2260 
2261 	if (smmu_domain->smmu)
2262 		arm_smmu_tlb_inv_context(smmu_domain);
2263 }
2264 
2265 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2266 				struct iommu_iotlb_gather *gather)
2267 {
2268 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2269 
2270 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2271 			       gather->pgsize, true, smmu_domain);
2272 }
2273 
2274 static phys_addr_t
2275 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2276 {
2277 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2278 
2279 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2280 		return iova;
2281 
2282 	if (!ops)
2283 		return 0;
2284 
2285 	return ops->iova_to_phys(ops, iova);
2286 }
2287 
2288 static struct platform_driver arm_smmu_driver;
2289 
2290 static
2291 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2292 {
2293 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2294 							  fwnode);
2295 	put_device(dev);
2296 	return dev ? dev_get_drvdata(dev) : NULL;
2297 }
2298 
2299 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2300 {
2301 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2302 
2303 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2304 		limit *= 1UL << STRTAB_SPLIT;
2305 
2306 	return sid < limit;
2307 }
2308 
2309 static struct iommu_ops arm_smmu_ops;
2310 
2311 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2312 {
2313 	int i, ret;
2314 	struct arm_smmu_device *smmu;
2315 	struct arm_smmu_master *master;
2316 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2317 
2318 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2319 		return ERR_PTR(-ENODEV);
2320 
2321 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2322 		return ERR_PTR(-EBUSY);
2323 
2324 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2325 	if (!smmu)
2326 		return ERR_PTR(-ENODEV);
2327 
2328 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2329 	if (!master)
2330 		return ERR_PTR(-ENOMEM);
2331 
2332 	master->dev = dev;
2333 	master->smmu = smmu;
2334 	master->sids = fwspec->ids;
2335 	master->num_sids = fwspec->num_ids;
2336 	INIT_LIST_HEAD(&master->bonds);
2337 	dev_iommu_priv_set(dev, master);
2338 
2339 	/* Check the SIDs are in range of the SMMU and our stream table */
2340 	for (i = 0; i < master->num_sids; i++) {
2341 		u32 sid = master->sids[i];
2342 
2343 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2344 			ret = -ERANGE;
2345 			goto err_free_master;
2346 		}
2347 
2348 		/* Ensure l2 strtab is initialised */
2349 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2350 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2351 			if (ret)
2352 				goto err_free_master;
2353 		}
2354 	}
2355 
2356 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2357 
2358 	/*
2359 	 * Note that PASID must be enabled before, and disabled after ATS:
2360 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2361 	 *
2362 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2363 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2364 	 *   are changed.
2365 	 */
2366 	arm_smmu_enable_pasid(master);
2367 
2368 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2369 		master->ssid_bits = min_t(u8, master->ssid_bits,
2370 					  CTXDESC_LINEAR_CDMAX);
2371 
2372 	return &smmu->iommu;
2373 
2374 err_free_master:
2375 	kfree(master);
2376 	dev_iommu_priv_set(dev, NULL);
2377 	return ERR_PTR(ret);
2378 }
2379 
2380 static void arm_smmu_release_device(struct device *dev)
2381 {
2382 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2383 	struct arm_smmu_master *master;
2384 
2385 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2386 		return;
2387 
2388 	master = dev_iommu_priv_get(dev);
2389 	WARN_ON(arm_smmu_master_sva_enabled(master));
2390 	arm_smmu_detach_dev(master);
2391 	arm_smmu_disable_pasid(master);
2392 	kfree(master);
2393 	iommu_fwspec_free(dev);
2394 }
2395 
/*
 * Pick the IOMMU group for @dev: the PCI grouping for PCI devices, a
 * generic per-device group for everything else.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	return dev_is_pci(dev) ? pci_device_group(dev) :
				 generic_device_group(dev);
}
2412 
2413 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2414 				    enum iommu_attr attr, void *data)
2415 {
2416 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2417 
2418 	switch (domain->type) {
2419 	case IOMMU_DOMAIN_UNMANAGED:
2420 		switch (attr) {
2421 		case DOMAIN_ATTR_NESTING:
2422 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2423 			return 0;
2424 		default:
2425 			return -ENODEV;
2426 		}
2427 		break;
2428 	case IOMMU_DOMAIN_DMA:
2429 		switch (attr) {
2430 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2431 			*(int *)data = smmu_domain->non_strict;
2432 			return 0;
2433 		default:
2434 			return -ENODEV;
2435 		}
2436 		break;
2437 	default:
2438 		return -EINVAL;
2439 	}
2440 }
2441 
2442 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2443 				    enum iommu_attr attr, void *data)
2444 {
2445 	int ret = 0;
2446 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2447 
2448 	mutex_lock(&smmu_domain->init_mutex);
2449 
2450 	switch (domain->type) {
2451 	case IOMMU_DOMAIN_UNMANAGED:
2452 		switch (attr) {
2453 		case DOMAIN_ATTR_NESTING:
2454 			if (smmu_domain->smmu) {
2455 				ret = -EPERM;
2456 				goto out_unlock;
2457 			}
2458 
2459 			if (*(int *)data)
2460 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2461 			else
2462 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2463 			break;
2464 		default:
2465 			ret = -ENODEV;
2466 		}
2467 		break;
2468 	case IOMMU_DOMAIN_DMA:
2469 		switch(attr) {
2470 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2471 			smmu_domain->non_strict = *(int *)data;
2472 			break;
2473 		default:
2474 			ret = -ENODEV;
2475 		}
2476 		break;
2477 	default:
2478 		ret = -EINVAL;
2479 	}
2480 
2481 out_unlock:
2482 	mutex_unlock(&smmu_domain->init_mutex);
2483 	return ret;
2484 }
2485 
2486 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2487 {
2488 	return iommu_fwspec_add_ids(dev, args->args, 1);
2489 }
2490 
2491 static void arm_smmu_get_resv_regions(struct device *dev,
2492 				      struct list_head *head)
2493 {
2494 	struct iommu_resv_region *region;
2495 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2496 
2497 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2498 					 prot, IOMMU_RESV_SW_MSI);
2499 	if (!region)
2500 		return;
2501 
2502 	list_add_tail(&region->list, head);
2503 
2504 	iommu_dma_get_resv_regions(dev, head);
2505 }
2506 
2507 static bool arm_smmu_dev_has_feature(struct device *dev,
2508 				     enum iommu_dev_features feat)
2509 {
2510 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2511 
2512 	if (!master)
2513 		return false;
2514 
2515 	switch (feat) {
2516 	case IOMMU_DEV_FEAT_SVA:
2517 		return arm_smmu_master_sva_supported(master);
2518 	default:
2519 		return false;
2520 	}
2521 }
2522 
2523 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2524 					 enum iommu_dev_features feat)
2525 {
2526 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2527 
2528 	if (!master)
2529 		return false;
2530 
2531 	switch (feat) {
2532 	case IOMMU_DEV_FEAT_SVA:
2533 		return arm_smmu_master_sva_enabled(master);
2534 	default:
2535 		return false;
2536 	}
2537 }
2538 
2539 static int arm_smmu_dev_enable_feature(struct device *dev,
2540 				       enum iommu_dev_features feat)
2541 {
2542 	if (!arm_smmu_dev_has_feature(dev, feat))
2543 		return -ENODEV;
2544 
2545 	if (arm_smmu_dev_feature_enabled(dev, feat))
2546 		return -EBUSY;
2547 
2548 	switch (feat) {
2549 	case IOMMU_DEV_FEAT_SVA:
2550 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2551 	default:
2552 		return -EINVAL;
2553 	}
2554 }
2555 
2556 static int arm_smmu_dev_disable_feature(struct device *dev,
2557 					enum iommu_dev_features feat)
2558 {
2559 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2560 		return -EINVAL;
2561 
2562 	switch (feat) {
2563 	case IOMMU_DEV_FEAT_SVA:
2564 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2565 	default:
2566 		return -EINVAL;
2567 	}
2568 }
2569 
2570 static struct iommu_ops arm_smmu_ops = {
2571 	.capable		= arm_smmu_capable,
2572 	.domain_alloc		= arm_smmu_domain_alloc,
2573 	.domain_free		= arm_smmu_domain_free,
2574 	.attach_dev		= arm_smmu_attach_dev,
2575 	.map			= arm_smmu_map,
2576 	.unmap			= arm_smmu_unmap,
2577 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2578 	.iotlb_sync		= arm_smmu_iotlb_sync,
2579 	.iova_to_phys		= arm_smmu_iova_to_phys,
2580 	.probe_device		= arm_smmu_probe_device,
2581 	.release_device		= arm_smmu_release_device,
2582 	.device_group		= arm_smmu_device_group,
2583 	.domain_get_attr	= arm_smmu_domain_get_attr,
2584 	.domain_set_attr	= arm_smmu_domain_set_attr,
2585 	.of_xlate		= arm_smmu_of_xlate,
2586 	.get_resv_regions	= arm_smmu_get_resv_regions,
2587 	.put_resv_regions	= generic_iommu_put_resv_regions,
2588 	.dev_has_feat		= arm_smmu_dev_has_feature,
2589 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2590 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2591 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2592 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2593 };
2594 
2595 /* Probing and initialisation functions */
2596 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2597 				   struct arm_smmu_queue *q,
2598 				   unsigned long prod_off,
2599 				   unsigned long cons_off,
2600 				   size_t dwords, const char *name)
2601 {
2602 	size_t qsz;
2603 
2604 	do {
2605 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2606 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2607 					      GFP_KERNEL);
2608 		if (q->base || qsz < PAGE_SIZE)
2609 			break;
2610 
2611 		q->llq.max_n_shift--;
2612 	} while (1);
2613 
2614 	if (!q->base) {
2615 		dev_err(smmu->dev,
2616 			"failed to allocate queue (0x%zx bytes) for %s\n",
2617 			qsz, name);
2618 		return -ENOMEM;
2619 	}
2620 
2621 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2622 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2623 			 1 << q->llq.max_n_shift, name);
2624 	}
2625 
2626 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2627 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2628 	q->ent_dwords	= dwords;
2629 
2630 	q->q_base  = Q_BASE_RWA;
2631 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2632 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2633 
2634 	q->llq.prod = q->llq.cons = 0;
2635 	return 0;
2636 }
2637 
/* Devres action: free the bitmap passed as @data. */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	bitmap_free(data);
}
2643 
2644 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2645 {
2646 	int ret = 0;
2647 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2648 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2649 	atomic_long_t *bitmap;
2650 
2651 	atomic_set(&cmdq->owner_prod, 0);
2652 	atomic_set(&cmdq->lock, 0);
2653 
2654 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2655 	if (!bitmap) {
2656 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2657 		ret = -ENOMEM;
2658 	} else {
2659 		cmdq->valid_map = bitmap;
2660 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2661 	}
2662 
2663 	return ret;
2664 }
2665 
2666 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2667 {
2668 	int ret;
2669 
2670 	/* cmdq */
2671 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2672 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2673 				      "cmdq");
2674 	if (ret)
2675 		return ret;
2676 
2677 	ret = arm_smmu_cmdq_init(smmu);
2678 	if (ret)
2679 		return ret;
2680 
2681 	/* evtq */
2682 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2683 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2684 				      "evtq");
2685 	if (ret)
2686 		return ret;
2687 
2688 	/* priq */
2689 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2690 		return 0;
2691 
2692 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2693 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2694 				       "priq");
2695 }
2696 
2697 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2698 {
2699 	unsigned int i;
2700 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2701 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2702 	void *strtab = smmu->strtab_cfg.strtab;
2703 
2704 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2705 	if (!cfg->l1_desc) {
2706 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2707 		return -ENOMEM;
2708 	}
2709 
2710 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2711 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2712 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2713 	}
2714 
2715 	return 0;
2716 }
2717 
2718 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2719 {
2720 	void *strtab;
2721 	u64 reg;
2722 	u32 size, l1size;
2723 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2724 
2725 	/* Calculate the L1 size, capped to the SIDSIZE. */
2726 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2727 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2728 	cfg->num_l1_ents = 1 << size;
2729 
2730 	size += STRTAB_SPLIT;
2731 	if (size < smmu->sid_bits)
2732 		dev_warn(smmu->dev,
2733 			 "2-level strtab only covers %u/%u bits of SID\n",
2734 			 size, smmu->sid_bits);
2735 
2736 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2737 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2738 				     GFP_KERNEL);
2739 	if (!strtab) {
2740 		dev_err(smmu->dev,
2741 			"failed to allocate l1 stream table (%u bytes)\n",
2742 			l1size);
2743 		return -ENOMEM;
2744 	}
2745 	cfg->strtab = strtab;
2746 
2747 	/* Configure strtab_base_cfg for 2 levels */
2748 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2749 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2750 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2751 	cfg->strtab_base_cfg = reg;
2752 
2753 	return arm_smmu_init_l1_strtab(smmu);
2754 }
2755 
2756 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2757 {
2758 	void *strtab;
2759 	u64 reg;
2760 	u32 size;
2761 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2762 
2763 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2764 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2765 				     GFP_KERNEL);
2766 	if (!strtab) {
2767 		dev_err(smmu->dev,
2768 			"failed to allocate linear stream table (%u bytes)\n",
2769 			size);
2770 		return -ENOMEM;
2771 	}
2772 	cfg->strtab = strtab;
2773 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2774 
2775 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2776 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2777 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2778 	cfg->strtab_base_cfg = reg;
2779 
2780 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2781 	return 0;
2782 }
2783 
2784 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2785 {
2786 	u64 reg;
2787 	int ret;
2788 
2789 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2790 		ret = arm_smmu_init_strtab_2lvl(smmu);
2791 	else
2792 		ret = arm_smmu_init_strtab_linear(smmu);
2793 
2794 	if (ret)
2795 		return ret;
2796 
2797 	/* Set the strtab base address */
2798 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2799 	reg |= STRTAB_BASE_RA;
2800 	smmu->strtab_cfg.strtab_base = reg;
2801 
2802 	/* Allocate the first VMID for stage-2 bypass STEs */
2803 	set_bit(0, smmu->vmid_map);
2804 	return 0;
2805 }
2806 
/*
 * Initialise the in-memory structures used by the SMMU: the queues first,
 * then the stream table. Returns 0 on success or the first error.
 */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int ret = arm_smmu_init_queues(smmu);

	if (!ret)
		ret = arm_smmu_init_strtab(smmu);

	return ret;
}
2817 
2818 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2819 				   unsigned int reg_off, unsigned int ack_off)
2820 {
2821 	u32 reg;
2822 
2823 	writel_relaxed(val, smmu->base + reg_off);
2824 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2825 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2826 }
2827 
2828 /* GBPA is "special" */
2829 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2830 {
2831 	int ret;
2832 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2833 
2834 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2835 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2836 	if (ret)
2837 		return ret;
2838 
2839 	reg &= ~clr;
2840 	reg |= set;
2841 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2842 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2843 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2844 
2845 	if (ret)
2846 		dev_err(smmu->dev, "GBPA not responding to update\n");
2847 	return ret;
2848 }
2849 
/* Devres action: free the platform MSIs of the device passed as @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs(data);
}
2855 
2856 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2857 {
2858 	phys_addr_t doorbell;
2859 	struct device *dev = msi_desc_to_dev(desc);
2860 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2861 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2862 
2863 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2864 	doorbell &= MSI_CFG0_ADDR_MASK;
2865 
2866 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2867 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2868 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2869 }
2870 
2871 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2872 {
2873 	struct msi_desc *desc;
2874 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2875 	struct device *dev = smmu->dev;
2876 
2877 	/* Clear the MSI address regs */
2878 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2879 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2880 
2881 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2882 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2883 	else
2884 		nvec--;
2885 
2886 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2887 		return;
2888 
2889 	if (!dev->msi_domain) {
2890 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2891 		return;
2892 	}
2893 
2894 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2895 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2896 	if (ret) {
2897 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2898 		return;
2899 	}
2900 
2901 	for_each_msi_entry(desc, dev) {
2902 		switch (desc->platform.msi_index) {
2903 		case EVTQ_MSI_INDEX:
2904 			smmu->evtq.q.irq = desc->irq;
2905 			break;
2906 		case GERROR_MSI_INDEX:
2907 			smmu->gerr_irq = desc->irq;
2908 			break;
2909 		case PRIQ_MSI_INDEX:
2910 			smmu->priq.q.irq = desc->irq;
2911 			break;
2912 		default:	/* Unknown */
2913 			continue;
2914 		}
2915 	}
2916 
2917 	/* Add callback to free MSIs on teardown */
2918 	devm_add_action(dev, arm_smmu_free_msis, dev);
2919 }
2920 
2921 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2922 {
2923 	int irq, ret;
2924 
2925 	arm_smmu_setup_msis(smmu);
2926 
2927 	/* Request interrupt lines */
2928 	irq = smmu->evtq.q.irq;
2929 	if (irq) {
2930 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2931 						arm_smmu_evtq_thread,
2932 						IRQF_ONESHOT,
2933 						"arm-smmu-v3-evtq", smmu);
2934 		if (ret < 0)
2935 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2936 	} else {
2937 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2938 	}
2939 
2940 	irq = smmu->gerr_irq;
2941 	if (irq) {
2942 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2943 				       0, "arm-smmu-v3-gerror", smmu);
2944 		if (ret < 0)
2945 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2946 	} else {
2947 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2948 	}
2949 
2950 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2951 		irq = smmu->priq.q.irq;
2952 		if (irq) {
2953 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2954 							arm_smmu_priq_thread,
2955 							IRQF_ONESHOT,
2956 							"arm-smmu-v3-priq",
2957 							smmu);
2958 			if (ret < 0)
2959 				dev_warn(smmu->dev,
2960 					 "failed to enable priq irq\n");
2961 		} else {
2962 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2963 		}
2964 	}
2965 }
2966 
2967 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2968 {
2969 	int ret, irq;
2970 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2971 
2972 	/* Disable IRQs first */
2973 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2974 				      ARM_SMMU_IRQ_CTRLACK);
2975 	if (ret) {
2976 		dev_err(smmu->dev, "failed to disable irqs\n");
2977 		return ret;
2978 	}
2979 
2980 	irq = smmu->combined_irq;
2981 	if (irq) {
2982 		/*
2983 		 * Cavium ThunderX2 implementation doesn't support unique irq
2984 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2985 		 */
2986 		ret = devm_request_threaded_irq(smmu->dev, irq,
2987 					arm_smmu_combined_irq_handler,
2988 					arm_smmu_combined_irq_thread,
2989 					IRQF_ONESHOT,
2990 					"arm-smmu-v3-combined-irq", smmu);
2991 		if (ret < 0)
2992 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2993 	} else
2994 		arm_smmu_setup_unique_irqs(smmu);
2995 
2996 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2997 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2998 
2999 	/* Enable interrupt generation on the SMMU */
3000 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3001 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3002 	if (ret)
3003 		dev_warn(smmu->dev, "failed to enable irqs\n");
3004 
3005 	return 0;
3006 }
3007 
3008 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3009 {
3010 	int ret;
3011 
3012 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3013 	if (ret)
3014 		dev_err(smmu->dev, "failed to clear cr0\n");
3015 
3016 	return ret;
3017 }
3018 
/*
 * Reprogram the SMMU from scratch and bring it up.
 *
 * The sequence is: clear CR0, program CR1/CR2 and the stream table base,
 * then enable the command queue, invalidate cached configuration and TLB
 * entries, enable the event queue, the PRI queue (if supported) and ATS
 * checking (if supported), set up interrupts, and finally either set
 * CR0.SMMUEN or leave the SMMU disabled with GBPA updated for bypass.
 * Every CR0 update is written with arm_smmu_write_reg_sync() against
 * CR0ACK, so each stage is acknowledged before the next one starts.
 *
 * @smmu:   device to reset
 * @bypass: when true and the disable_bypass module parameter is false,
 *          SMMUEN is left clear and arm_smmu_update_gbpa() is called with
 *          GBPA_ABORT as the bits to clear instead of enabling translation
 *
 * Returns 0 on success or the first error reported by one of the
 * synchronous register writes, arm_smmu_setup_irqs() or
 * arm_smmu_update_gbpa().
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		WARN_ON(is_kdump_kernel() && !disable_bypass);
		/*
		 * NOTE(review): presumably sets GBPA.ABORT so that traffic is
		 * aborted while the SMMU is disabled below; the return value
		 * is not checked here - confirm against arm_smmu_update_gbpa().
		 */
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2: set the PTM, RECINVSID and E2H bits */
	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* 'enables' accumulates the CR0 bits switched on so far */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/*
	 * Invalidate any cached configuration.
	 * Only cmd.opcode is initialised for the commands issued below.
	 */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);

	/* Event queue (PROD/CONS are addressed via arm_smmu_page1_fixup()) */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
	writel_relaxed(smmu->evtq.q.llq.cons,
		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
		writel_relaxed(smmu->priq.q.llq.cons,
			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	/* ATS checking, only when the ATS feature was detected */
	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/*
	 * In a kdump kernel the event and PRI queue enables are dropped
	 * again by the final CR0 write below.
	 */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		/* Clear GBPA_ABORT (third argument is the clear mask) */
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}
3153 
/*
 * Read the SMMU identification registers (IDR0, IDR1, IDR3 and IDR5) and
 * record what the hardware supports in @smmu: feature and option flags,
 * ASID/VMID/SID/SSID widths, maximum queue sizes, supported page sizes and
 * the input/output address sizes.
 *
 * Also folds the page sizes into arm_smmu_ops.pgsize_bitmap and sets the
 * DMA mask of smmu->dev from the output address size.
 *
 * Returns 0 on success or -ENXIO when the hardware reports a configuration
 * this driver rejects (translation table endianness, no translation stage,
 * table format, preset/embedded implementation, command queue too small).
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	/* Coherency as already recorded in smmu->features before this call */
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		/* Also taken when the hardware only offers the other endianness */
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags (PRI/ATS also need the matching kernel config) */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI polling needs coherency and can be vetoed by module param */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP)
		smmu->features |= ARM_SMMU_FEAT_HYP;

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	/* STALL_FORCE implies STALLS; other stall models set neither flag */
	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	/* At least one translation stage is required */
	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		/* Provisional value; raised to at least oas at the end */
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes: each granule contributes its page and block sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		/* Unrecognised encodings are treated as 48-bit */
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/*
	 * The first SMMU to probe replaces the bitmap, later ones add to it.
	 * NOTE(review): -1UL looks like the "not yet set" initial value of
	 * arm_smmu_ops.pgsize_bitmap - confirm against its definition.
	 */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	/* The input address size is never smaller than the output size */
	smmu->ias = max(smmu->ias, smmu->oas);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
3363 
3364 #ifdef CONFIG_ACPI
3365 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3366 {
3367 	switch (model) {
3368 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3369 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3370 		break;
3371 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3372 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3373 		break;
3374 	}
3375 
3376 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3377 }
3378 
3379 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3380 				      struct arm_smmu_device *smmu)
3381 {
3382 	struct acpi_iort_smmu_v3 *iort_smmu;
3383 	struct device *dev = smmu->dev;
3384 	struct acpi_iort_node *node;
3385 
3386 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3387 
3388 	/* Retrieve SMMUv3 specific data */
3389 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3390 
3391 	acpi_smmu_get_options(iort_smmu->model, smmu);
3392 
3393 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3394 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3395 
3396 	return 0;
3397 }
3398 #else
/*
 * Stub used when CONFIG_ACPI is not set: there is no ACPI description to
 * probe, so always report -ENODEV.
 */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
3404 #endif
3405 
3406 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3407 				    struct arm_smmu_device *smmu)
3408 {
3409 	struct device *dev = &pdev->dev;
3410 	u32 cells;
3411 	int ret = -EINVAL;
3412 
3413 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3414 		dev_err(dev, "missing #iommu-cells property\n");
3415 	else if (cells != 1)
3416 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3417 	else
3418 		ret = 0;
3419 
3420 	parse_driver_options(smmu);
3421 
3422 	if (of_dma_is_coherent(dev->of_node))
3423 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3424 
3425 	return ret;
3426 }
3427 
3428 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3429 {
3430 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3431 		return SZ_64K;
3432 	else
3433 		return SZ_128K;
3434 }
3435 
/*
 * Install @ops as the IOMMU ops of the PCI, AMBA and platform buses (the
 * first two only when the corresponding support is configured in).
 *
 * A bus whose iommu_ops already equal @ops is left untouched. If setting
 * the ops on one bus fails, the buses handled before it have their ops
 * reset to NULL and the error is returned.
 *
 * @ops: ops to install; the remove path in this file passes NULL
 *
 * Returns 0 on success or the error from bus_set_iommu().
 */
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

	/* Unwind in reverse order of installation */
err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
	/* Only jumped to from the CONFIG_ARM_AMBA section, hence __maybe_unused */
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}
3472 
3473 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3474 				      resource_size_t size)
3475 {
3476 	struct resource res = {
3477 		.flags = IORESOURCE_MEM,
3478 		.start = start,
3479 		.end = start + size - 1,
3480 	};
3481 
3482 	return devm_ioremap_resource(dev, &res);
3483 }
3484 
3485 static int arm_smmu_device_probe(struct platform_device *pdev)
3486 {
3487 	int irq, ret;
3488 	struct resource *res;
3489 	resource_size_t ioaddr;
3490 	struct arm_smmu_device *smmu;
3491 	struct device *dev = &pdev->dev;
3492 	bool bypass;
3493 
3494 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3495 	if (!smmu) {
3496 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3497 		return -ENOMEM;
3498 	}
3499 	smmu->dev = dev;
3500 
3501 	if (dev->of_node) {
3502 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3503 	} else {
3504 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3505 		if (ret == -ENODEV)
3506 			return ret;
3507 	}
3508 
3509 	/* Set bypass mode according to firmware probing result */
3510 	bypass = !!ret;
3511 
3512 	/* Base address */
3513 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3514 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3515 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3516 		return -EINVAL;
3517 	}
3518 	ioaddr = res->start;
3519 
3520 	/*
3521 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3522 	 * the PMCG registers which are reserved by the PMU driver.
3523 	 */
3524 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3525 	if (IS_ERR(smmu->base))
3526 		return PTR_ERR(smmu->base);
3527 
3528 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3529 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3530 					       ARM_SMMU_REG_SZ);
3531 		if (IS_ERR(smmu->page1))
3532 			return PTR_ERR(smmu->page1);
3533 	} else {
3534 		smmu->page1 = smmu->base;
3535 	}
3536 
3537 	/* Interrupt lines */
3538 
3539 	irq = platform_get_irq_byname_optional(pdev, "combined");
3540 	if (irq > 0)
3541 		smmu->combined_irq = irq;
3542 	else {
3543 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3544 		if (irq > 0)
3545 			smmu->evtq.q.irq = irq;
3546 
3547 		irq = platform_get_irq_byname_optional(pdev, "priq");
3548 		if (irq > 0)
3549 			smmu->priq.q.irq = irq;
3550 
3551 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3552 		if (irq > 0)
3553 			smmu->gerr_irq = irq;
3554 	}
3555 	/* Probe the h/w */
3556 	ret = arm_smmu_device_hw_probe(smmu);
3557 	if (ret)
3558 		return ret;
3559 
3560 	/* Initialise in-memory data structures */
3561 	ret = arm_smmu_init_structures(smmu);
3562 	if (ret)
3563 		return ret;
3564 
3565 	/* Record our private device structure */
3566 	platform_set_drvdata(pdev, smmu);
3567 
3568 	/* Reset the device */
3569 	ret = arm_smmu_device_reset(smmu, bypass);
3570 	if (ret)
3571 		return ret;
3572 
3573 	/* And we're up. Go go go! */
3574 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3575 				     "smmu3.%pa", &ioaddr);
3576 	if (ret)
3577 		return ret;
3578 
3579 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3580 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3581 
3582 	ret = iommu_device_register(&smmu->iommu);
3583 	if (ret) {
3584 		dev_err(dev, "Failed to register iommu\n");
3585 		return ret;
3586 	}
3587 
3588 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3589 }
3590 
3591 static int arm_smmu_device_remove(struct platform_device *pdev)
3592 {
3593 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3594 
3595 	arm_smmu_set_bus_ops(NULL);
3596 	iommu_device_unregister(&smmu->iommu);
3597 	iommu_device_sysfs_remove(&smmu->iommu);
3598 	arm_smmu_device_disable(smmu);
3599 
3600 	return 0;
3601 }
3602 
/*
 * Platform driver shutdown: performs the same teardown as
 * arm_smmu_device_remove(); its return value is ignored.
 */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
3607 
/* Device tree compatible strings handled by this driver */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },	/* empty entry terminates the table */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3613 
/*
 * Platform driver glue: ties the probe/remove/shutdown callbacks above to
 * the "arm-smmu-v3" driver name and the device tree match table.
 */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		/* NOTE(review): presumably hides the sysfs bind/unbind files - confirm */
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);
3625 
/* Module metadata */
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
3630