xref: /openbmc/linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 5a4c98323b01d52382575a7a4d6bf7bf5f326047)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33 
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 	"Disable MSI-based polling for CMD_SYNC completion.");
43 
44 enum arm_smmu_msi_index {
45 	EVTQ_MSI_INDEX,
46 	GERROR_MSI_INDEX,
47 	PRIQ_MSI_INDEX,
48 	ARM_SMMU_MAX_MSIS,
49 };
50 
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 	[EVTQ_MSI_INDEX] = {
53 		ARM_SMMU_EVTQ_IRQ_CFG0,
54 		ARM_SMMU_EVTQ_IRQ_CFG1,
55 		ARM_SMMU_EVTQ_IRQ_CFG2,
56 	},
57 	[GERROR_MSI_INDEX] = {
58 		ARM_SMMU_GERROR_IRQ_CFG0,
59 		ARM_SMMU_GERROR_IRQ_CFG1,
60 		ARM_SMMU_GERROR_IRQ_CFG2,
61 	},
62 	[PRIQ_MSI_INDEX] = {
63 		ARM_SMMU_PRIQ_IRQ_CFG0,
64 		ARM_SMMU_PRIQ_IRQ_CFG1,
65 		ARM_SMMU_PRIQ_IRQ_CFG2,
66 	},
67 };
68 
69 struct arm_smmu_option_prop {
70 	u32 opt;
71 	const char *prop;
72 };
73 
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76 
77 /*
78  * Special value used by SVA when a process dies, to quiesce a CD without
79  * disabling it.
80  */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 	{ 0, NULL},
87 };
88 
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 	int i = 0;
92 
93 	do {
94 		if (of_property_read_bool(smmu->dev->of_node,
95 						arm_smmu_options[i].prop)) {
96 			smmu->options |= arm_smmu_options[i].opt;
97 			dev_notice(smmu->dev, "option %s\n",
98 				arm_smmu_options[i].prop);
99 		}
100 	} while (arm_smmu_options[++i].opt);
101 }
102 
103 /* Low-level queue manipulation functions */
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 	u32 space, prod, cons;
107 
108 	prod = Q_IDX(q, q->prod);
109 	cons = Q_IDX(q, q->cons);
110 
111 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 		space = (1 << q->max_n_shift) - (prod - cons);
113 	else
114 		space = cons - prod;
115 
116 	return space >= n;
117 }
118 
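/*
 * Worked example of the prod/cons encoding used by queue_has_space() above,
 * assuming the usual helpers from arm-smmu-v3.h where Q_IDX() extracts the
 * low max_n_shift bits, Q_WRP() the wrap bit just above them and Q_OVF() the
 * overflow flag in bit 31:
 *
 *   max_n_shift = 3 (8 entries), prod = 0b0110, cons = 0b0010
 *     -> wrap bits equal, space = 8 - (6 - 2) = 4
 *   prod = 0b1010 (idx 2, wrapped), cons = 0b0110 (idx 6, not wrapped)
 *     -> wrap bits differ, space = 6 - 2 = 4
 */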
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124 
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130 
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138 
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 	/*
142 	 * Ensure that all CPU accesses (reads and writes) to the queue
143 	 * are complete before we update the cons pointer.
144 	 */
145 	__iomb();
146 	writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148 
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154 
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 	struct arm_smmu_ll_queue *llq = &q->llq;
158 
159 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 		return;
161 
162 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 		      Q_IDX(llq, llq->cons);
164 	queue_sync_cons_out(q);
165 }
166 
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 	u32 prod;
170 	int ret = 0;
171 
172 	/*
173 	 * We can't use the _relaxed() variant here, as we must prevent
174 	 * speculative reads of the queue before we have determined that
175 	 * prod has indeed moved.
176 	 */
177 	prod = readl(q->prod_reg);
178 
179 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 		ret = -EOVERFLOW;
181 
182 	q->llq.prod = prod;
183 	return ret;
184 }
185 
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191 
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 			    struct arm_smmu_queue_poll *qp)
194 {
195 	qp->delay = 1;
196 	qp->spin_cnt = 0;
197 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200 
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 		return -ETIMEDOUT;
205 
206 	if (qp->wfe) {
207 		wfe();
208 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 		cpu_relax();
210 	} else {
211 		udelay(qp->delay);
212 		qp->delay *= 2;
213 		qp->spin_cnt = 0;
214 	}
215 
216 	return 0;
217 }
218 
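/*
 * Illustrative timeline for queue_poll() when the SMMU lacks SEV support
 * (qp->wfe == false): the caller spins with cpu_relax() for up to
 * ARM_SMMU_POLL_SPIN_COUNT iterations, then backs off with udelay(1),
 * udelay(2), udelay(4), ... doubling each round, until either the caller's
 * condition is met or ARM_SMMU_POLL_TIMEOUT_US elapses and -ETIMEDOUT is
 * returned.
 */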
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 	int i;
222 
223 	for (i = 0; i < n_dwords; ++i)
224 		*dst++ = cpu_to_le64(*src++);
225 }
226 
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 	int i;
230 
231 	for (i = 0; i < n_dwords; ++i)
232 		*dst++ = le64_to_cpu(*src++);
233 }
234 
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 	if (queue_empty(&q->llq))
238 		return -EAGAIN;
239 
240 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 	queue_inc_cons(&q->llq);
242 	queue_sync_cons_out(q);
243 	return 0;
244 }
245 
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 
252 	switch (ent->opcode) {
253 	case CMDQ_OP_TLBI_EL2_ALL:
254 	case CMDQ_OP_TLBI_NSNH_ALL:
255 		break;
256 	case CMDQ_OP_PREFETCH_CFG:
257 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 		fallthrough;
276 	case CMDQ_OP_TLBI_EL2_VA:
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 		break;
285 	case CMDQ_OP_TLBI_S2_IPA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 		break;
294 	case CMDQ_OP_TLBI_NH_ASID:
295 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 		fallthrough;
297 	case CMDQ_OP_TLBI_S12_VMALL:
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		break;
300 	case CMDQ_OP_TLBI_EL2_ASID:
301 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 		break;
303 	case CMDQ_OP_ATC_INV:
304 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 		break;
311 	case CMDQ_OP_PRI_RESP:
312 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 		switch (ent->pri.resp) {
317 		case PRI_RESP_DENY:
318 		case PRI_RESP_FAIL:
319 		case PRI_RESP_SUCC:
320 			break;
321 		default:
322 			return -EINVAL;
323 		}
324 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 		break;
326 	case CMDQ_OP_RESUME:
327 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 		break;
331 	case CMDQ_OP_CMD_SYNC:
332 		if (ent->sync.msiaddr) {
333 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 		} else {
336 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 		}
338 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 		break;
341 	default:
342 		return -ENOENT;
343 	}
344 
345 	return 0;
346 }
347 
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 	return &smmu->cmdq;
351 }
352 
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 					 struct arm_smmu_queue *q, u32 prod)
355 {
356 	struct arm_smmu_cmdq_ent ent = {
357 		.opcode = CMDQ_OP_CMD_SYNC,
358 	};
359 
360 	/*
361 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 	 * payload, so the write will zero the entire command on that platform.
363 	 */
364 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 				   q->ent_dwords * 8;
367 	}
368 
369 	arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371 
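/*
 * Example of the MSI-based completion path set up above (a sketch; the base
 * address is hypothetical): with a command queue base of 0x80000000 and
 * 16-byte entries (CMDQ_ENT_DWORDS * 8), a CMD_SYNC at prod index 5 gets
 * sync.msiaddr = 0x80000000 + 5 * 16 = 0x80000050. On completion the SMMU
 * writes 0 to that address, clearing the first 4 bytes of the CMD_SYNC
 * itself, which is exactly what __arm_smmu_cmdq_poll_until_msi() polls for.
 */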
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 				     struct arm_smmu_queue *q)
374 {
375 	static const char * const cerror_str[] = {
376 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
377 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
378 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
379 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
380 	};
381 
382 	int i;
383 	u64 cmd[CMDQ_ENT_DWORDS];
384 	u32 cons = readl_relaxed(q->cons_reg);
385 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 	struct arm_smmu_cmdq_ent cmd_sync = {
387 		.opcode = CMDQ_OP_CMD_SYNC,
388 	};
389 
390 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
392 
393 	switch (idx) {
394 	case CMDQ_ERR_CERROR_ABT_IDX:
395 		dev_err(smmu->dev, "retrying command fetch\n");
396 		return;
397 	case CMDQ_ERR_CERROR_NONE_IDX:
398 		return;
399 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 		/*
401 		 * ATC Invalidation Completion timeout. CONS is still pointing
402 		 * at the CMD_SYNC. Attempt to complete other pending commands
403 		 * by repeating the CMD_SYNC, though we might well end up back
404 		 * here since the ATC invalidation may still be pending.
405 		 */
406 		return;
407 	case CMDQ_ERR_CERROR_ILL_IDX:
408 	default:
409 		break;
410 	}
411 
412 	/*
413 	 * We may have concurrent producers, so we need to be careful
414 	 * not to touch any of the shadow cmdq state.
415 	 */
416 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 	dev_err(smmu->dev, "skipping command in error state:\n");
418 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420 
421 	/* Convert the erroneous command into a CMD_SYNC */
422 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431 
432 /*
433  * Command queue locking.
434  * This is a form of bastardised rwlock with the following major changes:
435  *
436  * - The only LOCK routines are exclusive_trylock() and shared_lock().
437  *   Neither have barrier semantics, and instead provide only a control
438  *   dependency.
439  *
440  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441  *   fails if the caller appears to be the last lock holder (yes, this is
442  *   racy). All successful UNLOCK routines have RELEASE semantics.
443  */
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 	int val;
447 
448 	/*
449 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 	 * lock counter. When held in exclusive state, the lock counter is set
451 	 * to INT_MIN so these increments won't hurt as the value will remain
452 	 * negative.
453 	 */
454 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 		return;
456 
457 	do {
458 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461 
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	(void)atomic_dec_return_release(&cmdq->lock);
465 }
466 
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	if (atomic_read(&cmdq->lock) == 1)
470 		return false;
471 
472 	arm_smmu_cmdq_shared_unlock(cmdq);
473 	return true;
474 }
475 
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
477 ({									\
478 	bool __ret;							\
479 	local_irq_save(flags);						\
480 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
481 	if (!__ret)							\
482 		local_irq_restore(flags);				\
483 	__ret;								\
484 })
485 
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
487 ({									\
488 	atomic_set_release(&cmdq->lock, 0);				\
489 	local_irq_restore(flags);					\
490 })
491 
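/*
 * Example of the lock word states (a sketch of the scheme described above):
 *
 *   0            - unlocked
 *   N > 0        - held shared by N CMD_SYNC waiters
 *   INT_MIN      - held exclusive (e.g. to re-read the hardware cons pointer)
 *   INT_MIN + N  - exclusive holder active while N would-be sharers have
 *                  pre-incremented and are spinning in
 *                  atomic_cond_read_relaxed() until the value turns
 *                  non-negative again.
 */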
492 
493 /*
494  * Command queue insertion.
495  * This is made fiddly by our attempts to achieve some sort of scalability
496  * since there is one queue shared amongst all of the CPUs in the system.  If
497  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498  * then you'll *love* this monstrosity.
499  *
500  * The basic idea is to split the queue up into ranges of commands that are
501  * owned by a given CPU; the owner may not have written all of the commands
502  * itself, but is responsible for advancing the hardware prod pointer when
503  * the time comes. The algorithm is roughly:
504  *
505  * 	1. Allocate some space in the queue. At this point we also discover
506  *	   whether the head of the queue is currently owned by another CPU,
507  *	   or whether we are the owner.
508  *
509  *	2. Write our commands into our allocated slots in the queue.
510  *
511  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512  *
513  *	4. If we are an owner:
514  *		a. Wait for the previous owner to finish.
515  *		b. Mark the queue head as unowned, which tells us the range
516  *		   that we are responsible for publishing.
517  *		c. Wait for all commands in our owned range to become valid.
518  *		d. Advance the hardware prod pointer.
519  *		e. Tell the next owner we've finished.
520  *
521  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
522  *	   owner), then we need to stick around until it has completed:
523  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524  *		   to clear the first 4 bytes.
525  *		b. Otherwise, we spin waiting for the hardware cons pointer to
526  *		   advance past our command.
527  *
528  * The devil is in the details, particularly the use of locking for handling
529  * SYNC completion and freeing up space in the queue before we think that it is
530  * full.
531  */
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 					       u32 sprod, u32 eprod, bool set)
534 {
535 	u32 swidx, sbidx, ewidx, ebidx;
536 	struct arm_smmu_ll_queue llq = {
537 		.max_n_shift	= cmdq->q.llq.max_n_shift,
538 		.prod		= sprod,
539 	};
540 
541 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543 
544 	while (llq.prod != eprod) {
545 		unsigned long mask;
546 		atomic_long_t *ptr;
547 		u32 limit = BITS_PER_LONG;
548 
549 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551 
552 		ptr = &cmdq->valid_map[swidx];
553 
554 		if ((swidx == ewidx) && (sbidx < ebidx))
555 			limit = ebidx;
556 
557 		mask = GENMASK(limit - 1, sbidx);
558 
559 		/*
560 		 * The valid bit is the inverse of the wrap bit. This means
561 		 * that a zero-initialised queue is invalid and, after marking
562 		 * all entries as valid, they become invalid again when we
563 		 * wrap.
564 		 */
565 		if (set) {
566 			atomic_long_xor(mask, ptr);
567 		} else { /* Poll */
568 			unsigned long valid;
569 
570 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 		}
573 
574 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 	}
576 }
577 
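/*
 * Worked example of the valid-bit scheme used above: the map starts out all
 * zeroes, so entries written on a lap with wrap bit 0 are marked valid by
 * XOR-ing their bits to 1, and a poller on that lap waits for
 * (VAL & mask) == mask. On the next lap the wrap bit is 1 and the stale bits
 * are still 1, so the XOR flips them back to 0 and the poller instead waits
 * for (VAL & mask) == 0. No explicit clearing pass is ever needed.
 */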
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 					u32 sprod, u32 eprod)
581 {
582 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584 
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 					 u32 sprod, u32 eprod)
588 {
589 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591 
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 					     struct arm_smmu_ll_queue *llq)
595 {
596 	unsigned long flags;
597 	struct arm_smmu_queue_poll qp;
598 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 	int ret = 0;
600 
601 	/*
602 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 	 * that fails, spin until somebody else updates it for us.
604 	 */
605 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 		llq->val = READ_ONCE(cmdq->q.llq.val);
609 		return 0;
610 	}
611 
612 	queue_poll_init(smmu, &qp);
613 	do {
614 		llq->val = READ_ONCE(cmdq->q.llq.val);
615 		if (!queue_full(llq))
616 			break;
617 
618 		ret = queue_poll(&qp);
619 	} while (!ret);
620 
621 	return ret;
622 }
623 
624 /*
625  * Wait until the SMMU signals a CMD_SYNC completion MSI.
626  * Must be called with the cmdq lock held in some capacity.
627  */
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 					  struct arm_smmu_ll_queue *llq)
630 {
631 	int ret = 0;
632 	struct arm_smmu_queue_poll qp;
633 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635 
636 	queue_poll_init(smmu, &qp);
637 
638 	/*
639 	 * The MSI won't generate an event, since it's being written back
640 	 * into the command queue.
641 	 */
642 	qp.wfe = false;
643 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 	return ret;
646 }
647 
648 /*
649  * Wait until the SMMU cons index passes llq->prod.
650  * Must be called with the cmdq lock held in some capacity.
651  */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 					       struct arm_smmu_ll_queue *llq)
654 {
655 	struct arm_smmu_queue_poll qp;
656 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 	u32 prod = llq->prod;
658 	int ret = 0;
659 
660 	queue_poll_init(smmu, &qp);
661 	llq->val = READ_ONCE(cmdq->q.llq.val);
662 	do {
663 		if (queue_consumed(llq, prod))
664 			break;
665 
666 		ret = queue_poll(&qp);
667 
668 		/*
669 		 * This needs to be a readl() so that our subsequent call
670 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 		 *
672 		 * Specifically, we need to ensure that we observe all
673 		 * shared_lock()s by other CMD_SYNCs that share our owner,
674 		 * so that a failing call to tryunlock() means that we're
675 		 * the last one out and therefore we can safely advance
676 		 * cmdq->q.llq.cons. Roughly speaking:
677 		 *
678 		 * CPU 0		CPU1			CPU2 (us)
679 		 *
680 		 * if (sync)
681 		 * 	shared_lock();
682 		 *
683 		 * dma_wmb();
684 		 * set_valid_map();
685 		 *
686 		 * 			if (owner) {
687 		 *				poll_valid_map();
688 		 *				<control dependency>
689 		 *				writel(prod_reg);
690 		 *
691 		 *						readl(cons_reg);
692 		 *						tryunlock();
693 		 *
694 		 * Requires us to see CPU 0's shared_lock() acquisition.
695 		 */
696 		llq->cons = readl(cmdq->q.cons_reg);
697 	} while (!ret);
698 
699 	return ret;
700 }
701 
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 					 struct arm_smmu_ll_queue *llq)
704 {
705 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707 
708 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710 
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 					u32 prod, int n)
713 {
714 	int i;
715 	struct arm_smmu_ll_queue llq = {
716 		.max_n_shift	= cmdq->q.llq.max_n_shift,
717 		.prod		= prod,
718 	};
719 
720 	for (i = 0; i < n; ++i) {
721 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722 
723 		prod = queue_inc_prod_n(&llq, i);
724 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 	}
726 }
727 
728 /*
729  * This is the actual insertion function, and provides the following
730  * ordering guarantees to callers:
731  *
732  * - There is a dma_wmb() before publishing any commands to the queue.
733  *   This can be relied upon to order prior writes to data structures
734  *   in memory (such as a CD or an STE) before the command.
735  *
736  * - On completion of a CMD_SYNC, there is a control dependency.
737  *   This can be relied upon to order subsequent writes to memory (e.g.
738  *   freeing an IOVA) after completion of the CMD_SYNC.
739  *
740  * - Command insertion is totally ordered, so if two CPUs each race to
741  *   insert their own list of commands then all of the commands from one
742  *   CPU will appear before any of the commands from the other CPU.
743  */
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 				       u64 *cmds, int n, bool sync)
746 {
747 	u64 cmd_sync[CMDQ_ENT_DWORDS];
748 	u32 prod;
749 	unsigned long flags;
750 	bool owner;
751 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 	struct arm_smmu_ll_queue llq, head;
753 	int ret = 0;
754 
755 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
756 
757 	/* 1. Allocate some space in the queue */
758 	local_irq_save(flags);
759 	llq.val = READ_ONCE(cmdq->q.llq.val);
760 	do {
761 		u64 old;
762 
763 		while (!queue_has_space(&llq, n + sync)) {
764 			local_irq_restore(flags);
765 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 			local_irq_save(flags);
768 		}
769 
770 		head.cons = llq.cons;
771 		head.prod = queue_inc_prod_n(&llq, n + sync) |
772 					     CMDQ_PROD_OWNED_FLAG;
773 
774 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 		if (old == llq.val)
776 			break;
777 
778 		llq.val = old;
779 	} while (1);
780 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783 
784 	/*
785 	 * 2. Write our commands into the queue
786 	 * Dependency ordering from the cmpxchg() loop above.
787 	 */
788 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 	if (sync) {
790 		prod = queue_inc_prod_n(&llq, n);
791 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793 
794 		/*
795 		 * In order to determine completion of our CMD_SYNC, we must
796 		 * ensure that the queue can't wrap twice without us noticing.
797 		 * We achieve that by taking the cmdq lock as shared before
798 		 * marking our slot as valid.
799 		 */
800 		arm_smmu_cmdq_shared_lock(cmdq);
801 	}
802 
803 	/* 3. Mark our slots as valid, ensuring commands are visible first */
804 	dma_wmb();
805 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806 
807 	/* 4. If we are the owner, take control of the SMMU hardware */
808 	if (owner) {
809 		/* a. Wait for previous owner to finish */
810 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811 
812 		/* b. Stop gathering work by clearing the owned flag */
813 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 						   &cmdq->q.llq.atomic.prod);
815 		prod &= ~CMDQ_PROD_OWNED_FLAG;
816 
817 		/*
818 		 * c. Wait for any gathered work to be written to the queue.
819 		 * Note that we read our own entries so that we have the control
820 		 * dependency required by (d).
821 		 */
822 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823 
824 		/*
825 		 * d. Advance the hardware prod pointer
826 		 * Control dependency ordering from the entries becoming valid.
827 		 */
828 		writel_relaxed(prod, cmdq->q.prod_reg);
829 
830 		/*
831 		 * e. Tell the next owner we're done
832 		 * Make sure we've updated the hardware first, so that we don't
833 		 * race to update prod and potentially move it backwards.
834 		 */
835 		atomic_set_release(&cmdq->owner_prod, prod);
836 	}
837 
838 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 	if (sync) {
840 		llq.prod = queue_inc_prod_n(&llq, n);
841 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 		if (ret) {
843 			dev_err_ratelimited(smmu->dev,
844 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 					    llq.prod,
846 					    readl_relaxed(cmdq->q.prod_reg),
847 					    readl_relaxed(cmdq->q.cons_reg));
848 		}
849 
850 		/*
851 		 * Try to unlock the cmdq lock. This will fail if we're the last
852 		 * reader, in which case we can safely update cmdq->q.llq.cons
853 		 */
854 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 			arm_smmu_cmdq_shared_unlock(cmdq);
857 		}
858 	}
859 
860 	local_irq_restore(flags);
861 	return ret;
862 }
863 
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 				     struct arm_smmu_cmdq_ent *ent,
866 				     bool sync)
867 {
868 	u64 cmd[CMDQ_ENT_DWORDS];
869 
870 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 			 ent->opcode);
873 		return -EINVAL;
874 	}
875 
876 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878 
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 				   struct arm_smmu_cmdq_ent *ent)
881 {
882 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884 
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 					     struct arm_smmu_cmdq_ent *ent)
887 {
888 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890 
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 				    struct arm_smmu_cmdq_batch *cmds,
893 				    struct arm_smmu_cmdq_ent *cmd)
894 {
895 	int index;
896 
897 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 		cmds->num = 0;
901 	}
902 
903 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 		cmds->num = 0;
906 	}
907 
908 	index = cmds->num * CMDQ_ENT_DWORDS;
909 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 			 cmd->opcode);
912 		return;
913 	}
914 
915 	cmds->num++;
916 }
917 
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 				      struct arm_smmu_cmdq_batch *cmds)
920 {
921 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
923 
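/*
 * Typical batching pattern for the two helpers above (a sketch; see
 * arm_smmu_sync_cd() below for a real caller; for_each_sid() is only a
 * stand-in for the caller's own iteration):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_STE, ... };
 *
 *	cmds.num = 0;
 *	for_each_sid(sid) {
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);	// issues with CMD_SYNC
 */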
924 static int arm_smmu_page_response(struct device *dev,
925 				  struct iommu_fault_event *unused,
926 				  struct iommu_page_response *resp)
927 {
928 	struct arm_smmu_cmdq_ent cmd = {0};
929 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 	int sid = master->streams[0].id;
931 
932 	if (master->stall_enabled) {
933 		cmd.opcode		= CMDQ_OP_RESUME;
934 		cmd.resume.sid		= sid;
935 		cmd.resume.stag		= resp->grpid;
936 		switch (resp->code) {
937 		case IOMMU_PAGE_RESP_INVALID:
938 		case IOMMU_PAGE_RESP_FAILURE:
939 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 			break;
941 		case IOMMU_PAGE_RESP_SUCCESS:
942 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 			break;
944 		default:
945 			return -EINVAL;
946 		}
947 	} else {
948 		return -ENODEV;
949 	}
950 
951 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 	/*
953 	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 	 * RESUME consumption guarantees that the stalled transaction will be
955 	 * terminated... at some point in the future. PRI_RESP is fire and
956 	 * forget.
957 	 */
958 
959 	return 0;
960 }
961 
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 	struct arm_smmu_cmdq_ent cmd = {
966 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
967 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 		.tlbi.asid = asid,
969 	};
970 
971 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973 
974 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
975 			     int ssid, bool leaf)
976 {
977 	size_t i;
978 	unsigned long flags;
979 	struct arm_smmu_master *master;
980 	struct arm_smmu_cmdq_batch cmds;
981 	struct arm_smmu_device *smmu = smmu_domain->smmu;
982 	struct arm_smmu_cmdq_ent cmd = {
983 		.opcode	= CMDQ_OP_CFGI_CD,
984 		.cfgi	= {
985 			.ssid	= ssid,
986 			.leaf	= leaf,
987 		},
988 	};
989 
990 	cmds.num = 0;
991 
992 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994 		for (i = 0; i < master->num_streams; i++) {
995 			cmd.cfgi.sid = master->streams[i].id;
996 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
997 		}
998 	}
999 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1000 
1001 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1002 }
1003 
1004 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005 					struct arm_smmu_l1_ctx_desc *l1_desc)
1006 {
1007 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1008 
1009 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1011 	if (!l1_desc->l2ptr) {
1012 		dev_warn(smmu->dev,
1013 			 "failed to allocate context descriptor table\n");
1014 		return -ENOMEM;
1015 	}
1016 	return 0;
1017 }
1018 
1019 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1021 {
1022 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1023 		  CTXDESC_L1_DESC_V;
1024 
1025 	/* See comment in arm_smmu_write_ctx_desc() */
1026 	WRITE_ONCE(*dst, cpu_to_le64(val));
1027 }
1028 
1029 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1030 				   u32 ssid)
1031 {
1032 	__le64 *l1ptr;
1033 	unsigned int idx;
1034 	struct arm_smmu_l1_ctx_desc *l1_desc;
1035 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1036 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1037 
1038 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1040 
1041 	idx = ssid >> CTXDESC_SPLIT;
1042 	l1_desc = &cdcfg->l1_desc[idx];
1043 	if (!l1_desc->l2ptr) {
1044 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1045 			return NULL;
1046 
1047 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049 		/* An invalid L1CD can be cached */
1050 		arm_smmu_sync_cd(smmu_domain, ssid, false);
1051 	}
1052 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1054 }
1055 
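/*
 * Indexing example for the two-level CD table walk above (a sketch, assuming
 * CTXDESC_SPLIT is 10 and CTXDESC_L2_ENTRIES is 1024 as defined in
 * arm-smmu-v3.h): for ssid 0x1234, the L1 descriptor index is
 * 0x1234 >> 10 = 4 and the CD slot within that leaf is 0x1234 & 0x3ff = 0x234.
 * The linear (non-2-level) format simply indexes cdtab by the ssid directly.
 */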
1056 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057 			    struct arm_smmu_ctx_desc *cd)
1058 {
1059 	/*
1060 	 * This function handles the following cases:
1061 	 *
1062 	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1063 	 * (2) Install a secondary CD, for SID+SSID traffic.
1064 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065 	 *     CD, then invalidate the old entry and mappings.
1066 	 * (4) Quiesce the context without clearing the valid bit. Disable
1067 	 *     translation, and ignore any translation fault.
1068 	 * (5) Remove a secondary CD.
1069 	 */
1070 	u64 val;
1071 	bool cd_live;
1072 	__le64 *cdptr;
1073 
1074 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1075 		return -E2BIG;
1076 
1077 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1078 	if (!cdptr)
1079 		return -ENOMEM;
1080 
1081 	val = le64_to_cpu(cdptr[0]);
1082 	cd_live = !!(val & CTXDESC_CD_0_V);
1083 
1084 	if (!cd) { /* (5) */
1085 		val = 0;
1086 	} else if (cd == &quiet_cd) { /* (4) */
1087 		val |= CTXDESC_CD_0_TCR_EPD0;
1088 	} else if (cd_live) { /* (3) */
1089 		val &= ~CTXDESC_CD_0_ASID;
1090 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1091 		/*
1092 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093 		 * this substream's traffic
1094 		 */
1095 	} else { /* (1) and (2) */
1096 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1097 		cdptr[2] = 0;
1098 		cdptr[3] = cpu_to_le64(cd->mair);
1099 
1100 		/*
1101 		 * STE is live, and the SMMU might read dwords of this CD in any
1102 		 * order. Ensure that it observes valid values before reading
1103 		 * V=1.
1104 		 */
1105 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1106 
1107 		val = cd->tcr |
1108 #ifdef __BIG_ENDIAN
1109 			CTXDESC_CD_0_ENDI |
1110 #endif
1111 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1113 			CTXDESC_CD_0_AA64 |
1114 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1115 			CTXDESC_CD_0_V;
1116 
1117 		if (smmu_domain->stall_enabled)
1118 			val |= CTXDESC_CD_0_S;
1119 	}
1120 
1121 	/*
1122 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123 	 * "Configuration structures and configuration invalidation completion"
1124 	 *
1125 	 *   The size of single-copy atomic reads made by the SMMU is
1126 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127 	 *   field within an aligned 64-bit span of a structure can be altered
1128 	 *   without first making the structure invalid.
1129 	 */
1130 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1132 	return 0;
1133 }
1134 
1135 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1136 {
1137 	int ret;
1138 	size_t l1size;
1139 	size_t max_contexts;
1140 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1141 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1143 
1144 	max_contexts = 1 << cfg->s1cdmax;
1145 
1146 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1148 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149 		cdcfg->num_l1_ents = max_contexts;
1150 
1151 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1152 	} else {
1153 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155 						  CTXDESC_L2_ENTRIES);
1156 
1157 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158 					      sizeof(*cdcfg->l1_desc),
1159 					      GFP_KERNEL);
1160 		if (!cdcfg->l1_desc)
1161 			return -ENOMEM;
1162 
1163 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1164 	}
1165 
1166 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1167 					   GFP_KERNEL);
1168 	if (!cdcfg->cdtab) {
1169 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1170 		ret = -ENOMEM;
1171 		goto err_free_l1;
1172 	}
1173 
1174 	return 0;
1175 
1176 err_free_l1:
1177 	if (cdcfg->l1_desc) {
1178 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1179 		cdcfg->l1_desc = NULL;
1180 	}
1181 	return ret;
1182 }
1183 
1184 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1185 {
1186 	int i;
1187 	size_t size, l1size;
1188 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1190 
1191 	if (cdcfg->l1_desc) {
1192 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1193 
1194 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195 			if (!cdcfg->l1_desc[i].l2ptr)
1196 				continue;
1197 
1198 			dmam_free_coherent(smmu->dev, size,
1199 					   cdcfg->l1_desc[i].l2ptr,
1200 					   cdcfg->l1_desc[i].l2ptr_dma);
1201 		}
1202 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1203 		cdcfg->l1_desc = NULL;
1204 
1205 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1206 	} else {
1207 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1208 	}
1209 
1210 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211 	cdcfg->cdtab_dma = 0;
1212 	cdcfg->cdtab = NULL;
1213 }
1214 
1215 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1216 {
1217 	bool free;
1218 	struct arm_smmu_ctx_desc *old_cd;
1219 
1220 	if (!cd->asid)
1221 		return false;
1222 
1223 	free = refcount_dec_and_test(&cd->refs);
1224 	if (free) {
1225 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226 		WARN_ON(old_cd != cd);
1227 	}
1228 	return free;
1229 }
1230 
1231 /* Stream table manipulation functions */
1232 static void
1233 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1234 {
1235 	u64 val = 0;
1236 
1237 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1239 
1240 	/* See comment in arm_smmu_write_ctx_desc() */
1241 	WRITE_ONCE(*dst, cpu_to_le64(val));
1242 }
1243 
1244 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1245 {
1246 	struct arm_smmu_cmdq_ent cmd = {
1247 		.opcode	= CMDQ_OP_CFGI_STE,
1248 		.cfgi	= {
1249 			.sid	= sid,
1250 			.leaf	= true,
1251 		},
1252 	};
1253 
1254 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1255 }
1256 
1257 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1258 				      __le64 *dst)
1259 {
1260 	/*
1261 	 * This is hideously complicated, but we only really care about
1262 	 * three cases at the moment:
1263 	 *
1264 	 * 1. Invalid (all zero) -> bypass/fault (init)
1265 	 * 2. Bypass/fault -> translation/bypass (attach)
1266 	 * 3. Translation/bypass -> bypass/fault (detach)
1267 	 *
1268 	 * Given that we can't update the STE atomically and the SMMU
1269 	 * doesn't read the thing in a defined order, that leaves us
1270 	 * with the following maintenance requirements:
1271 	 *
1272 	 * 1. Update Config, return (init time STEs aren't live)
1273 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274 	 * 3. Update Config, sync
1275 	 */
1276 	u64 val = le64_to_cpu(dst[0]);
1277 	bool ste_live = false;
1278 	struct arm_smmu_device *smmu = NULL;
1279 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281 	struct arm_smmu_domain *smmu_domain = NULL;
1282 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1283 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1284 		.prefetch	= {
1285 			.sid	= sid,
1286 		},
1287 	};
1288 
1289 	if (master) {
1290 		smmu_domain = master->domain;
1291 		smmu = master->smmu;
1292 	}
1293 
1294 	if (smmu_domain) {
1295 		switch (smmu_domain->stage) {
1296 		case ARM_SMMU_DOMAIN_S1:
1297 			s1_cfg = &smmu_domain->s1_cfg;
1298 			break;
1299 		case ARM_SMMU_DOMAIN_S2:
1300 		case ARM_SMMU_DOMAIN_NESTED:
1301 			s2_cfg = &smmu_domain->s2_cfg;
1302 			break;
1303 		default:
1304 			break;
1305 		}
1306 	}
1307 
1308 	if (val & STRTAB_STE_0_V) {
1309 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310 		case STRTAB_STE_0_CFG_BYPASS:
1311 			break;
1312 		case STRTAB_STE_0_CFG_S1_TRANS:
1313 		case STRTAB_STE_0_CFG_S2_TRANS:
1314 			ste_live = true;
1315 			break;
1316 		case STRTAB_STE_0_CFG_ABORT:
1317 			BUG_ON(!disable_bypass);
1318 			break;
1319 		default:
1320 			BUG(); /* STE corruption */
1321 		}
1322 	}
1323 
1324 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1325 	val = STRTAB_STE_0_V;
1326 
1327 	/* Bypass/fault */
1328 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329 		if (!smmu_domain && disable_bypass)
1330 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1331 		else
1332 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1333 
1334 		dst[0] = cpu_to_le64(val);
1335 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336 						STRTAB_STE_1_SHCFG_INCOMING));
1337 		dst[2] = 0; /* Nuke the VMID */
1338 		/*
1339 		 * The SMMU can perform negative caching, so we must sync
1340 		 * the STE regardless of whether the old value was live.
1341 		 */
1342 		if (smmu)
1343 			arm_smmu_sync_ste_for_sid(smmu, sid);
1344 		return;
1345 	}
1346 
1347 	if (s1_cfg) {
1348 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1350 
1351 		BUG_ON(ste_live);
1352 		dst[1] = cpu_to_le64(
1353 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1358 
1359 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360 		    !master->stall_enabled)
1361 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1362 
1363 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1367 	}
1368 
1369 	if (s2_cfg) {
1370 		BUG_ON(ste_live);
1371 		dst[2] = cpu_to_le64(
1372 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1374 #ifdef __BIG_ENDIAN
1375 			 STRTAB_STE_2_S2ENDI |
1376 #endif
1377 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1378 			 STRTAB_STE_2_S2R);
1379 
1380 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1381 
1382 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1383 	}
1384 
1385 	if (master->ats_enabled)
1386 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387 						 STRTAB_STE_1_EATS_TRANS));
1388 
1389 	arm_smmu_sync_ste_for_sid(smmu, sid);
1390 	/* See comment in arm_smmu_write_ctx_desc() */
1391 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1392 	arm_smmu_sync_ste_for_sid(smmu, sid);
1393 
1394 	/* It's likely that we'll want to use the new STE soon */
1395 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1397 }
1398 
1399 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1400 {
1401 	unsigned int i;
1402 	u64 val = STRTAB_STE_0_V;
1403 
1404 	if (disable_bypass && !force)
1405 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1406 	else
1407 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1408 
1409 	for (i = 0; i < nent; ++i) {
1410 		strtab[0] = cpu_to_le64(val);
1411 		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412 						   STRTAB_STE_1_SHCFG_INCOMING));
1413 		strtab[2] = 0;
1414 		strtab += STRTAB_STE_DWORDS;
1415 	}
1416 }
1417 
1418 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1419 {
1420 	size_t size;
1421 	void *strtab;
1422 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1424 
1425 	if (desc->l2ptr)
1426 		return 0;
1427 
1428 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1430 
1431 	desc->span = STRTAB_SPLIT + 1;
1432 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1433 					  GFP_KERNEL);
1434 	if (!desc->l2ptr) {
1435 		dev_err(smmu->dev,
1436 			"failed to allocate l2 stream table for SID %u\n",
1437 			sid);
1438 		return -ENOMEM;
1439 	}
1440 
1441 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1443 	return 0;
1444 }
1445 
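/*
 * Indexing example for the two-level stream table above (a sketch, assuming
 * STRTAB_SPLIT is 8 as is typical for this driver): SID 0x1234 selects L1
 * descriptor 0x12, whose leaf covers STEs 0x1200-0x12ff, and the descriptor's
 * span field is programmed to STRTAB_SPLIT + 1 = 9, i.e. 2^(9-1) = 256 STEs.
 */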
1446 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1447 {
1448 	struct arm_smmu_stream *stream_rhs =
1449 		rb_entry(rhs, struct arm_smmu_stream, node);
1450 	const u32 *sid_lhs = lhs;
1451 
1452 	if (*sid_lhs < stream_rhs->id)
1453 		return -1;
1454 	if (*sid_lhs > stream_rhs->id)
1455 		return 1;
1456 	return 0;
1457 }
1458 
1459 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1460 				     const struct rb_node *rhs)
1461 {
1462 	return arm_smmu_streams_cmp_key(
1463 		&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1464 }
1465 
1466 static struct arm_smmu_master *
1467 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1468 {
1469 	struct rb_node *node;
1470 
1471 	lockdep_assert_held(&smmu->streams_mutex);
1472 
1473 	node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1474 	if (!node)
1475 		return NULL;
1476 	return rb_entry(node, struct arm_smmu_stream, node)->master;
1477 }
1478 
1479 /* IRQ and event handlers */
1480 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1481 {
1482 	int ret;
1483 	u32 reason;
1484 	u32 perm = 0;
1485 	struct arm_smmu_master *master;
1486 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1487 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1488 	struct iommu_fault_event fault_evt = { };
1489 	struct iommu_fault *flt = &fault_evt.fault;
1490 
1491 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1492 	case EVT_ID_TRANSLATION_FAULT:
1493 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1494 		break;
1495 	case EVT_ID_ADDR_SIZE_FAULT:
1496 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1497 		break;
1498 	case EVT_ID_ACCESS_FAULT:
1499 		reason = IOMMU_FAULT_REASON_ACCESS;
1500 		break;
1501 	case EVT_ID_PERMISSION_FAULT:
1502 		reason = IOMMU_FAULT_REASON_PERMISSION;
1503 		break;
1504 	default:
1505 		return -EOPNOTSUPP;
1506 	}
1507 
1508 	/* Stage-2 is always pinned at the moment */
1509 	if (evt[1] & EVTQ_1_S2)
1510 		return -EFAULT;
1511 
1512 	if (evt[1] & EVTQ_1_RnW)
1513 		perm |= IOMMU_FAULT_PERM_READ;
1514 	else
1515 		perm |= IOMMU_FAULT_PERM_WRITE;
1516 
1517 	if (evt[1] & EVTQ_1_InD)
1518 		perm |= IOMMU_FAULT_PERM_EXEC;
1519 
1520 	if (evt[1] & EVTQ_1_PnU)
1521 		perm |= IOMMU_FAULT_PERM_PRIV;
1522 
1523 	if (evt[1] & EVTQ_1_STALL) {
1524 		flt->type = IOMMU_FAULT_PAGE_REQ;
1525 		flt->prm = (struct iommu_fault_page_request) {
1526 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1527 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1528 			.perm = perm,
1529 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1530 		};
1531 
1532 		if (ssid_valid) {
1533 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1534 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1535 		}
1536 	} else {
1537 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1538 		flt->event = (struct iommu_fault_unrecoverable) {
1539 			.reason = reason,
1540 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1541 			.perm = perm,
1542 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1543 		};
1544 
1545 		if (ssid_valid) {
1546 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1547 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1548 		}
1549 	}
1550 
1551 	mutex_lock(&smmu->streams_mutex);
1552 	master = arm_smmu_find_master(smmu, sid);
1553 	if (!master) {
1554 		ret = -EINVAL;
1555 		goto out_unlock;
1556 	}
1557 
1558 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1559 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1560 		/* Nobody cared, abort the access */
1561 		struct iommu_page_response resp = {
1562 			.pasid		= flt->prm.pasid,
1563 			.grpid		= flt->prm.grpid,
1564 			.code		= IOMMU_PAGE_RESP_FAILURE,
1565 		};
1566 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1567 	}
1568 
1569 out_unlock:
1570 	mutex_unlock(&smmu->streams_mutex);
1571 	return ret;
1572 }
1573 
1574 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1575 {
1576 	int i, ret;
1577 	struct arm_smmu_device *smmu = dev;
1578 	struct arm_smmu_queue *q = &smmu->evtq.q;
1579 	struct arm_smmu_ll_queue *llq = &q->llq;
1580 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1581 				      DEFAULT_RATELIMIT_BURST);
1582 	u64 evt[EVTQ_ENT_DWORDS];
1583 
1584 	do {
1585 		while (!queue_remove_raw(q, evt)) {
1586 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1587 
1588 			ret = arm_smmu_handle_evt(smmu, evt);
1589 			if (!ret || !__ratelimit(&rs))
1590 				continue;
1591 
1592 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1593 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1594 				dev_info(smmu->dev, "\t0x%016llx\n",
1595 					 (unsigned long long)evt[i]);
1596 
1597 			cond_resched();
1598 		}
1599 
1600 		/*
1601 		 * Not much we can do on overflow, so scream and pretend we're
1602 		 * trying harder.
1603 		 */
1604 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1605 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1606 	} while (!queue_empty(llq));
1607 
1608 	/* Sync our overflow flag, as we believe we're up to speed */
1609 	queue_sync_cons_ovf(q);
1610 	return IRQ_HANDLED;
1611 }
1612 
1613 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1614 {
1615 	u32 sid, ssid;
1616 	u16 grpid;
1617 	bool ssv, last;
1618 
1619 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1620 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1621 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1622 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1623 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1624 
1625 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1626 	dev_info(smmu->dev,
1627 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1628 		 sid, ssid, grpid, last ? "L" : "",
1629 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1630 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1631 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1632 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1633 		 evt[1] & PRIQ_1_ADDR_MASK);
1634 
1635 	if (last) {
1636 		struct arm_smmu_cmdq_ent cmd = {
1637 			.opcode			= CMDQ_OP_PRI_RESP,
1638 			.substream_valid	= ssv,
1639 			.pri			= {
1640 				.sid	= sid,
1641 				.ssid	= ssid,
1642 				.grpid	= grpid,
1643 				.resp	= PRI_RESP_DENY,
1644 			},
1645 		};
1646 
1647 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1648 	}
1649 }
1650 
1651 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1652 {
1653 	struct arm_smmu_device *smmu = dev;
1654 	struct arm_smmu_queue *q = &smmu->priq.q;
1655 	struct arm_smmu_ll_queue *llq = &q->llq;
1656 	u64 evt[PRIQ_ENT_DWORDS];
1657 
1658 	do {
1659 		while (!queue_remove_raw(q, evt))
1660 			arm_smmu_handle_ppr(smmu, evt);
1661 
1662 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1663 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1664 	} while (!queue_empty(llq));
1665 
1666 	/* Sync our overflow flag, as we believe we're up to speed */
1667 	queue_sync_cons_ovf(q);
1668 	return IRQ_HANDLED;
1669 }
1670 
1671 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1672 
1673 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1674 {
1675 	u32 gerror, gerrorn, active;
1676 	struct arm_smmu_device *smmu = dev;
1677 
1678 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1679 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1680 
1681 	active = gerror ^ gerrorn;
1682 	if (!(active & GERROR_ERR_MASK))
1683 		return IRQ_NONE; /* No errors pending */
1684 
1685 	dev_warn(smmu->dev,
1686 		 "unexpected global error reported (0x%08x), this could be serious\n",
1687 		 active);
1688 
1689 	if (active & GERROR_SFM_ERR) {
1690 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1691 		arm_smmu_device_disable(smmu);
1692 	}
1693 
1694 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1695 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1696 
1697 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1698 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1699 
1700 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1701 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1702 
1703 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1704 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1705 
1706 	if (active & GERROR_PRIQ_ABT_ERR)
1707 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1708 
1709 	if (active & GERROR_EVTQ_ABT_ERR)
1710 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1711 
1712 	if (active & GERROR_CMDQ_ERR)
1713 		arm_smmu_cmdq_skip_err(smmu);
1714 
1715 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1716 	return IRQ_HANDLED;
1717 }
1718 
1719 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1720 {
1721 	struct arm_smmu_device *smmu = dev;
1722 
1723 	arm_smmu_evtq_thread(irq, dev);
1724 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1725 		arm_smmu_priq_thread(irq, dev);
1726 
1727 	return IRQ_HANDLED;
1728 }
1729 
1730 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1731 {
1732 	arm_smmu_gerror_handler(irq, dev);
1733 	return IRQ_WAKE_THREAD;
1734 }
1735 
1736 static void
1737 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1738 			struct arm_smmu_cmdq_ent *cmd)
1739 {
1740 	size_t log2_span;
1741 	size_t span_mask;
1742 	/* ATC invalidates are always on 4096-byte pages */
1743 	size_t inval_grain_shift = 12;
1744 	unsigned long page_start, page_end;
1745 
1746 	/*
1747 	 * ATS and PASID:
1748 	 *
1749 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1750 	 * prefix. In that case all ATC entries within the address range are
1751 	 * invalidated, including those that were requested with a PASID! There
1752 	 * is no way to invalidate only entries without PASID.
1753 	 *
1754 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1755 	 * traffic), translation requests without PASID create ATC entries
1756 	 * without PASID, which must be invalidated with substream_valid clear.
1757 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1758 	 * ATC entries within the address range.
1759 	 */
1760 	*cmd = (struct arm_smmu_cmdq_ent) {
1761 		.opcode			= CMDQ_OP_ATC_INV,
1762 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1763 		.atc.ssid		= ssid,
1764 	};
1765 
1766 	if (!size) {
1767 		cmd->atc.size = ATC_INV_SIZE_ALL;
1768 		return;
1769 	}
1770 
1771 	page_start	= iova >> inval_grain_shift;
1772 	page_end	= (iova + size - 1) >> inval_grain_shift;
1773 
1774 	/*
1775 	 * In an ATS Invalidate Request, the address must be aligned on the
1776 	 * range size, which must be a power of two number of page sizes. We
1777 	 * thus have to choose between grossly over-invalidating the region, or
1778 	 * splitting the invalidation into multiple commands. For simplicity
1779 	 * we'll go with the first solution, but should refine it in the future
1780 	 * if multiple commands are shown to be more efficient.
1781 	 *
1782 	 * Find the smallest power of two that covers the range. The most
1783 	 * significant differing bit between the start and end addresses,
1784 	 * fls(start ^ end), indicates the required span. For example:
1785 	 *
1786 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1787 	 *		x = 0b1000 ^ 0b1011 = 0b11
1788 	 *		span = 1 << fls(x) = 4
1789 	 *
1790 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1791 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1792 	 *		span = 1 << fls(x) = 16
1793 	 */
1794 	log2_span	= fls_long(page_start ^ page_end);
1795 	span_mask	= (1ULL << log2_span) - 1;
1796 
1797 	page_start	&= ~span_mask;
1798 
1799 	cmd->atc.addr	= page_start << inval_grain_shift;
1800 	cmd->atc.size	= log2_span;
1801 }
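
/*
 * A minimal sketch of the span rounding above; the helper name and its
 * standalone form are illustrative and not part of the driver. Assuming the
 * 4096-byte ATS invalidation granule used here, it returns the start of the
 * naturally-aligned power-of-two span covering [iova, iova + size).
 */
static inline unsigned long arm_smmu_atc_span_base(unsigned long iova,
						   size_t size)
{
	unsigned long page_start = iova >> 12;
	unsigned long page_end = (iova + size - 1) >> 12;
	size_t log2_span = fls_long(page_start ^ page_end);
	unsigned long span_mask = (1UL << log2_span) - 1;

	/* e.g. pages [7; 10]: log2_span = 4, so the span becomes [0; 15] */
	return (page_start & ~span_mask) << 12;
}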
1802 
1803 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1804 {
1805 	int i;
1806 	struct arm_smmu_cmdq_ent cmd;
1807 	struct arm_smmu_cmdq_batch cmds;
1808 
1809 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1810 
1811 	cmds.num = 0;
1812 	for (i = 0; i < master->num_streams; i++) {
1813 		cmd.atc.sid = master->streams[i].id;
1814 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1815 	}
1816 
1817 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1818 }
1819 
1820 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1821 			    unsigned long iova, size_t size)
1822 {
1823 	int i;
1824 	unsigned long flags;
1825 	struct arm_smmu_cmdq_ent cmd;
1826 	struct arm_smmu_master *master;
1827 	struct arm_smmu_cmdq_batch cmds;
1828 
1829 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1830 		return 0;
1831 
1832 	/*
1833 	 * Ensure that we've completed prior invalidation of the main TLBs
1834 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1835 	 * arm_smmu_enable_ats():
1836 	 *
1837 	 *	// unmap()			// arm_smmu_enable_ats()
1838 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1839 	 *	smp_mb();			[...]
1840 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1841 	 *
1842 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1843 	 * ATS was enabled at the PCI device before completion of the TLBI.
1844 	 */
1845 	smp_mb();
1846 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1847 		return 0;
1848 
1849 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1850 
1851 	cmds.num = 0;
1852 
1853 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1854 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1855 		if (!master->ats_enabled)
1856 			continue;
1857 
1858 		for (i = 0; i < master->num_streams; i++) {
1859 			cmd.atc.sid = master->streams[i].id;
1860 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1861 		}
1862 	}
1863 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1864 
1865 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1866 }
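
/*
 * Condensed, illustrative form of the ordering check above (the helper name
 * is hypothetical and it is not part of the driver): the barrier orders the
 * caller's completed TLBI+SYNC before the counter read, pairing with the
 * increment in arm_smmu_enable_ats(), so a racing ATS enable either sees the
 * finished TLBI or has its counter increment observed here.
 */
static inline bool arm_smmu_atc_inv_needed(struct arm_smmu_domain *smmu_domain)
{
	smp_mb();	/* order prior TLBI+SYNC before reading the counter */
	return atomic_read(&smmu_domain->nr_ats_masters) != 0;
}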
1867 
1868 /* IO_PGTABLE API */
1869 static void arm_smmu_tlb_inv_context(void *cookie)
1870 {
1871 	struct arm_smmu_domain *smmu_domain = cookie;
1872 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1873 	struct arm_smmu_cmdq_ent cmd;
1874 
1875 	/*
1876 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1877 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1878 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1879 	 * insertion to guarantee those are observed before the TLBI. Do be
1880 	 * careful, 007.
1881 	 */
1882 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1883 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1884 	} else {
1885 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1886 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1887 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1888 	}
1889 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1890 }
1891 
1892 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1893 				     unsigned long iova, size_t size,
1894 				     size_t granule,
1895 				     struct arm_smmu_domain *smmu_domain)
1896 {
1897 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1898 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1899 	size_t inv_range = granule;
1900 	struct arm_smmu_cmdq_batch cmds;
1901 
1902 	if (!size)
1903 		return;
1904 
1905 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1906 		/* Get the leaf page size */
1907 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1908 
1909 		num_pages = size >> tg;
1910 
1911 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1912 		cmd->tlbi.tg = (tg - 10) / 2;
1913 
1914 		/*
1915 		 * Determine what level the granule is at. For non-leaf, both
1916 		 * io-pgtable and SVA pass a nominal last-level granule because
1917 		 * they don't know what level(s) actually apply, so ignore that
1918 		 * and leave TTL=0. However for various errata reasons we still
1919 		 * want to use a range command, so avoid the SVA corner case
1920 		 * where both scale and num could be 0 as well.
1921 		 */
1922 		if (cmd->tlbi.leaf)
1923 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1924 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1925 			num_pages++;
1926 	}
1927 
1928 	cmds.num = 0;
1929 
1930 	while (iova < end) {
1931 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1932 			/*
1933 			 * On each iteration of the loop, the range is 5 bits
1934 			 * worth of the aligned size remaining.
1935 			 * The range in pages is:
1936 			 *
1937 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1938 			 */
1939 			unsigned long scale, num;
1940 
1941 			/* Find the largest power-of-two factor of the page count */
1942 			scale = __ffs(num_pages);
1943 			cmd->tlbi.scale = scale;
1944 
1945 			/* Determine how many chunks of 2^scale size we have */
1946 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1947 			cmd->tlbi.num = num - 1;
1948 
1949 			/* range is num * 2^scale * pgsize */
1950 			inv_range = num << (scale + tg);
1951 
1952 			/* Clear out the lower order bits for the next iteration */
1953 			num_pages -= num << scale;
1954 		}
1955 
1956 		cmd->tlbi.addr = iova;
1957 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1958 		iova += inv_range;
1959 	}
1960 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1961 }
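
/*
 * Worked example for the range-command encoding above (the helper is purely
 * illustrative and not part of the driver). With a 4KiB leaf size, tg = 12
 * encodes as (12 - 10) / 2 = 1, and a 2MiB leaf granule gives
 * TTL = 4 - ((21 - 3) / (12 - 3)) = 2. For num_pages = 35 the loop emits two
 * commands: scale = 0, num = 3 (3 pages), then scale = 5, num = 1 (32 pages).
 * Assumes *num_pages != 0, as guaranteed by the loop above.
 */
static inline size_t arm_smmu_range_cmd_pages(unsigned long *num_pages)
{
	unsigned long scale = __ffs(*num_pages);
	unsigned long num = (*num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;

	*num_pages -= num << scale;	/* pages consumed by this command */
	return num << scale;
}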
1962 
1963 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1964 					  size_t granule, bool leaf,
1965 					  struct arm_smmu_domain *smmu_domain)
1966 {
1967 	struct arm_smmu_cmdq_ent cmd = {
1968 		.tlbi = {
1969 			.leaf	= leaf,
1970 		},
1971 	};
1972 
1973 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1974 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1975 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1976 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1977 	} else {
1978 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1979 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1980 	}
1981 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1982 
1983 	/*
1984 	 * Unfortunately, this can't be leaf-only since we may have
1985 	 * zapped an entire table.
1986 	 */
1987 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1988 }
1989 
1990 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1991 				 size_t granule, bool leaf,
1992 				 struct arm_smmu_domain *smmu_domain)
1993 {
1994 	struct arm_smmu_cmdq_ent cmd = {
1995 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1996 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1997 		.tlbi = {
1998 			.asid	= asid,
1999 			.leaf	= leaf,
2000 		},
2001 	};
2002 
2003 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2004 }
2005 
2006 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2007 					 unsigned long iova, size_t granule,
2008 					 void *cookie)
2009 {
2010 	struct arm_smmu_domain *smmu_domain = cookie;
2011 	struct iommu_domain *domain = &smmu_domain->domain;
2012 
2013 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2014 }
2015 
2016 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2017 				  size_t granule, void *cookie)
2018 {
2019 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2020 }
2021 
2022 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2023 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2024 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2025 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2026 };
2027 
2028 /* IOMMU API */
2029 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2030 {
2031 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2032 
2033 	switch (cap) {
2034 	case IOMMU_CAP_CACHE_COHERENCY:
2035 		/* Assume that a coherent TCU implies coherent TBUs */
2036 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2037 	case IOMMU_CAP_NOEXEC:
2038 	case IOMMU_CAP_DEFERRED_FLUSH:
2039 		return true;
2040 	default:
2041 		return false;
2042 	}
2043 }
2044 
2045 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2046 {
2047 	struct arm_smmu_domain *smmu_domain;
2048 
2049 	if (type == IOMMU_DOMAIN_SVA)
2050 		return arm_smmu_sva_domain_alloc();
2051 
2052 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2053 	    type != IOMMU_DOMAIN_DMA &&
2054 	    type != IOMMU_DOMAIN_IDENTITY)
2055 		return NULL;
2056 
2057 	/*
2058 	 * Allocate the domain and initialise some of its data structures.
2059 	 * We can't really do anything meaningful until we've added a
2060 	 * master.
2061 	 */
2062 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2063 	if (!smmu_domain)
2064 		return NULL;
2065 
2066 	mutex_init(&smmu_domain->init_mutex);
2067 	INIT_LIST_HEAD(&smmu_domain->devices);
2068 	spin_lock_init(&smmu_domain->devices_lock);
2069 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2070 
2071 	return &smmu_domain->domain;
2072 }
2073 
2074 static void arm_smmu_domain_free(struct iommu_domain *domain)
2075 {
2076 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2077 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2078 
2079 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2080 
2081 	/* Free the CD and ASID, if we allocated them */
2082 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2083 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2084 
2085 		/* Prevent SVA from touching the CD while we're freeing it */
2086 		mutex_lock(&arm_smmu_asid_lock);
2087 		if (cfg->cdcfg.cdtab)
2088 			arm_smmu_free_cd_tables(smmu_domain);
2089 		arm_smmu_free_asid(&cfg->cd);
2090 		mutex_unlock(&arm_smmu_asid_lock);
2091 	} else {
2092 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2093 		if (cfg->vmid)
2094 			ida_free(&smmu->vmid_map, cfg->vmid);
2095 	}
2096 
2097 	kfree(smmu_domain);
2098 }
2099 
2100 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2101 				       struct arm_smmu_master *master,
2102 				       struct io_pgtable_cfg *pgtbl_cfg)
2103 {
2104 	int ret;
2105 	u32 asid;
2106 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2107 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2108 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2109 
2110 	refcount_set(&cfg->cd.refs, 1);
2111 
2112 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2113 	mutex_lock(&arm_smmu_asid_lock);
2114 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2115 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2116 	if (ret)
2117 		goto out_unlock;
2118 
2119 	cfg->s1cdmax = master->ssid_bits;
2120 
2121 	smmu_domain->stall_enabled = master->stall_enabled;
2122 
2123 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
2124 	if (ret)
2125 		goto out_free_asid;
2126 
2127 	cfg->cd.asid	= (u16)asid;
2128 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2129 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2130 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2131 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2132 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2133 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2134 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2135 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2136 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2137 
2138 	/*
2139 	 * Note that this will end up calling arm_smmu_sync_cd() before
2140 	 * the master has been added to the devices list for this domain.
2141 	 * This isn't an issue because the STE hasn't been installed yet.
2142 	 */
2143 	ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
2144 	if (ret)
2145 		goto out_free_cd_tables;
2146 
2147 	mutex_unlock(&arm_smmu_asid_lock);
2148 	return 0;
2149 
2150 out_free_cd_tables:
2151 	arm_smmu_free_cd_tables(smmu_domain);
2152 out_free_asid:
2153 	arm_smmu_free_asid(&cfg->cd);
2154 out_unlock:
2155 	mutex_unlock(&arm_smmu_asid_lock);
2156 	return ret;
2157 }
2158 
2159 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2160 				       struct arm_smmu_master *master,
2161 				       struct io_pgtable_cfg *pgtbl_cfg)
2162 {
2163 	int vmid;
2164 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2165 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2166 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2167 
2168 	/* Reserve VMID 0 for stage-2 bypass STEs */
2169 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2170 			       GFP_KERNEL);
2171 	if (vmid < 0)
2172 		return vmid;
2173 
2174 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2175 	cfg->vmid	= (u16)vmid;
2176 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2177 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2178 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2179 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2180 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2181 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2182 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2183 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2184 	return 0;
2185 }
2186 
2187 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2188 				    struct arm_smmu_master *master)
2189 {
2190 	int ret;
2191 	unsigned long ias, oas;
2192 	enum io_pgtable_fmt fmt;
2193 	struct io_pgtable_cfg pgtbl_cfg;
2194 	struct io_pgtable_ops *pgtbl_ops;
2195 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2196 				 struct arm_smmu_master *,
2197 				 struct io_pgtable_cfg *);
2198 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2199 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2200 
2201 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2202 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2203 		return 0;
2204 	}
2205 
2206 	/* Restrict the stage to what we can actually support */
2207 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2208 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2209 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2210 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2211 
2212 	switch (smmu_domain->stage) {
2213 	case ARM_SMMU_DOMAIN_S1:
2214 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2215 		ias = min_t(unsigned long, ias, VA_BITS);
2216 		oas = smmu->ias;
2217 		fmt = ARM_64_LPAE_S1;
2218 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2219 		break;
2220 	case ARM_SMMU_DOMAIN_NESTED:
2221 	case ARM_SMMU_DOMAIN_S2:
2222 		ias = smmu->ias;
2223 		oas = smmu->oas;
2224 		fmt = ARM_64_LPAE_S2;
2225 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2226 		break;
2227 	default:
2228 		return -EINVAL;
2229 	}
2230 
2231 	pgtbl_cfg = (struct io_pgtable_cfg) {
2232 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2233 		.ias		= ias,
2234 		.oas		= oas,
2235 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2236 		.tlb		= &arm_smmu_flush_ops,
2237 		.iommu_dev	= smmu->dev,
2238 	};
2239 
2240 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2241 	if (!pgtbl_ops)
2242 		return -ENOMEM;
2243 
2244 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2245 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2246 	domain->geometry.force_aperture = true;
2247 
2248 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2249 	if (ret < 0) {
2250 		free_io_pgtable_ops(pgtbl_ops);
2251 		return ret;
2252 	}
2253 
2254 	smmu_domain->pgtbl_ops = pgtbl_ops;
2255 	return 0;
2256 }
2257 
2258 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2259 {
2260 	__le64 *step;
2261 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2262 
2263 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2264 		struct arm_smmu_strtab_l1_desc *l1_desc;
2265 		int idx;
2266 
2267 		/* Two-level walk */
2268 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2269 		l1_desc = &cfg->l1_desc[idx];
2270 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2271 		step = &l1_desc->l2ptr[idx];
2272 	} else {
2273 		/* Simple linear lookup */
2274 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2275 	}
2276 
2277 	return step;
2278 }
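
/*
 * Index split performed by the two-level walk above, pulled out as an
 * illustrative helper (not part of the driver): the upper SID bits select an
 * L1 descriptor and the low STRTAB_SPLIT bits select an STE within that
 * descriptor's L2 table.
 */
static inline void arm_smmu_split_sid(u32 sid, u32 *l1_idx, u32 *l2_idx)
{
	*l1_idx = sid >> STRTAB_SPLIT;
	*l2_idx = sid & ((1 << STRTAB_SPLIT) - 1);
}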
2279 
2280 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2281 {
2282 	int i, j;
2283 	struct arm_smmu_device *smmu = master->smmu;
2284 
2285 	for (i = 0; i < master->num_streams; ++i) {
2286 		u32 sid = master->streams[i].id;
2287 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2288 
2289 		/* Bridged PCI devices may end up with duplicated IDs */
2290 		for (j = 0; j < i; j++)
2291 			if (master->streams[j].id == sid)
2292 				break;
2293 		if (j < i)
2294 			continue;
2295 
2296 		arm_smmu_write_strtab_ent(master, sid, step);
2297 	}
2298 }
2299 
2300 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2301 {
2302 	struct device *dev = master->dev;
2303 	struct arm_smmu_device *smmu = master->smmu;
2304 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2305 
2306 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2307 		return false;
2308 
2309 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2310 		return false;
2311 
2312 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2313 }
2314 
2315 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2316 {
2317 	size_t stu;
2318 	struct pci_dev *pdev;
2319 	struct arm_smmu_device *smmu = master->smmu;
2320 	struct arm_smmu_domain *smmu_domain = master->domain;
2321 
2322 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2323 	if (!master->ats_enabled)
2324 		return;
2325 
2326 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2327 	stu = __ffs(smmu->pgsize_bitmap);
2328 	pdev = to_pci_dev(master->dev);
2329 
2330 	atomic_inc(&smmu_domain->nr_ats_masters);
2331 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2332 	if (pci_enable_ats(pdev, stu))
2333 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2334 }
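
/*
 * STU example (illustrative only, not part of the driver): for a
 * pgsize_bitmap advertising 4KiB, 2MiB and 1GiB pages, __ffs() returns 12,
 * i.e. the endpoint is configured with a 4KiB smallest translation unit.
 */
static inline size_t arm_smmu_stu_example(void)
{
	unsigned long pgsize_bitmap = 0x40201000UL;	/* 4KiB | 2MiB | 1GiB */

	return __ffs(pgsize_bitmap);	/* 12 -> 4KiB STU */
}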
2335 
2336 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2337 {
2338 	struct arm_smmu_domain *smmu_domain = master->domain;
2339 
2340 	if (!master->ats_enabled)
2341 		return;
2342 
2343 	pci_disable_ats(to_pci_dev(master->dev));
2344 	/*
2345 	 * Ensure ATS is disabled at the endpoint before we issue the
2346 	 * ATC invalidation via the SMMU.
2347 	 */
2348 	wmb();
2349 	arm_smmu_atc_inv_master(master);
2350 	atomic_dec(&smmu_domain->nr_ats_masters);
2351 }
2352 
2353 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2354 {
2355 	int ret;
2356 	int features;
2357 	int num_pasids;
2358 	struct pci_dev *pdev;
2359 
2360 	if (!dev_is_pci(master->dev))
2361 		return -ENODEV;
2362 
2363 	pdev = to_pci_dev(master->dev);
2364 
2365 	features = pci_pasid_features(pdev);
2366 	if (features < 0)
2367 		return features;
2368 
2369 	num_pasids = pci_max_pasids(pdev);
2370 	if (num_pasids <= 0)
2371 		return num_pasids;
2372 
2373 	ret = pci_enable_pasid(pdev, features);
2374 	if (ret) {
2375 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2376 		return ret;
2377 	}
2378 
2379 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2380 				  master->smmu->ssid_bits);
2381 	return 0;
2382 }
2383 
2384 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2385 {
2386 	struct pci_dev *pdev;
2387 
2388 	if (!dev_is_pci(master->dev))
2389 		return;
2390 
2391 	pdev = to_pci_dev(master->dev);
2392 
2393 	if (!pdev->pasid_enabled)
2394 		return;
2395 
2396 	master->ssid_bits = 0;
2397 	pci_disable_pasid(pdev);
2398 }
2399 
2400 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2401 {
2402 	unsigned long flags;
2403 	struct arm_smmu_domain *smmu_domain = master->domain;
2404 
2405 	if (!smmu_domain)
2406 		return;
2407 
2408 	arm_smmu_disable_ats(master);
2409 
2410 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2411 	list_del(&master->domain_head);
2412 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2413 
2414 	master->domain = NULL;
2415 	master->ats_enabled = false;
2416 	arm_smmu_install_ste_for_dev(master);
2417 }
2418 
2419 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2420 {
2421 	int ret = 0;
2422 	unsigned long flags;
2423 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2424 	struct arm_smmu_device *smmu;
2425 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2426 	struct arm_smmu_master *master;
2427 
2428 	if (!fwspec)
2429 		return -ENOENT;
2430 
2431 	master = dev_iommu_priv_get(dev);
2432 	smmu = master->smmu;
2433 
2434 	/*
2435 	 * Checking that SVA is disabled ensures that this device isn't bound to
2436 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2437 	 * be removed concurrently since we're holding the group mutex.
2438 	 */
2439 	if (arm_smmu_master_sva_enabled(master)) {
2440 		dev_err(dev, "cannot attach - SVA enabled\n");
2441 		return -EBUSY;
2442 	}
2443 
2444 	arm_smmu_detach_dev(master);
2445 
2446 	mutex_lock(&smmu_domain->init_mutex);
2447 
2448 	if (!smmu_domain->smmu) {
2449 		smmu_domain->smmu = smmu;
2450 		ret = arm_smmu_domain_finalise(domain, master);
2451 		if (ret) {
2452 			smmu_domain->smmu = NULL;
2453 			goto out_unlock;
2454 		}
2455 	} else if (smmu_domain->smmu != smmu) {
2456 		ret = -EINVAL;
2457 		goto out_unlock;
2458 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2459 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2460 		ret = -EINVAL;
2461 		goto out_unlock;
2462 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2463 		   smmu_domain->stall_enabled != master->stall_enabled) {
2464 		ret = -EINVAL;
2465 		goto out_unlock;
2466 	}
2467 
2468 	master->domain = smmu_domain;
2469 
2470 	/*
2471 	 * The SMMU does not support enabling ATS with bypass. When the STE is
2472 	 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2473 	 * Translated transactions are denied as though ATS is disabled for the
2474 	 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2475 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2476 	 */
2477 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2478 		master->ats_enabled = arm_smmu_ats_supported(master);
2479 
2480 	arm_smmu_install_ste_for_dev(master);
2481 
2482 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2483 	list_add(&master->domain_head, &smmu_domain->devices);
2484 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2485 
2486 	arm_smmu_enable_ats(master);
2487 
2488 out_unlock:
2489 	mutex_unlock(&smmu_domain->init_mutex);
2490 	return ret;
2491 }
2492 
2493 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2494 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2495 			      int prot, gfp_t gfp, size_t *mapped)
2496 {
2497 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2498 
2499 	if (!ops)
2500 		return -ENODEV;
2501 
2502 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2503 }
2504 
2505 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2506 				   size_t pgsize, size_t pgcount,
2507 				   struct iommu_iotlb_gather *gather)
2508 {
2509 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2511 
2512 	if (!ops)
2513 		return 0;
2514 
2515 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2516 }
2517 
2518 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2519 {
2520 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2521 
2522 	if (smmu_domain->smmu)
2523 		arm_smmu_tlb_inv_context(smmu_domain);
2524 }
2525 
2526 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2527 				struct iommu_iotlb_gather *gather)
2528 {
2529 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2530 
2531 	if (!gather->pgsize)
2532 		return;
2533 
2534 	arm_smmu_tlb_inv_range_domain(gather->start,
2535 				      gather->end - gather->start + 1,
2536 				      gather->pgsize, true, smmu_domain);
2537 }
2538 
2539 static phys_addr_t
2540 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2541 {
2542 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2543 
2544 	if (!ops)
2545 		return 0;
2546 
2547 	return ops->iova_to_phys(ops, iova);
2548 }
2549 
2550 static struct platform_driver arm_smmu_driver;
2551 
2552 static
2553 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2554 {
2555 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2556 							  fwnode);
2557 	put_device(dev);
2558 	return dev ? dev_get_drvdata(dev) : NULL;
2559 }
2560 
2561 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2562 {
2563 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2564 
2565 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2566 		limit *= 1UL << STRTAB_SPLIT;
2567 
2568 	return sid < limit;
2569 }
2570 
2571 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2572 {
2573 	/* Check the SIDs are in range of the SMMU and our stream table */
2574 	if (!arm_smmu_sid_in_range(smmu, sid))
2575 		return -ERANGE;
2576 
2577 	/* Ensure l2 strtab is initialised */
2578 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2579 		return arm_smmu_init_l2_strtab(smmu, sid);
2580 
2581 	return 0;
2582 }
2583 
2584 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2585 				  struct arm_smmu_master *master)
2586 {
2587 	int i;
2588 	int ret = 0;
2589 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2590 
2591 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2592 				  GFP_KERNEL);
2593 	if (!master->streams)
2594 		return -ENOMEM;
2595 	master->num_streams = fwspec->num_ids;
2596 
2597 	mutex_lock(&smmu->streams_mutex);
2598 	for (i = 0; i < fwspec->num_ids; i++) {
2599 		struct arm_smmu_stream *new_stream = &master->streams[i];
2600 		struct rb_node *existing;
2601 		u32 sid = fwspec->ids[i];
2602 
2603 		new_stream->id = sid;
2604 		new_stream->master = master;
2605 
2606 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2607 		if (ret)
2608 			break;
2609 
2610 		/* Insert into SID tree */
2611 		existing = rb_find_add(&new_stream->node, &smmu->streams,
2612 				       arm_smmu_streams_cmp_node);
2613 		if (existing) {
2614 			struct arm_smmu_master *existing_master =
2615 				rb_entry(existing, struct arm_smmu_stream, node)
2616 					->master;
2617 
2618 			/* Bridged PCI devices may end up with duplicated IDs */
2619 			if (existing_master == master)
2620 				continue;
2621 
2622 			dev_warn(master->dev,
2623 				 "stream %u already in tree from dev %s\n", sid,
2624 				 dev_name(existing_master->dev));
2625 			ret = -EINVAL;
2626 			break;
2627 		}
2628 	}
2629 
2630 	if (ret) {
2631 		for (i--; i >= 0; i--)
2632 			rb_erase(&master->streams[i].node, &smmu->streams);
2633 		kfree(master->streams);
2634 	}
2635 	mutex_unlock(&smmu->streams_mutex);
2636 
2637 	return ret;
2638 }
2639 
2640 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2641 {
2642 	int i;
2643 	struct arm_smmu_device *smmu = master->smmu;
2644 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2645 
2646 	if (!smmu || !master->streams)
2647 		return;
2648 
2649 	mutex_lock(&smmu->streams_mutex);
2650 	for (i = 0; i < fwspec->num_ids; i++)
2651 		rb_erase(&master->streams[i].node, &smmu->streams);
2652 	mutex_unlock(&smmu->streams_mutex);
2653 
2654 	kfree(master->streams);
2655 }
2656 
2657 static struct iommu_ops arm_smmu_ops;
2658 
2659 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2660 {
2661 	int ret;
2662 	struct arm_smmu_device *smmu;
2663 	struct arm_smmu_master *master;
2664 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2665 
2666 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2667 		return ERR_PTR(-ENODEV);
2668 
2669 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2670 		return ERR_PTR(-EBUSY);
2671 
2672 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2673 	if (!smmu)
2674 		return ERR_PTR(-ENODEV);
2675 
2676 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2677 	if (!master)
2678 		return ERR_PTR(-ENOMEM);
2679 
2680 	master->dev = dev;
2681 	master->smmu = smmu;
2682 	INIT_LIST_HEAD(&master->bonds);
2683 	dev_iommu_priv_set(dev, master);
2684 
2685 	ret = arm_smmu_insert_master(smmu, master);
2686 	if (ret)
2687 		goto err_free_master;
2688 
2689 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2690 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2691 
2692 	/*
2693 	 * Note that PASID must be enabled before, and disabled after ATS:
2694 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2695 	 *
2696 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2697 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2698 	 *   are changed.
2699 	 */
2700 	arm_smmu_enable_pasid(master);
2701 
2702 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2703 		master->ssid_bits = min_t(u8, master->ssid_bits,
2704 					  CTXDESC_LINEAR_CDMAX);
2705 
2706 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2707 	     device_property_read_bool(dev, "dma-can-stall")) ||
2708 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2709 		master->stall_enabled = true;
2710 
2711 	return &smmu->iommu;
2712 
2713 err_free_master:
2714 	kfree(master);
2715 	dev_iommu_priv_set(dev, NULL);
2716 	return ERR_PTR(ret);
2717 }
2718 
2719 static void arm_smmu_release_device(struct device *dev)
2720 {
2721 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2722 
2723 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2724 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2725 	arm_smmu_detach_dev(master);
2726 	arm_smmu_disable_pasid(master);
2727 	arm_smmu_remove_master(master);
2728 	kfree(master);
2729 }
2730 
2731 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2732 {
2733 	struct iommu_group *group;
2734 
2735 	/*
2736 	 * We don't support devices sharing stream IDs other than PCI RID
2737 	 * aliases, since the necessary ID-to-device lookup becomes rather
2738 	 * impractical given a potential sparse 32-bit stream ID space.
2739 	 */
2740 	if (dev_is_pci(dev))
2741 		group = pci_device_group(dev);
2742 	else
2743 		group = generic_device_group(dev);
2744 
2745 	return group;
2746 }
2747 
2748 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2749 {
2750 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2751 	int ret = 0;
2752 
2753 	mutex_lock(&smmu_domain->init_mutex);
2754 	if (smmu_domain->smmu)
2755 		ret = -EPERM;
2756 	else
2757 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2758 	mutex_unlock(&smmu_domain->init_mutex);
2759 
2760 	return ret;
2761 }
2762 
2763 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2764 {
2765 	return iommu_fwspec_add_ids(dev, args->args, 1);
2766 }
2767 
2768 static void arm_smmu_get_resv_regions(struct device *dev,
2769 				      struct list_head *head)
2770 {
2771 	struct iommu_resv_region *region;
2772 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2773 
2774 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2775 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2776 	if (!region)
2777 		return;
2778 
2779 	list_add_tail(&region->list, head);
2780 
2781 	iommu_dma_get_resv_regions(dev, head);
2782 }
2783 
2784 static int arm_smmu_dev_enable_feature(struct device *dev,
2785 				       enum iommu_dev_features feat)
2786 {
2787 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2788 
2789 	if (!master)
2790 		return -ENODEV;
2791 
2792 	switch (feat) {
2793 	case IOMMU_DEV_FEAT_IOPF:
2794 		if (!arm_smmu_master_iopf_supported(master))
2795 			return -EINVAL;
2796 		if (master->iopf_enabled)
2797 			return -EBUSY;
2798 		master->iopf_enabled = true;
2799 		return 0;
2800 	case IOMMU_DEV_FEAT_SVA:
2801 		if (!arm_smmu_master_sva_supported(master))
2802 			return -EINVAL;
2803 		if (arm_smmu_master_sva_enabled(master))
2804 			return -EBUSY;
2805 		return arm_smmu_master_enable_sva(master);
2806 	default:
2807 		return -EINVAL;
2808 	}
2809 }
2810 
2811 static int arm_smmu_dev_disable_feature(struct device *dev,
2812 					enum iommu_dev_features feat)
2813 {
2814 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2815 
2816 	if (!master)
2817 		return -EINVAL;
2818 
2819 	switch (feat) {
2820 	case IOMMU_DEV_FEAT_IOPF:
2821 		if (!master->iopf_enabled)
2822 			return -EINVAL;
2823 		if (master->sva_enabled)
2824 			return -EBUSY;
2825 		master->iopf_enabled = false;
2826 		return 0;
2827 	case IOMMU_DEV_FEAT_SVA:
2828 		if (!arm_smmu_master_sva_enabled(master))
2829 			return -EINVAL;
2830 		return arm_smmu_master_disable_sva(master);
2831 	default:
2832 		return -EINVAL;
2833 	}
2834 }
2835 
2836 /*
2837  * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2838  * PCIe link and save the data to memory by DMA. The hardware is restricted to
2839  * use identity mapping only.
2840  */
2841 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2842 					 (pdev)->device == 0xa12e)
2843 
2844 static int arm_smmu_def_domain_type(struct device *dev)
2845 {
2846 	if (dev_is_pci(dev)) {
2847 		struct pci_dev *pdev = to_pci_dev(dev);
2848 
2849 		if (IS_HISI_PTT_DEVICE(pdev))
2850 			return IOMMU_DOMAIN_IDENTITY;
2851 	}
2852 
2853 	return 0;
2854 }
2855 
2856 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2857 {
2858 	struct iommu_domain *domain;
2859 
2860 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2861 	if (WARN_ON(IS_ERR(domain)) || !domain)
2862 		return;
2863 
2864 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2865 }
2866 
2867 static struct iommu_ops arm_smmu_ops = {
2868 	.capable		= arm_smmu_capable,
2869 	.domain_alloc		= arm_smmu_domain_alloc,
2870 	.probe_device		= arm_smmu_probe_device,
2871 	.release_device		= arm_smmu_release_device,
2872 	.device_group		= arm_smmu_device_group,
2873 	.of_xlate		= arm_smmu_of_xlate,
2874 	.get_resv_regions	= arm_smmu_get_resv_regions,
2875 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
2876 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2877 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2878 	.page_response		= arm_smmu_page_response,
2879 	.def_domain_type	= arm_smmu_def_domain_type,
2880 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2881 	.owner			= THIS_MODULE,
2882 	.default_domain_ops = &(const struct iommu_domain_ops) {
2883 		.attach_dev		= arm_smmu_attach_dev,
2884 		.map_pages		= arm_smmu_map_pages,
2885 		.unmap_pages		= arm_smmu_unmap_pages,
2886 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2887 		.iotlb_sync		= arm_smmu_iotlb_sync,
2888 		.iova_to_phys		= arm_smmu_iova_to_phys,
2889 		.enable_nesting		= arm_smmu_enable_nesting,
2890 		.free			= arm_smmu_domain_free,
2891 	}
2892 };
2893 
2894 /* Probing and initialisation functions */
2895 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2896 				   struct arm_smmu_queue *q,
2897 				   void __iomem *page,
2898 				   unsigned long prod_off,
2899 				   unsigned long cons_off,
2900 				   size_t dwords, const char *name)
2901 {
2902 	size_t qsz;
2903 
2904 	do {
2905 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2906 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2907 					      GFP_KERNEL);
2908 		if (q->base || qsz < PAGE_SIZE)
2909 			break;
2910 
2911 		q->llq.max_n_shift--;
2912 	} while (1);
2913 
2914 	if (!q->base) {
2915 		dev_err(smmu->dev,
2916 			"failed to allocate queue (0x%zx bytes) for %s\n",
2917 			qsz, name);
2918 		return -ENOMEM;
2919 	}
2920 
2921 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2922 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2923 			 1 << q->llq.max_n_shift, name);
2924 	}
2925 
2926 	q->prod_reg	= page + prod_off;
2927 	q->cons_reg	= page + cons_off;
2928 	q->ent_dwords	= dwords;
2929 
2930 	q->q_base  = Q_BASE_RWA;
2931 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2932 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2933 
2934 	q->llq.prod = q->llq.cons = 0;
2935 	return 0;
2936 }
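
/*
 * Queue sizing used by the allocation loop above, as an illustrative helper
 * (not part of the driver): a queue of 2^max_n_shift entries, each 'dwords'
 * 64-bit words wide, occupies (1 << max_n_shift) * dwords * 8 bytes. For
 * example, 1024 command-queue entries of CMDQ_ENT_DWORDS (2) take 16KiB; on
 * allocation failure the loop above halves the entry count and retries.
 */
static inline size_t arm_smmu_queue_size(unsigned int max_n_shift, size_t dwords)
{
	return ((1UL << max_n_shift) * dwords) << 3;
}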
2937 
2938 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2939 {
2940 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2941 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2942 
2943 	atomic_set(&cmdq->owner_prod, 0);
2944 	atomic_set(&cmdq->lock, 0);
2945 
2946 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2947 							      GFP_KERNEL);
2948 	if (!cmdq->valid_map)
2949 		return -ENOMEM;
2950 
2951 	return 0;
2952 }
2953 
2954 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2955 {
2956 	int ret;
2957 
2958 	/* cmdq */
2959 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2960 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2961 				      CMDQ_ENT_DWORDS, "cmdq");
2962 	if (ret)
2963 		return ret;
2964 
2965 	ret = arm_smmu_cmdq_init(smmu);
2966 	if (ret)
2967 		return ret;
2968 
2969 	/* evtq */
2970 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2971 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2972 				      EVTQ_ENT_DWORDS, "evtq");
2973 	if (ret)
2974 		return ret;
2975 
2976 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2977 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2978 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2979 		if (!smmu->evtq.iopf)
2980 			return -ENOMEM;
2981 	}
2982 
2983 	/* priq */
2984 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2985 		return 0;
2986 
2987 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2988 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2989 				       PRIQ_ENT_DWORDS, "priq");
2990 }
2991 
2992 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2993 {
2994 	unsigned int i;
2995 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2996 	void *strtab = smmu->strtab_cfg.strtab;
2997 
2998 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2999 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3000 	if (!cfg->l1_desc)
3001 		return -ENOMEM;
3002 
3003 	for (i = 0; i < cfg->num_l1_ents; ++i) {
3004 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3005 		strtab += STRTAB_L1_DESC_DWORDS << 3;
3006 	}
3007 
3008 	return 0;
3009 }
3010 
3011 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3012 {
3013 	void *strtab;
3014 	u64 reg;
3015 	u32 size, l1size;
3016 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3017 
3018 	/* Calculate the L1 size, capped to the SIDSIZE. */
3019 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3020 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3021 	cfg->num_l1_ents = 1 << size;
3022 
3023 	size += STRTAB_SPLIT;
3024 	if (size < smmu->sid_bits)
3025 		dev_warn(smmu->dev,
3026 			 "2-level strtab only covers %u/%u bits of SID\n",
3027 			 size, smmu->sid_bits);
3028 
3029 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3030 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3031 				     GFP_KERNEL);
3032 	if (!strtab) {
3033 		dev_err(smmu->dev,
3034 			"failed to allocate l1 stream table (%u bytes)\n",
3035 			l1size);
3036 		return -ENOMEM;
3037 	}
3038 	cfg->strtab = strtab;
3039 
3040 	/* Configure strtab_base_cfg for 2 levels */
3041 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3042 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3043 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3044 	cfg->strtab_base_cfg = reg;
3045 
3046 	return arm_smmu_init_l1_strtab(smmu);
3047 }
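
/*
 * Worked example for the 2-level sizing above (illustrative helper, not part
 * of the driver; assumes the header's STRTAB_SPLIT of 8 low SID bits per L2
 * table). With sid_bits == 16 this yields 1 << (16 - 8) = 256 L1 descriptors,
 * i.e. a 2KiB L1 table covering the full 16-bit SID space; wider SID spaces
 * are capped by STRTAB_L1_SZ_SHIFT and trigger the warning above.
 */
static inline u32 arm_smmu_l1_ents(unsigned int sid_bits)
{
	u32 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);

	size = min(size, sid_bits - STRTAB_SPLIT);
	return 1 << size;
}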
3048 
3049 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3050 {
3051 	void *strtab;
3052 	u64 reg;
3053 	u32 size;
3054 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3055 
3056 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3057 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3058 				     GFP_KERNEL);
3059 	if (!strtab) {
3060 		dev_err(smmu->dev,
3061 			"failed to allocate linear stream table (%u bytes)\n",
3062 			size);
3063 		return -ENOMEM;
3064 	}
3065 	cfg->strtab = strtab;
3066 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3067 
3068 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3069 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3070 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3071 	cfg->strtab_base_cfg = reg;
3072 
3073 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3074 	return 0;
3075 }
3076 
3077 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3078 {
3079 	u64 reg;
3080 	int ret;
3081 
3082 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3083 		ret = arm_smmu_init_strtab_2lvl(smmu);
3084 	else
3085 		ret = arm_smmu_init_strtab_linear(smmu);
3086 
3087 	if (ret)
3088 		return ret;
3089 
3090 	/* Set the strtab base address */
3091 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3092 	reg |= STRTAB_BASE_RA;
3093 	smmu->strtab_cfg.strtab_base = reg;
3094 
3095 	ida_init(&smmu->vmid_map);
3096 
3097 	return 0;
3098 }
3099 
3100 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3101 {
3102 	int ret;
3103 
3104 	mutex_init(&smmu->streams_mutex);
3105 	smmu->streams = RB_ROOT;
3106 
3107 	ret = arm_smmu_init_queues(smmu);
3108 	if (ret)
3109 		return ret;
3110 
3111 	return arm_smmu_init_strtab(smmu);
3112 }
3113 
3114 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3115 				   unsigned int reg_off, unsigned int ack_off)
3116 {
3117 	u32 reg;
3118 
3119 	writel_relaxed(val, smmu->base + reg_off);
3120 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3121 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3122 }
3123 
3124 /* GBPA is "special" */
3125 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3126 {
3127 	int ret;
3128 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3129 
3130 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3131 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3132 	if (ret)
3133 		return ret;
3134 
3135 	reg &= ~clr;
3136 	reg |= set;
3137 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3138 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3139 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3140 
3141 	if (ret)
3142 		dev_err(smmu->dev, "GBPA not responding to update\n");
3143 	return ret;
3144 }
3145 
3146 static void arm_smmu_free_msis(void *data)
3147 {
3148 	struct device *dev = data;
3149 	platform_msi_domain_free_irqs(dev);
3150 }
3151 
3152 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3153 {
3154 	phys_addr_t doorbell;
3155 	struct device *dev = msi_desc_to_dev(desc);
3156 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3157 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3158 
3159 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3160 	doorbell &= MSI_CFG0_ADDR_MASK;
3161 
3162 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3163 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3164 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3165 }
3166 
3167 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3168 {
3169 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3170 	struct device *dev = smmu->dev;
3171 
3172 	/* Clear the MSI address regs */
3173 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3174 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3175 
3176 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3177 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3178 	else
3179 		nvec--;
3180 
3181 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3182 		return;
3183 
3184 	if (!dev->msi.domain) {
3185 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3186 		return;
3187 	}
3188 
3189 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3190 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3191 	if (ret) {
3192 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3193 		return;
3194 	}
3195 
3196 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3197 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3198 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3199 
3200 	/* Add callback to free MSIs on teardown */
3201 	devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3202 }
3203 
3204 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3205 {
3206 	int irq, ret;
3207 
3208 	arm_smmu_setup_msis(smmu);
3209 
3210 	/* Request interrupt lines */
3211 	irq = smmu->evtq.q.irq;
3212 	if (irq) {
3213 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3214 						arm_smmu_evtq_thread,
3215 						IRQF_ONESHOT,
3216 						"arm-smmu-v3-evtq", smmu);
3217 		if (ret < 0)
3218 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3219 	} else {
3220 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3221 	}
3222 
3223 	irq = smmu->gerr_irq;
3224 	if (irq) {
3225 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3226 				       0, "arm-smmu-v3-gerror", smmu);
3227 		if (ret < 0)
3228 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3229 	} else {
3230 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3231 	}
3232 
3233 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3234 		irq = smmu->priq.q.irq;
3235 		if (irq) {
3236 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3237 							arm_smmu_priq_thread,
3238 							IRQF_ONESHOT,
3239 							"arm-smmu-v3-priq",
3240 							smmu);
3241 			if (ret < 0)
3242 				dev_warn(smmu->dev,
3243 					 "failed to enable priq irq\n");
3244 		} else {
3245 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3246 		}
3247 	}
3248 }
3249 
3250 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3251 {
3252 	int ret, irq;
3253 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3254 
3255 	/* Disable IRQs first */
3256 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3257 				      ARM_SMMU_IRQ_CTRLACK);
3258 	if (ret) {
3259 		dev_err(smmu->dev, "failed to disable irqs\n");
3260 		return ret;
3261 	}
3262 
3263 	irq = smmu->combined_irq;
3264 	if (irq) {
3265 		/*
3266 		 * Cavium ThunderX2 implementation doesn't support unique irq
3267 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
3268 		 */
3269 		ret = devm_request_threaded_irq(smmu->dev, irq,
3270 					arm_smmu_combined_irq_handler,
3271 					arm_smmu_combined_irq_thread,
3272 					IRQF_ONESHOT,
3273 					"arm-smmu-v3-combined-irq", smmu);
3274 		if (ret < 0)
3275 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3276 	} else
3277 		arm_smmu_setup_unique_irqs(smmu);
3278 
3279 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3280 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3281 
3282 	/* Enable interrupt generation on the SMMU */
3283 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3284 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3285 	if (ret)
3286 		dev_warn(smmu->dev, "failed to enable irqs\n");
3287 
3288 	return 0;
3289 }
3290 
3291 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3292 {
3293 	int ret;
3294 
3295 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3296 	if (ret)
3297 		dev_err(smmu->dev, "failed to clear cr0\n");
3298 
3299 	return ret;
3300 }
3301 
3302 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3303 {
3304 	int ret;
3305 	u32 reg, enables;
3306 	struct arm_smmu_cmdq_ent cmd;
3307 
3308 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3309 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3310 	if (reg & CR0_SMMUEN) {
3311 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3312 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3313 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3314 	}
3315 
3316 	ret = arm_smmu_device_disable(smmu);
3317 	if (ret)
3318 		return ret;
3319 
3320 	/* CR1 (table and queue memory attributes) */
3321 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3322 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3323 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3324 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3325 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3326 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3327 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3328 
3329 	/* CR2 (private TLB maintenance, record invalid StreamIDs, optional E2H) */
3330 	reg = CR2_PTM | CR2_RECINVSID;
3331 
3332 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3333 		reg |= CR2_E2H;
3334 
3335 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3336 
3337 	/* Stream table */
3338 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3339 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3340 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3341 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3342 
3343 	/* Command queue */
3344 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3345 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3346 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3347 
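	/*
	 * Enable the command queue before anything else: the CFGI/TLBI
	 * invalidations issued below rely on it being operational.
	 */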
3348 	enables = CR0_CMDQEN;
3349 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3350 				      ARM_SMMU_CR0ACK);
3351 	if (ret) {
3352 		dev_err(smmu->dev, "failed to enable command queue\n");
3353 		return ret;
3354 	}
3355 
3356 	/* Invalidate any cached configuration */
3357 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3358 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3359 
3360 	/* Invalidate any stale TLB entries */
3361 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3362 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3363 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3364 	}
3365 
3366 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3367 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3368 
3369 	/* Event queue */
3370 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3371 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3372 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3373 
3374 	enables |= CR0_EVTQEN;
3375 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3376 				      ARM_SMMU_CR0ACK);
3377 	if (ret) {
3378 		dev_err(smmu->dev, "failed to enable event queue\n");
3379 		return ret;
3380 	}
3381 
3382 	/* PRI queue */
3383 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3384 		writeq_relaxed(smmu->priq.q.q_base,
3385 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3386 		writel_relaxed(smmu->priq.q.llq.prod,
3387 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3388 		writel_relaxed(smmu->priq.q.llq.cons,
3389 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3390 
3391 		enables |= CR0_PRIQEN;
3392 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3393 					      ARM_SMMU_CR0ACK);
3394 		if (ret) {
3395 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3396 			return ret;
3397 		}
3398 	}
3399 
3400 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3401 		enables |= CR0_ATSCHK;
3402 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3403 					      ARM_SMMU_CR0ACK);
3404 		if (ret) {
3405 			dev_err(smmu->dev, "failed to enable ATS check\n");
3406 			return ret;
3407 		}
3408 	}
3409 
3410 	ret = arm_smmu_setup_irqs(smmu);
3411 	if (ret) {
3412 		dev_err(smmu->dev, "failed to setup irqs\n");
3413 		return ret;
3414 	}
3415 
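	/*
	 * In a kdump kernel, leave the event and PRI queues disabled,
	 * presumably to avoid servicing a flood of faults from DMA left
	 * running by the crashed kernel.
	 */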
3416 	if (is_kdump_kernel())
3417 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3418 
3419 	/* Enable the SMMU interface, or ensure bypass */
3420 	if (!bypass || disable_bypass) {
3421 		enables |= CR0_SMMUEN;
3422 	} else {
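		/*
		 * Bypass was requested and is allowed: leave SMMUEN clear and
		 * clear GBPA.ABORT so that incoming transactions pass through
		 * untranslated instead of being aborted.
		 */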
3423 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3424 		if (ret)
3425 			return ret;
3426 	}
3427 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3428 				      ARM_SMMU_CR0ACK);
3429 	if (ret) {
3430 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3431 		return ret;
3432 	}
3433 
3434 	return 0;
3435 }
3436 
3437 #define IIDR_IMPLEMENTER_ARM		0x43b
3438 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3439 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3440 
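/*
 * Quirk off features that are broken on specific Arm implementations,
 * identified via the IIDR register, to work around known errata.
 */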
3441 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3442 {
3443 	u32 reg;
3444 	unsigned int implementer, productid, variant, revision;
3445 
3446 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3447 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3448 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3449 	variant = FIELD_GET(IIDR_VARIANT, reg);
3450 	revision = FIELD_GET(IIDR_REVISION, reg);
3451 
3452 	switch (implementer) {
3453 	case IIDR_IMPLEMENTER_ARM:
3454 		switch (productid) {
3455 		case IIDR_PRODUCTID_ARM_MMU_600:
3456 			/* Arm erratum 1076982 */
3457 			if (variant == 0 && revision <= 2)
3458 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3459 			/* Arm erratum 1209401 */
3460 			if (variant < 2)
3461 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3462 			break;
3463 		case IIDR_PRODUCTID_ARM_MMU_700:
3464 			/* Arm erratum 2812531 */
3465 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3466 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3467 			/* Arm errata 2268618, 2812531 */
3468 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3469 			break;
3470 		}
3471 		break;
3472 	}
3473 }
3474 
3475 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3476 {
3477 	u32 reg;
3478 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3479 
3480 	/* IDR0 */
3481 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3482 
3483 	/* 2-level structures */
3484 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3485 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3486 
3487 	if (reg & IDR0_CD2L)
3488 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3489 
3490 	/*
3491 	 * Translation table endianness.
3492 	 * We currently require the same endianness as the CPU, but this
3493 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3494 	 */
3495 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3496 	case IDR0_TTENDIAN_MIXED:
3497 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3498 		break;
3499 #ifdef __BIG_ENDIAN
3500 	case IDR0_TTENDIAN_BE:
3501 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3502 		break;
3503 #else
3504 	case IDR0_TTENDIAN_LE:
3505 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3506 		break;
3507 #endif
3508 	default:
3509 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3510 		return -ENXIO;
3511 	}
3512 
3513 	/* Boolean feature flags */
3514 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3515 		smmu->features |= ARM_SMMU_FEAT_PRI;
3516 
3517 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3518 		smmu->features |= ARM_SMMU_FEAT_ATS;
3519 
3520 	if (reg & IDR0_SEV)
3521 		smmu->features |= ARM_SMMU_FEAT_SEV;
3522 
3523 	if (reg & IDR0_MSI) {
3524 		smmu->features |= ARM_SMMU_FEAT_MSI;
3525 		if (coherent && !disable_msipolling)
3526 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3527 	}
3528 
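	/*
	 * Only make use of the E2H translation regime when the host itself
	 * runs with VHE, presumably so the SMMU's stage-1 configuration can
	 * mirror the CPU's.
	 */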
3529 	if (reg & IDR0_HYP) {
3530 		smmu->features |= ARM_SMMU_FEAT_HYP;
3531 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3532 			smmu->features |= ARM_SMMU_FEAT_E2H;
3533 	}
3534 
3535 	/*
3536 	 * The coherency feature as set by FW is used in preference to the ID
3537 	 * register, but warn on mismatch.
3538 	 */
3539 	if (!!(reg & IDR0_COHACC) != coherent)
3540 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3541 			 coherent ? "true" : "false");
3542 
3543 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3544 	case IDR0_STALL_MODEL_FORCE:
3545 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3546 		fallthrough;
3547 	case IDR0_STALL_MODEL_STALL:
3548 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3549 	}
3550 
3551 	if (reg & IDR0_S1P)
3552 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3553 
3554 	if (reg & IDR0_S2P)
3555 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3556 
3557 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3558 		dev_err(smmu->dev, "no translation support!\n");
3559 		return -ENXIO;
3560 	}
3561 
3562 	/* We only support the AArch64 table format at present */
3563 	switch (FIELD_GET(IDR0_TTF, reg)) {
3564 	case IDR0_TTF_AARCH32_64:
3565 		smmu->ias = 40;
3566 		fallthrough;
3567 	case IDR0_TTF_AARCH64:
3568 		break;
3569 	default:
3570 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3571 		return -ENXIO;
3572 	}
3573 
3574 	/* ASID/VMID sizes */
3575 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3576 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3577 
3578 	/* IDR1 */
3579 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3580 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3581 		dev_err(smmu->dev, "embedded implementation not supported\n");
3582 		return -ENXIO;
3583 	}
3584 
3585 	/* Queue sizes, capped to ensure natural alignment */
3586 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3587 					     FIELD_GET(IDR1_CMDQS, reg));
3588 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3589 		/*
3590 		 * We don't support splitting up batches, so one batch of
3591 		 * commands plus an extra sync needs to fit inside the command
3592 		 * queue. There's also no way we can handle the weird alignment
3593 		 * restrictions on the base pointer for a unit-length queue.
3594 		 */
3595 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3596 			CMDQ_BATCH_ENTRIES);
3597 		return -ENXIO;
3598 	}
3599 
3600 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3601 					     FIELD_GET(IDR1_EVTQS, reg));
3602 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3603 					     FIELD_GET(IDR1_PRIQS, reg));
3604 
3605 	/* SID/SSID sizes */
3606 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3607 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3608 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3609 
3610 	/*
3611 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3612 	 * table, use a linear table instead.
3613 	 */
3614 	if (smmu->sid_bits <= STRTAB_SPLIT)
3615 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3616 
3617 	/* IDR3 */
3618 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3619 	if (FIELD_GET(IDR3_RIL, reg))
3620 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3621 
3622 	/* IDR5 */
3623 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3624 
3625 	/* Maximum number of outstanding stalls */
3626 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3627 
3628 	/* Page sizes */
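	/* Each granule brings its block mappings too: 64K -> 512M, 16K -> 32M, 4K -> 2M and 1G */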
3629 	if (reg & IDR5_GRAN64K)
3630 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3631 	if (reg & IDR5_GRAN16K)
3632 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3633 	if (reg & IDR5_GRAN4K)
3634 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3635 
3636 	/* Input address size */
3637 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3638 		smmu->features |= ARM_SMMU_FEAT_VAX;
3639 
3640 	/* Output address size */
3641 	switch (FIELD_GET(IDR5_OAS, reg)) {
3642 	case IDR5_OAS_32_BIT:
3643 		smmu->oas = 32;
3644 		break;
3645 	case IDR5_OAS_36_BIT:
3646 		smmu->oas = 36;
3647 		break;
3648 	case IDR5_OAS_40_BIT:
3649 		smmu->oas = 40;
3650 		break;
3651 	case IDR5_OAS_42_BIT:
3652 		smmu->oas = 42;
3653 		break;
3654 	case IDR5_OAS_44_BIT:
3655 		smmu->oas = 44;
3656 		break;
3657 	case IDR5_OAS_52_BIT:
3658 		smmu->oas = 52;
3659 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3660 		break;
3661 	default:
3662 		dev_info(smmu->dev,
3663 			"unknown output address size. Truncating to 48-bit\n");
3664 		fallthrough;
3665 	case IDR5_OAS_48_BIT:
3666 		smmu->oas = 48;
3667 	}
3668 
3669 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3670 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3671 	else
3672 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3673 
3674 	/* Set the DMA mask for our table walker */
3675 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3676 		dev_warn(smmu->dev,
3677 			 "failed to set DMA mask for table walker\n");
3678 
3679 	smmu->ias = max(smmu->ias, smmu->oas);
3680 
3681 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3682 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3683 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3684 
3685 	arm_smmu_device_iidr_probe(smmu);
3686 
3687 	if (arm_smmu_sva_supported(smmu))
3688 		smmu->features |= ARM_SMMU_FEAT_SVA;
3689 
3690 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3691 		 smmu->ias, smmu->oas, smmu->features);
3692 	return 0;
3693 }
3694 
3695 #ifdef CONFIG_ACPI
3696 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3697 {
3698 	switch (model) {
3699 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3700 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3701 		break;
3702 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3703 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3704 		break;
3705 	}
3706 
3707 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3708 }
3709 
3710 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3711 				      struct arm_smmu_device *smmu)
3712 {
3713 	struct acpi_iort_smmu_v3 *iort_smmu;
3714 	struct device *dev = smmu->dev;
3715 	struct acpi_iort_node *node;
3716 
3717 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3718 
3719 	/* Retrieve SMMUv3 specific data */
3720 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3721 
3722 	acpi_smmu_get_options(iort_smmu->model, smmu);
3723 
3724 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3725 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3726 
3727 	return 0;
3728 }
3729 #else
3730 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3731 					     struct arm_smmu_device *smmu)
3732 {
3733 	return -ENODEV;
3734 }
3735 #endif
3736 
3737 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3738 				    struct arm_smmu_device *smmu)
3739 {
3740 	struct device *dev = &pdev->dev;
3741 	u32 cells;
3742 	int ret = -EINVAL;
3743 
3744 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3745 		dev_err(dev, "missing #iommu-cells property\n");
3746 	else if (cells != 1)
3747 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3748 	else
3749 		ret = 0;
3750 
3751 	parse_driver_options(smmu);
3752 
3753 	if (of_dma_is_coherent(dev->of_node))
3754 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3755 
3756 	return ret;
3757 }
3758 
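/*
 * The SMMUv3 register map spans two 64K pages; implementations flagged with
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY expose the page-1 registers through page 0
 * instead, so only 64K of MMIO needs to be claimed.
 */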
3759 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3760 {
3761 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3762 		return SZ_64K;
3763 	else
3764 		return SZ_128K;
3765 }
3766 
3767 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3768 				      resource_size_t size)
3769 {
3770 	struct resource res = DEFINE_RES_MEM(start, size);
3771 
3772 	return devm_ioremap_resource(dev, &res);
3773 }
3774 
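/*
 * Firmware (IORT RMR nodes) may describe streams with live DMA to reserved
 * memory regions; give those StreamIDs bypass STEs up front so that their
 * traffic keeps flowing once the SMMU is enabled.
 */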
3775 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3776 {
3777 	struct list_head rmr_list;
3778 	struct iommu_resv_region *e;
3779 
3780 	INIT_LIST_HEAD(&rmr_list);
3781 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3782 
3783 	list_for_each_entry(e, &rmr_list, list) {
3784 		__le64 *step;
3785 		struct iommu_iort_rmr_data *rmr;
3786 		int ret, i;
3787 
3788 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3789 		for (i = 0; i < rmr->num_sids; i++) {
3790 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3791 			if (ret) {
3792 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3793 					rmr->sids[i]);
3794 				continue;
3795 			}
3796 
3797 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3798 			arm_smmu_init_bypass_stes(step, 1, true);
3799 		}
3800 	}
3801 
3802 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3803 }
3804 
3805 static int arm_smmu_device_probe(struct platform_device *pdev)
3806 {
3807 	int irq, ret;
3808 	struct resource *res;
3809 	resource_size_t ioaddr;
3810 	struct arm_smmu_device *smmu;
3811 	struct device *dev = &pdev->dev;
3812 	bool bypass;
3813 
3814 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3815 	if (!smmu)
3816 		return -ENOMEM;
3817 	smmu->dev = dev;
3818 
3819 	if (dev->of_node) {
3820 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3821 	} else {
3822 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3823 		if (ret == -ENODEV)
3824 			return ret;
3825 	}
3826 
3827 	/* Set bypass mode according to firmware probing result */
3828 	bypass = !!ret;
3829 
3830 	/* Base address */
3831 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3832 	if (!res)
3833 		return -EINVAL;
3834 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3835 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3836 		return -EINVAL;
3837 	}
3838 	ioaddr = res->start;
3839 
3840 	/*
3841 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3842 	 * the PMCG registers which are reserved by the PMU driver.
3843 	 */
3844 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3845 	if (IS_ERR(smmu->base))
3846 		return PTR_ERR(smmu->base);
3847 
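	/*
	 * Register page 1 normally sits at a 64K offset; PAGE0_REGS_ONLY
	 * implementations provide those registers through page 0, so page1
	 * simply aliases the base mapping.
	 */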
3848 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3849 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3850 					       ARM_SMMU_REG_SZ);
3851 		if (IS_ERR(smmu->page1))
3852 			return PTR_ERR(smmu->page1);
3853 	} else {
3854 		smmu->page1 = smmu->base;
3855 	}
3856 
3857 	/* Interrupt lines */
3858 
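	/*
	 * A single "combined" interrupt (e.g. Cavium ThunderX2) takes
	 * precedence; otherwise look up the per-queue and gerror lines.
	 */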
3859 	irq = platform_get_irq_byname_optional(pdev, "combined");
3860 	if (irq > 0)
3861 		smmu->combined_irq = irq;
3862 	else {
3863 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3864 		if (irq > 0)
3865 			smmu->evtq.q.irq = irq;
3866 
3867 		irq = platform_get_irq_byname_optional(pdev, "priq");
3868 		if (irq > 0)
3869 			smmu->priq.q.irq = irq;
3870 
3871 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3872 		if (irq > 0)
3873 			smmu->gerr_irq = irq;
3874 	}
3875 	/* Probe the h/w */
3876 	ret = arm_smmu_device_hw_probe(smmu);
3877 	if (ret)
3878 		return ret;
3879 
3880 	/* Initialise in-memory data structures */
3881 	ret = arm_smmu_init_structures(smmu);
3882 	if (ret)
3883 		goto err_free_iopf;
3884 
3885 	/* Record our private device structure */
3886 	platform_set_drvdata(pdev, smmu);
3887 
3888 	/* Check for RMRs and install bypass STEs if any */
3889 	arm_smmu_rmr_install_bypass_ste(smmu);
3890 
3891 	/* Reset the device */
3892 	ret = arm_smmu_device_reset(smmu, bypass);
3893 	if (ret)
3894 		goto err_disable;
3895 
3896 	/* And we're up. Go go go! */
3897 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3898 				     "smmu3.%pa", &ioaddr);
3899 	if (ret)
3900 		goto err_disable;
3901 
3902 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3903 	if (ret) {
3904 		dev_err(dev, "Failed to register iommu\n");
3905 		goto err_free_sysfs;
3906 	}
3907 
3908 	return 0;
3909 
3910 err_free_sysfs:
3911 	iommu_device_sysfs_remove(&smmu->iommu);
3912 err_disable:
3913 	arm_smmu_device_disable(smmu);
3914 err_free_iopf:
3915 	iopf_queue_free(smmu->evtq.iopf);
3916 	return ret;
3917 }
3918 
3919 static void arm_smmu_device_remove(struct platform_device *pdev)
3920 {
3921 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3922 
3923 	iommu_device_unregister(&smmu->iommu);
3924 	iommu_device_sysfs_remove(&smmu->iommu);
3925 	arm_smmu_device_disable(smmu);
3926 	iopf_queue_free(smmu->evtq.iopf);
3927 	ida_destroy(&smmu->vmid_map);
3928 }
3929 
3930 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3931 {
3932 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3933 
3934 	arm_smmu_device_disable(smmu);
3935 }
3936 
3937 static const struct of_device_id arm_smmu_of_match[] = {
3938 	{ .compatible = "arm,smmu-v3", },
3939 	{ },
3940 };
3941 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3942 
3943 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3944 {
3945 	arm_smmu_sva_notifier_synchronize();
3946 	platform_driver_unregister(drv);
3947 }
3948 
3949 static struct platform_driver arm_smmu_driver = {
3950 	.driver	= {
3951 		.name			= "arm-smmu-v3",
3952 		.of_match_table		= arm_smmu_of_match,
3953 		.suppress_bind_attrs	= true,
3954 	},
3955 	.probe	= arm_smmu_device_probe,
3956 	.remove_new = arm_smmu_device_remove,
3957 	.shutdown = arm_smmu_device_shutdown,
3958 };
3959 module_driver(arm_smmu_driver, platform_driver_register,
3960 	      arm_smmu_driver_unregister);
3961 
3962 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3963 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3964 MODULE_ALIAS("platform:arm-smmu-v3");
3965 MODULE_LICENSE("GPL v2");
3966