1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 "Disable MSI-based polling for CMD_SYNC completion.");
43
44 enum arm_smmu_msi_index {
45 EVTQ_MSI_INDEX,
46 GERROR_MSI_INDEX,
47 PRIQ_MSI_INDEX,
48 ARM_SMMU_MAX_MSIS,
49 };
50
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 [EVTQ_MSI_INDEX] = {
53 ARM_SMMU_EVTQ_IRQ_CFG0,
54 ARM_SMMU_EVTQ_IRQ_CFG1,
55 ARM_SMMU_EVTQ_IRQ_CFG2,
56 },
57 [GERROR_MSI_INDEX] = {
58 ARM_SMMU_GERROR_IRQ_CFG0,
59 ARM_SMMU_GERROR_IRQ_CFG1,
60 ARM_SMMU_GERROR_IRQ_CFG2,
61 },
62 [PRIQ_MSI_INDEX] = {
63 ARM_SMMU_PRIQ_IRQ_CFG0,
64 ARM_SMMU_PRIQ_IRQ_CFG1,
65 ARM_SMMU_PRIQ_IRQ_CFG2,
66 },
67 };
68
69 struct arm_smmu_option_prop {
70 u32 opt;
71 const char *prop;
72 };
73
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76
77 /*
78 * Special value used by SVA when a process dies, to quiesce a CD without
79 * disabling it.
80 */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 { 0, NULL},
87 };
88
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 int i = 0;
92
93 do {
94 if (of_property_read_bool(smmu->dev->of_node,
95 arm_smmu_options[i].prop)) {
96 smmu->options |= arm_smmu_options[i].opt;
97 dev_notice(smmu->dev, "option %s\n",
98 arm_smmu_options[i].prop);
99 }
100 } while (arm_smmu_options[++i].opt);
101 }
102
103 /* Low-level queue manipulation functions */
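/*
 * Roughly speaking, prod and cons below each pack three fields (see the
 * Q_IDX/Q_WRP/Q_OVF helpers): the low max_n_shift bits hold the entry
 * index, the next bit is the wrap flag and the top bit is the overflow
 * flag. As a worked example, assuming a 256-entry queue (max_n_shift == 8):
 *
 *	prod = 0x105	wrap = 1, idx = 5
 *	cons = 0x0fe	wrap = 0, idx = 254
 *
 * The wrap bits differ, so queue_has_space() would compute
 * space = cons_idx - prod_idx = 254 - 5 = 249, i.e. 7 of the 256 slots
 * are still awaiting consumption.
 */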
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 u32 space, prod, cons;
107
108 prod = Q_IDX(q, q->prod);
109 cons = Q_IDX(q, q->cons);
110
111 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 space = (1 << q->max_n_shift) - (prod - cons);
113 else
114 space = cons - prod;
115
116 return space >= n;
117 }
118
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 /*
142 * Ensure that all CPU accesses (reads and writes) to the queue
143 * are complete before we update the cons pointer.
144 */
145 __iomb();
146 writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
152 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 struct arm_smmu_ll_queue *llq = &q->llq;
158
159 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 return;
161
162 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 Q_IDX(llq, llq->cons);
164 queue_sync_cons_out(q);
165 }
166
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 u32 prod;
170 int ret = 0;
171
172 /*
173 * We can't use the _relaxed() variant here, as we must prevent
174 * speculative reads of the queue before we have determined that
175 * prod has indeed moved.
176 */
177 prod = readl(q->prod_reg);
178
179 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 ret = -EOVERFLOW;
181
182 q->llq.prod = prod;
183 return ret;
184 }
185
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 struct arm_smmu_queue_poll *qp)
194 {
195 qp->delay = 1;
196 qp->spin_cnt = 0;
197 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 return -ETIMEDOUT;
205
206 if (qp->wfe) {
207 wfe();
208 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 cpu_relax();
210 } else {
211 udelay(qp->delay);
212 qp->delay *= 2;
213 qp->spin_cnt = 0;
214 }
215
216 return 0;
217 }
218
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 int i;
222
223 for (i = 0; i < n_dwords; ++i)
224 *dst++ = cpu_to_le64(*src++);
225 }
226
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 int i;
230
231 for (i = 0; i < n_dwords; ++i)
232 *dst++ = le64_to_cpu(*src++);
233 }
234
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 if (queue_empty(&q->llq))
238 return -EAGAIN;
239
240 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 queue_inc_cons(&q->llq);
242 queue_sync_cons_out(q);
243 return 0;
244 }
245
246 /* High-level queue accessors */
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251
252 switch (ent->opcode) {
253 case CMDQ_OP_TLBI_EL2_ALL:
254 case CMDQ_OP_TLBI_NSNH_ALL:
255 break;
256 case CMDQ_OP_PREFETCH_CFG:
257 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 break;
259 case CMDQ_OP_CFGI_CD:
260 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 fallthrough;
262 case CMDQ_OP_CFGI_STE:
263 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 break;
266 case CMDQ_OP_CFGI_CD_ALL:
267 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 break;
269 case CMDQ_OP_CFGI_ALL:
270 /* Cover the entire SID range */
271 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 break;
273 case CMDQ_OP_TLBI_NH_VA:
274 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 fallthrough;
276 case CMDQ_OP_TLBI_EL2_VA:
277 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 break;
285 case CMDQ_OP_TLBI_S2_IPA:
286 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 break;
294 case CMDQ_OP_TLBI_NH_ASID:
295 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 fallthrough;
297 case CMDQ_OP_TLBI_S12_VMALL:
298 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 break;
300 case CMDQ_OP_TLBI_EL2_ASID:
301 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 break;
303 case CMDQ_OP_ATC_INV:
304 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 break;
311 case CMDQ_OP_PRI_RESP:
312 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 switch (ent->pri.resp) {
317 case PRI_RESP_DENY:
318 case PRI_RESP_FAIL:
319 case PRI_RESP_SUCC:
320 break;
321 default:
322 return -EINVAL;
323 }
324 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 break;
326 case CMDQ_OP_RESUME:
327 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 break;
331 case CMDQ_OP_CMD_SYNC:
332 if (ent->sync.msiaddr) {
333 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 } else {
336 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 }
338 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 break;
341 default:
342 return -ENOENT;
343 }
344
345 return 0;
346 }
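
/*
 * A minimal usage sketch, mirroring the callers further down: fill in a
 * struct arm_smmu_cmdq_ent and let arm_smmu_cmdq_build_cmd() encode it.
 * For instance, to invalidate all stage-1 TLB entries for an ASID (much
 * like arm_smmu_tlb_inv_asid() does below):
 *
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode		= CMDQ_OP_TLBI_NH_ASID,
 *		.tlbi.asid	= asid,
 *	};
 *
 *	if (!arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		// queue cmd via arm_smmu_cmdq_issue_cmdlist()
 *
 * Unknown opcodes are rejected with -ENOENT rather than being encoded.
 */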
347
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 return &smmu->cmdq;
351 }
352
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 struct arm_smmu_queue *q, u32 prod)
355 {
356 struct arm_smmu_cmdq_ent ent = {
357 .opcode = CMDQ_OP_CMD_SYNC,
358 };
359
360 /*
361 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 * payload, so the write will zero the entire command on that platform.
363 */
364 if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 q->ent_dwords * 8;
367 }
368
369 arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 struct arm_smmu_queue *q)
374 {
375 static const char * const cerror_str[] = {
376 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
377 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
378 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
379 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
380 };
381
382 int i;
383 u64 cmd[CMDQ_ENT_DWORDS];
384 u32 cons = readl_relaxed(q->cons_reg);
385 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 struct arm_smmu_cmdq_ent cmd_sync = {
387 .opcode = CMDQ_OP_CMD_SYNC,
388 };
389
390 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
392
393 switch (idx) {
394 case CMDQ_ERR_CERROR_ABT_IDX:
395 dev_err(smmu->dev, "retrying command fetch\n");
396 return;
397 case CMDQ_ERR_CERROR_NONE_IDX:
398 return;
399 case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 /*
401 * ATC Invalidation Completion timeout. CONS is still pointing
402 * at the CMD_SYNC. Attempt to complete other pending commands
403 * by repeating the CMD_SYNC, though we might well end up back
404 * here since the ATC invalidation may still be pending.
405 */
406 return;
407 case CMDQ_ERR_CERROR_ILL_IDX:
408 default:
409 break;
410 }
411
412 /*
413 * We may have concurrent producers, so we need to be careful
414 * not to touch any of the shadow cmdq state.
415 */
416 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 dev_err(smmu->dev, "skipping command in error state:\n");
418 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420
421 /* Convert the erroneous command into a CMD_SYNC */
422 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423
424 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431
432 /*
433 * Command queue locking.
434 * This is a form of bastardised rwlock with the following major changes:
435 *
436 * - The only LOCK routines are exclusive_trylock() and shared_lock().
437 * Neither have barrier semantics, and instead provide only a control
438 * dependency.
439 *
440 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441 * fails if the caller appears to be the last lock holder (yes, this is
442 * racy). All successful UNLOCK routines have RELEASE semantics.
443 */
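
/*
 * As a rough sketch of how these are used below: a CMD_SYNC issuer takes
 * the lock shared before marking its slot valid and drops it once the sync
 * has completed, while a CPU wanting to refresh the cached cons pointer
 * grabs the lock exclusively:
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);
 *	// ... wait for CMD_SYNC completion ...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// we appear to be the last holder: safe to update
 *		// cmdq->q.llq.cons before unlocking
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 *
 * See arm_smmu_cmdq_issue_cmdlist() and arm_smmu_cmdq_poll_until_not_full()
 * for the real thing.
 */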
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 int val;
447
448 /*
449 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 * lock counter. When held in exclusive state, the lock counter is set
451 * to INT_MIN so these increments won't hurt as the value will remain
452 * negative.
453 */
454 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 return;
456
457 do {
458 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 (void)atomic_dec_return_release(&cmdq->lock);
465 }
466
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 if (atomic_read(&cmdq->lock) == 1)
470 return false;
471
472 arm_smmu_cmdq_shared_unlock(cmdq);
473 return true;
474 }
475
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
477 ({ \
478 bool __ret; \
479 local_irq_save(flags); \
480 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
481 if (!__ret) \
482 local_irq_restore(flags); \
483 __ret; \
484 })
485
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
487 ({ \
488 atomic_set_release(&cmdq->lock, 0); \
489 local_irq_restore(flags); \
490 })
491
492
493 /*
494 * Command queue insertion.
495 * This is made fiddly by our attempts to achieve some sort of scalability
496 * since there is one queue shared amongst all of the CPUs in the system. If
497 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498 * then you'll *love* this monstrosity.
499 *
500 * The basic idea is to split the queue up into ranges of commands that are
501 * owned by a given CPU; the owner may not have written all of the commands
502 * itself, but is responsible for advancing the hardware prod pointer when
503 * the time comes. The algorithm is roughly:
504 *
505 * 1. Allocate some space in the queue. At this point we also discover
506 * whether the head of the queue is currently owned by another CPU,
507 * or whether we are the owner.
508 *
509 * 2. Write our commands into our allocated slots in the queue.
510 *
511 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512 *
513 * 4. If we are an owner:
514 * a. Wait for the previous owner to finish.
515 * b. Mark the queue head as unowned, which tells us the range
516 * that we are responsible for publishing.
517 * c. Wait for all commands in our owned range to become valid.
518 * d. Advance the hardware prod pointer.
519 * e. Tell the next owner we've finished.
520 *
521 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
522 * owner), then we need to stick around until it has completed:
523 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524 * to clear the first 4 bytes.
525 * b. Otherwise, we spin waiting for the hardware cons pointer to
526 * advance past our command.
527 *
528 * The devil is in the details, particularly the use of locking for handling
529 * SYNC completion and freeing up space in the queue before we think that it is
530 * full.
531 */
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 u32 sprod, u32 eprod, bool set)
534 {
535 u32 swidx, sbidx, ewidx, ebidx;
536 struct arm_smmu_ll_queue llq = {
537 .max_n_shift = cmdq->q.llq.max_n_shift,
538 .prod = sprod,
539 };
540
541 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543
544 while (llq.prod != eprod) {
545 unsigned long mask;
546 atomic_long_t *ptr;
547 u32 limit = BITS_PER_LONG;
548
549 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551
552 ptr = &cmdq->valid_map[swidx];
553
554 if ((swidx == ewidx) && (sbidx < ebidx))
555 limit = ebidx;
556
557 mask = GENMASK(limit - 1, sbidx);
558
559 /*
560 * The valid bit is the inverse of the wrap bit. This means
561 * that a zero-initialised queue is invalid and, after marking
562 * all entries as valid, they become invalid again when we
563 * wrap.
564 */
565 if (set) {
566 atomic_long_xor(mask, ptr);
567 } else { /* Poll */
568 unsigned long valid;
569
570 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 }
573
574 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 }
576 }
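
/*
 * To make the xor trick above concrete: while the wrap bit of prod is 0,
 * writers flip a slot's bit from 0 to 1 and pollers wait for 1; on the
 * next lap (wrap bit 1) the same xor takes it from 1 back to 0 and pollers
 * wait for 0. The "valid" polarity therefore alternates on every wrap,
 * which is what the ULONG_MAX + !!Q_WRP() expression in the polling path
 * selects.
 */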
577
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 u32 sprod, u32 eprod)
581 {
582 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 u32 sprod, u32 eprod)
588 {
589 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 struct arm_smmu_ll_queue *llq)
595 {
596 unsigned long flags;
597 struct arm_smmu_queue_poll qp;
598 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 int ret = 0;
600
601 /*
602 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 * that fails, spin until somebody else updates it for us.
604 */
605 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 llq->val = READ_ONCE(cmdq->q.llq.val);
609 return 0;
610 }
611
612 queue_poll_init(smmu, &qp);
613 do {
614 llq->val = READ_ONCE(cmdq->q.llq.val);
615 if (!queue_full(llq))
616 break;
617
618 ret = queue_poll(&qp);
619 } while (!ret);
620
621 return ret;
622 }
623
624 /*
625 * Wait until the SMMU signals a CMD_SYNC completion MSI.
626 * Must be called with the cmdq lock held in some capacity.
627 */
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 struct arm_smmu_ll_queue *llq)
630 {
631 int ret = 0;
632 struct arm_smmu_queue_poll qp;
633 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635
636 queue_poll_init(smmu, &qp);
637
638 /*
639 * The MSI won't generate an event, since it's being written back
640 * into the command queue.
641 */
642 qp.wfe = false;
643 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 return ret;
646 }
647
648 /*
649 * Wait until the SMMU cons index passes llq->prod.
650 * Must be called with the cmdq lock held in some capacity.
651 */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 struct arm_smmu_ll_queue *llq)
654 {
655 struct arm_smmu_queue_poll qp;
656 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 u32 prod = llq->prod;
658 int ret = 0;
659
660 queue_poll_init(smmu, &qp);
661 llq->val = READ_ONCE(cmdq->q.llq.val);
662 do {
663 if (queue_consumed(llq, prod))
664 break;
665
666 ret = queue_poll(&qp);
667
668 /*
669 * This needs to be a readl() so that our subsequent call
670 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 *
672 * Specifically, we need to ensure that we observe all
673 * shared_lock()s by other CMD_SYNCs that share our owner,
674 * so that a failing call to tryunlock() means that we're
675 * the last one out and therefore we can safely advance
676 * cmdq->q.llq.cons. Roughly speaking:
677 *
678 * CPU 0 CPU1 CPU2 (us)
679 *
680 * if (sync)
681 * shared_lock();
682 *
683 * dma_wmb();
684 * set_valid_map();
685 *
686 * if (owner) {
687 * poll_valid_map();
688 * <control dependency>
689 * writel(prod_reg);
690 *
691 * readl(cons_reg);
692 * tryunlock();
693 *
694 * Requires us to see CPU 0's shared_lock() acquisition.
695 */
696 llq->cons = readl(cmdq->q.cons_reg);
697 } while (!ret);
698
699 return ret;
700 }
701
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 struct arm_smmu_ll_queue *llq)
704 {
705 if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707
708 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 u32 prod, int n)
713 {
714 int i;
715 struct arm_smmu_ll_queue llq = {
716 .max_n_shift = cmdq->q.llq.max_n_shift,
717 .prod = prod,
718 };
719
720 for (i = 0; i < n; ++i) {
721 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722
723 prod = queue_inc_prod_n(&llq, i);
724 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 }
726 }
727
728 /*
729 * This is the actual insertion function, and provides the following
730 * ordering guarantees to callers:
731 *
732 * - There is a dma_wmb() before publishing any commands to the queue.
733 * This can be relied upon to order prior writes to data structures
734 * in memory (such as a CD or an STE) before the command.
735 *
736 * - On completion of a CMD_SYNC, there is a control dependency.
737 * This can be relied upon to order subsequent writes to memory (e.g.
738 * freeing an IOVA) after completion of the CMD_SYNC.
739 *
740 * - Command insertion is totally ordered, so if two CPUs each race to
741 * insert their own list of commands then all of the commands from one
742 * CPU will appear before any of the commands from the other CPU.
743 */
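
/*
 * For example, the first guarantee is what arm_smmu_sync_cd() below relies
 * on: it is called after a CD has been updated in memory and pushes CFGI_CD
 * commands through this path, so the dma_wmb() here ensures the SMMU cannot
 * fetch the new command before the CD update is visible, and callers need
 * no explicit barrier of their own between the two steps.
 */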
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 u64 *cmds, int n, bool sync)
746 {
747 u64 cmd_sync[CMDQ_ENT_DWORDS];
748 u32 prod;
749 unsigned long flags;
750 bool owner;
751 struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 struct arm_smmu_ll_queue llq, head;
753 int ret = 0;
754
755 llq.max_n_shift = cmdq->q.llq.max_n_shift;
756
757 /* 1. Allocate some space in the queue */
758 local_irq_save(flags);
759 llq.val = READ_ONCE(cmdq->q.llq.val);
760 do {
761 u64 old;
762
763 while (!queue_has_space(&llq, n + sync)) {
764 local_irq_restore(flags);
765 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 local_irq_save(flags);
768 }
769
770 head.cons = llq.cons;
771 head.prod = queue_inc_prod_n(&llq, n + sync) |
772 CMDQ_PROD_OWNED_FLAG;
773
774 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 if (old == llq.val)
776 break;
777
778 llq.val = old;
779 } while (1);
780 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783
784 /*
785 * 2. Write our commands into the queue
786 * Dependency ordering from the cmpxchg() loop above.
787 */
788 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 if (sync) {
790 prod = queue_inc_prod_n(&llq, n);
791 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793
794 /*
795 * In order to determine completion of our CMD_SYNC, we must
796 * ensure that the queue can't wrap twice without us noticing.
797 * We achieve that by taking the cmdq lock as shared before
798 * marking our slot as valid.
799 */
800 arm_smmu_cmdq_shared_lock(cmdq);
801 }
802
803 /* 3. Mark our slots as valid, ensuring commands are visible first */
804 dma_wmb();
805 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806
807 /* 4. If we are the owner, take control of the SMMU hardware */
808 if (owner) {
809 /* a. Wait for previous owner to finish */
810 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811
812 /* b. Stop gathering work by clearing the owned flag */
813 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 &cmdq->q.llq.atomic.prod);
815 prod &= ~CMDQ_PROD_OWNED_FLAG;
816
817 /*
818 * c. Wait for any gathered work to be written to the queue.
819 * Note that we read our own entries so that we have the control
820 * dependency required by (d).
821 */
822 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823
824 /*
825 * d. Advance the hardware prod pointer
826 * Control dependency ordering from the entries becoming valid.
827 */
828 writel_relaxed(prod, cmdq->q.prod_reg);
829
830 /*
831 * e. Tell the next owner we're done
832 * Make sure we've updated the hardware first, so that we don't
833 * race to update prod and potentially move it backwards.
834 */
835 atomic_set_release(&cmdq->owner_prod, prod);
836 }
837
838 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 if (sync) {
840 llq.prod = queue_inc_prod_n(&llq, n);
841 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 if (ret) {
843 dev_err_ratelimited(smmu->dev,
844 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 llq.prod,
846 readl_relaxed(cmdq->q.prod_reg),
847 readl_relaxed(cmdq->q.cons_reg));
848 }
849
850 /*
851 * Try to unlock the cmdq lock. This will fail if we're the last
852 * reader, in which case we can safely update cmdq->q.llq.cons
853 */
854 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 arm_smmu_cmdq_shared_unlock(cmdq);
857 }
858 }
859
860 local_irq_restore(flags);
861 return ret;
862 }
863
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 struct arm_smmu_cmdq_ent *ent,
866 bool sync)
867 {
868 u64 cmd[CMDQ_ENT_DWORDS];
869
870 if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 ent->opcode);
873 return -EINVAL;
874 }
875
876 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 struct arm_smmu_cmdq_ent *ent)
881 {
882 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 struct arm_smmu_cmdq_ent *ent)
887 {
888 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 struct arm_smmu_cmdq_batch *cmds,
893 struct arm_smmu_cmdq_ent *cmd)
894 {
895 int index;
896
897 if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 cmds->num = 0;
901 }
902
903 if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 cmds->num = 0;
906 }
907
908 index = cmds->num * CMDQ_ENT_DWORDS;
909 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 cmd->opcode);
912 return;
913 }
914
915 cmds->num++;
916 }
917
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 struct arm_smmu_cmdq_batch *cmds)
920 {
921 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
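
/*
 * A minimal sketch of the batch pattern used by callers below (e.g.
 * arm_smmu_sync_cd() and arm_smmu_atc_inv_master()):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *
 *	cmds.num = 0;
 *	// for each target:
 *	//	... fill in cmd ...
 *	//	arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically when the batch fills up, and
 * batch_submit() issues whatever is left followed by a CMD_SYNC.
 */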
923
924 static int arm_smmu_page_response(struct device *dev,
925 struct iommu_fault_event *unused,
926 struct iommu_page_response *resp)
927 {
928 struct arm_smmu_cmdq_ent cmd = {0};
929 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 int sid = master->streams[0].id;
931
932 if (master->stall_enabled) {
933 cmd.opcode = CMDQ_OP_RESUME;
934 cmd.resume.sid = sid;
935 cmd.resume.stag = resp->grpid;
936 switch (resp->code) {
937 case IOMMU_PAGE_RESP_INVALID:
938 case IOMMU_PAGE_RESP_FAILURE:
939 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 break;
941 case IOMMU_PAGE_RESP_SUCCESS:
942 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 break;
944 default:
945 return -EINVAL;
946 }
947 } else {
948 return -ENODEV;
949 }
950
951 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 /*
953 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
954 * RESUME consumption guarantees that the stalled transaction will be
955 * terminated... at some point in the future. PRI_RESP is fire and
956 * forget.
957 */
958
959 return 0;
960 }
961
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 struct arm_smmu_cmdq_ent cmd = {
966 .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
967 CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 .tlbi.asid = asid,
969 };
970
971 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973
974 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
975 int ssid, bool leaf)
976 {
977 size_t i;
978 unsigned long flags;
979 struct arm_smmu_master *master;
980 struct arm_smmu_cmdq_batch cmds;
981 struct arm_smmu_device *smmu = smmu_domain->smmu;
982 struct arm_smmu_cmdq_ent cmd = {
983 .opcode = CMDQ_OP_CFGI_CD,
984 .cfgi = {
985 .ssid = ssid,
986 .leaf = leaf,
987 },
988 };
989
990 cmds.num = 0;
991
992 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
993 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
994 for (i = 0; i < master->num_streams; i++) {
995 cmd.cfgi.sid = master->streams[i].id;
996 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
997 }
998 }
999 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1000
1001 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1002 }
1003
1004 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1005 struct arm_smmu_l1_ctx_desc *l1_desc)
1006 {
1007 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1008
1009 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1010 &l1_desc->l2ptr_dma, GFP_KERNEL);
1011 if (!l1_desc->l2ptr) {
1012 dev_warn(smmu->dev,
1013 "failed to allocate context descriptor table\n");
1014 return -ENOMEM;
1015 }
1016 return 0;
1017 }
1018
1019 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1020 struct arm_smmu_l1_ctx_desc *l1_desc)
1021 {
1022 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1023 CTXDESC_L1_DESC_V;
1024
1025 /* See comment in arm_smmu_write_ctx_desc() */
1026 WRITE_ONCE(*dst, cpu_to_le64(val));
1027 }
1028
1029 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1030 u32 ssid)
1031 {
1032 __le64 *l1ptr;
1033 unsigned int idx;
1034 struct arm_smmu_l1_ctx_desc *l1_desc;
1035 struct arm_smmu_device *smmu = smmu_domain->smmu;
1036 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1037
1038 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1039 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1040
1041 idx = ssid >> CTXDESC_SPLIT;
1042 l1_desc = &cdcfg->l1_desc[idx];
1043 if (!l1_desc->l2ptr) {
1044 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1045 return NULL;
1046
1047 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1048 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1049 /* An invalid L1CD can be cached */
1050 arm_smmu_sync_cd(smmu_domain, ssid, false);
1051 }
1052 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1053 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1054 }
1055
1056 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1057 struct arm_smmu_ctx_desc *cd)
1058 {
1059 /*
1060 * This function handles the following cases:
1061 *
1062 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1063 * (2) Install a secondary CD, for SID+SSID traffic.
1064 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1065 * CD, then invalidate the old entry and mappings.
1066 * (4) Quiesce the context without clearing the valid bit. Disable
1067 * translation, and ignore any translation fault.
1068 * (5) Remove a secondary CD.
1069 */
1070 u64 val;
1071 bool cd_live;
1072 __le64 *cdptr;
1073
1074 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1075 return -E2BIG;
1076
1077 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1078 if (!cdptr)
1079 return -ENOMEM;
1080
1081 val = le64_to_cpu(cdptr[0]);
1082 cd_live = !!(val & CTXDESC_CD_0_V);
1083
1084 if (!cd) { /* (5) */
1085 val = 0;
1086 } else if (cd == &quiet_cd) { /* (4) */
1087 val |= CTXDESC_CD_0_TCR_EPD0;
1088 } else if (cd_live) { /* (3) */
1089 val &= ~CTXDESC_CD_0_ASID;
1090 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1091 /*
1092 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1093 * this substream's traffic
1094 */
1095 } else { /* (1) and (2) */
1096 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1097 cdptr[2] = 0;
1098 cdptr[3] = cpu_to_le64(cd->mair);
1099
1100 /*
1101 * STE is live, and the SMMU might read dwords of this CD in any
1102 * order. Ensure that it observes valid values before reading
1103 * V=1.
1104 */
1105 arm_smmu_sync_cd(smmu_domain, ssid, true);
1106
1107 val = cd->tcr |
1108 #ifdef __BIG_ENDIAN
1109 CTXDESC_CD_0_ENDI |
1110 #endif
1111 CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1112 (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1113 CTXDESC_CD_0_AA64 |
1114 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1115 CTXDESC_CD_0_V;
1116
1117 if (smmu_domain->stall_enabled)
1118 val |= CTXDESC_CD_0_S;
1119 }
1120
1121 /*
1122 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1123 * "Configuration structures and configuration invalidation completion"
1124 *
1125 * The size of single-copy atomic reads made by the SMMU is
1126 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1127 * field within an aligned 64-bit span of a structure can be altered
1128 * without first making the structure invalid.
1129 */
1130 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1131 arm_smmu_sync_cd(smmu_domain, ssid, true);
1132 return 0;
1133 }
1134
1135 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1136 {
1137 int ret;
1138 size_t l1size;
1139 size_t max_contexts;
1140 struct arm_smmu_device *smmu = smmu_domain->smmu;
1141 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1142 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1143
1144 max_contexts = 1 << cfg->s1cdmax;
1145
1146 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1147 max_contexts <= CTXDESC_L2_ENTRIES) {
1148 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1149 cdcfg->num_l1_ents = max_contexts;
1150
1151 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1152 } else {
1153 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1154 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1155 CTXDESC_L2_ENTRIES);
1156
1157 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1158 sizeof(*cdcfg->l1_desc),
1159 GFP_KERNEL);
1160 if (!cdcfg->l1_desc)
1161 return -ENOMEM;
1162
1163 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1164 }
1165
1166 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1167 GFP_KERNEL);
1168 if (!cdcfg->cdtab) {
1169 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1170 ret = -ENOMEM;
1171 goto err_free_l1;
1172 }
1173
1174 return 0;
1175
1176 err_free_l1:
1177 if (cdcfg->l1_desc) {
1178 devm_kfree(smmu->dev, cdcfg->l1_desc);
1179 cdcfg->l1_desc = NULL;
1180 }
1181 return ret;
1182 }
1183
1184 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1185 {
1186 int i;
1187 size_t size, l1size;
1188 struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1190
1191 if (cdcfg->l1_desc) {
1192 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1193
1194 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1195 if (!cdcfg->l1_desc[i].l2ptr)
1196 continue;
1197
1198 dmam_free_coherent(smmu->dev, size,
1199 cdcfg->l1_desc[i].l2ptr,
1200 cdcfg->l1_desc[i].l2ptr_dma);
1201 }
1202 devm_kfree(smmu->dev, cdcfg->l1_desc);
1203 cdcfg->l1_desc = NULL;
1204
1205 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1206 } else {
1207 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1208 }
1209
1210 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1211 cdcfg->cdtab_dma = 0;
1212 cdcfg->cdtab = NULL;
1213 }
1214
1215 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1216 {
1217 bool free;
1218 struct arm_smmu_ctx_desc *old_cd;
1219
1220 if (!cd->asid)
1221 return false;
1222
1223 free = refcount_dec_and_test(&cd->refs);
1224 if (free) {
1225 old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1226 WARN_ON(old_cd != cd);
1227 }
1228 return free;
1229 }
1230
1231 /* Stream table manipulation functions */
1232 static void
1233 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1234 {
1235 u64 val = 0;
1236
1237 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1238 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1239
1240 /* See comment in arm_smmu_write_ctx_desc() */
1241 WRITE_ONCE(*dst, cpu_to_le64(val));
1242 }
1243
1244 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1245 {
1246 struct arm_smmu_cmdq_ent cmd = {
1247 .opcode = CMDQ_OP_CFGI_STE,
1248 .cfgi = {
1249 .sid = sid,
1250 .leaf = true,
1251 },
1252 };
1253
1254 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1255 }
1256
1257 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1258 __le64 *dst)
1259 {
1260 /*
1261 * This is hideously complicated, but we only really care about
1262 * three cases at the moment:
1263 *
1264 * 1. Invalid (all zero) -> bypass/fault (init)
1265 * 2. Bypass/fault -> translation/bypass (attach)
1266 * 3. Translation/bypass -> bypass/fault (detach)
1267 *
1268 * Given that we can't update the STE atomically and the SMMU
1269 * doesn't read the thing in a defined order, that leaves us
1270 * with the following maintenance requirements:
1271 *
1272 * 1. Update Config, return (init time STEs aren't live)
1273 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1274 * 3. Update Config, sync
1275 */
1276 u64 val = le64_to_cpu(dst[0]);
1277 bool ste_live = false;
1278 struct arm_smmu_device *smmu = NULL;
1279 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1280 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1281 struct arm_smmu_domain *smmu_domain = NULL;
1282 struct arm_smmu_cmdq_ent prefetch_cmd = {
1283 .opcode = CMDQ_OP_PREFETCH_CFG,
1284 .prefetch = {
1285 .sid = sid,
1286 },
1287 };
1288
1289 if (master) {
1290 smmu_domain = master->domain;
1291 smmu = master->smmu;
1292 }
1293
1294 if (smmu_domain) {
1295 switch (smmu_domain->stage) {
1296 case ARM_SMMU_DOMAIN_S1:
1297 s1_cfg = &smmu_domain->s1_cfg;
1298 break;
1299 case ARM_SMMU_DOMAIN_S2:
1300 case ARM_SMMU_DOMAIN_NESTED:
1301 s2_cfg = &smmu_domain->s2_cfg;
1302 break;
1303 default:
1304 break;
1305 }
1306 }
1307
1308 if (val & STRTAB_STE_0_V) {
1309 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1310 case STRTAB_STE_0_CFG_BYPASS:
1311 break;
1312 case STRTAB_STE_0_CFG_S1_TRANS:
1313 case STRTAB_STE_0_CFG_S2_TRANS:
1314 ste_live = true;
1315 break;
1316 case STRTAB_STE_0_CFG_ABORT:
1317 BUG_ON(!disable_bypass);
1318 break;
1319 default:
1320 BUG(); /* STE corruption */
1321 }
1322 }
1323
1324 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1325 val = STRTAB_STE_0_V;
1326
1327 /* Bypass/fault */
1328 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1329 if (!smmu_domain && disable_bypass)
1330 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1331 else
1332 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1333
1334 dst[0] = cpu_to_le64(val);
1335 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1336 STRTAB_STE_1_SHCFG_INCOMING));
1337 dst[2] = 0; /* Nuke the VMID */
1338 /*
1339 * The SMMU can perform negative caching, so we must sync
1340 * the STE regardless of whether the old value was live.
1341 */
1342 if (smmu)
1343 arm_smmu_sync_ste_for_sid(smmu, sid);
1344 return;
1345 }
1346
1347 if (s1_cfg) {
1348 u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1349 STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1350
1351 BUG_ON(ste_live);
1352 dst[1] = cpu_to_le64(
1353 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1354 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1355 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1356 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1357 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1358
1359 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1360 !master->stall_enabled)
1361 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1362
1363 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1364 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1365 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1366 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1367 }
1368
1369 if (s2_cfg) {
1370 BUG_ON(ste_live);
1371 dst[2] = cpu_to_le64(
1372 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1373 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1374 #ifdef __BIG_ENDIAN
1375 STRTAB_STE_2_S2ENDI |
1376 #endif
1377 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1378 STRTAB_STE_2_S2R);
1379
1380 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1381
1382 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1383 }
1384
1385 if (master->ats_enabled)
1386 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1387 STRTAB_STE_1_EATS_TRANS));
1388
1389 arm_smmu_sync_ste_for_sid(smmu, sid);
1390 /* See comment in arm_smmu_write_ctx_desc() */
1391 WRITE_ONCE(dst[0], cpu_to_le64(val));
1392 arm_smmu_sync_ste_for_sid(smmu, sid);
1393
1394 /* It's likely that we'll want to use the new STE soon */
1395 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1396 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1397 }
1398
1399 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1400 {
1401 unsigned int i;
1402 u64 val = STRTAB_STE_0_V;
1403
1404 if (disable_bypass && !force)
1405 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1406 else
1407 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1408
1409 for (i = 0; i < nent; ++i) {
1410 strtab[0] = cpu_to_le64(val);
1411 strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1412 STRTAB_STE_1_SHCFG_INCOMING));
1413 strtab[2] = 0;
1414 strtab += STRTAB_STE_DWORDS;
1415 }
1416 }
1417
1418 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1419 {
1420 size_t size;
1421 void *strtab;
1422 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1423 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1424
1425 if (desc->l2ptr)
1426 return 0;
1427
1428 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1429 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1430
1431 desc->span = STRTAB_SPLIT + 1;
1432 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1433 GFP_KERNEL);
1434 if (!desc->l2ptr) {
1435 dev_err(smmu->dev,
1436 "failed to allocate l2 stream table for SID %u\n",
1437 sid);
1438 return -ENOMEM;
1439 }
1440
1441 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1442 arm_smmu_write_strtab_l1_desc(strtab, desc);
1443 return 0;
1444 }
1445
1446 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1447 {
1448 struct arm_smmu_stream *stream_rhs =
1449 rb_entry(rhs, struct arm_smmu_stream, node);
1450 const u32 *sid_lhs = lhs;
1451
1452 if (*sid_lhs < stream_rhs->id)
1453 return -1;
1454 if (*sid_lhs > stream_rhs->id)
1455 return 1;
1456 return 0;
1457 }
1458
1459 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1460 const struct rb_node *rhs)
1461 {
1462 return arm_smmu_streams_cmp_key(
1463 &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1464 }
1465
1466 static struct arm_smmu_master *
1467 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1468 {
1469 struct rb_node *node;
1470
1471 lockdep_assert_held(&smmu->streams_mutex);
1472
1473 node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1474 if (!node)
1475 return NULL;
1476 return rb_entry(node, struct arm_smmu_stream, node)->master;
1477 }
1478
1479 /* IRQ and event handlers */
1480 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1481 {
1482 int ret;
1483 u32 reason;
1484 u32 perm = 0;
1485 struct arm_smmu_master *master;
1486 bool ssid_valid = evt[0] & EVTQ_0_SSV;
1487 u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1488 struct iommu_fault_event fault_evt = { };
1489 struct iommu_fault *flt = &fault_evt.fault;
1490
1491 switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1492 case EVT_ID_TRANSLATION_FAULT:
1493 reason = IOMMU_FAULT_REASON_PTE_FETCH;
1494 break;
1495 case EVT_ID_ADDR_SIZE_FAULT:
1496 reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1497 break;
1498 case EVT_ID_ACCESS_FAULT:
1499 reason = IOMMU_FAULT_REASON_ACCESS;
1500 break;
1501 case EVT_ID_PERMISSION_FAULT:
1502 reason = IOMMU_FAULT_REASON_PERMISSION;
1503 break;
1504 default:
1505 return -EOPNOTSUPP;
1506 }
1507
1508 /* Stage-2 is always pinned at the moment */
1509 if (evt[1] & EVTQ_1_S2)
1510 return -EFAULT;
1511
1512 if (evt[1] & EVTQ_1_RnW)
1513 perm |= IOMMU_FAULT_PERM_READ;
1514 else
1515 perm |= IOMMU_FAULT_PERM_WRITE;
1516
1517 if (evt[1] & EVTQ_1_InD)
1518 perm |= IOMMU_FAULT_PERM_EXEC;
1519
1520 if (evt[1] & EVTQ_1_PnU)
1521 perm |= IOMMU_FAULT_PERM_PRIV;
1522
1523 if (evt[1] & EVTQ_1_STALL) {
1524 flt->type = IOMMU_FAULT_PAGE_REQ;
1525 flt->prm = (struct iommu_fault_page_request) {
1526 .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1527 .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1528 .perm = perm,
1529 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1530 };
1531
1532 if (ssid_valid) {
1533 flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1534 flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1535 }
1536 } else {
1537 flt->type = IOMMU_FAULT_DMA_UNRECOV;
1538 flt->event = (struct iommu_fault_unrecoverable) {
1539 .reason = reason,
1540 .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1541 .perm = perm,
1542 .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1543 };
1544
1545 if (ssid_valid) {
1546 flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1547 flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1548 }
1549 }
1550
1551 mutex_lock(&smmu->streams_mutex);
1552 master = arm_smmu_find_master(smmu, sid);
1553 if (!master) {
1554 ret = -EINVAL;
1555 goto out_unlock;
1556 }
1557
1558 ret = iommu_report_device_fault(master->dev, &fault_evt);
1559 if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1560 /* Nobody cared, abort the access */
1561 struct iommu_page_response resp = {
1562 .pasid = flt->prm.pasid,
1563 .grpid = flt->prm.grpid,
1564 .code = IOMMU_PAGE_RESP_FAILURE,
1565 };
1566 arm_smmu_page_response(master->dev, &fault_evt, &resp);
1567 }
1568
1569 out_unlock:
1570 mutex_unlock(&smmu->streams_mutex);
1571 return ret;
1572 }
1573
1574 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1575 {
1576 int i, ret;
1577 struct arm_smmu_device *smmu = dev;
1578 struct arm_smmu_queue *q = &smmu->evtq.q;
1579 struct arm_smmu_ll_queue *llq = &q->llq;
1580 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1581 DEFAULT_RATELIMIT_BURST);
1582 u64 evt[EVTQ_ENT_DWORDS];
1583
1584 do {
1585 while (!queue_remove_raw(q, evt)) {
1586 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1587
1588 ret = arm_smmu_handle_evt(smmu, evt);
1589 if (!ret || !__ratelimit(&rs))
1590 continue;
1591
1592 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1593 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1594 dev_info(smmu->dev, "\t0x%016llx\n",
1595 (unsigned long long)evt[i]);
1596
1597 cond_resched();
1598 }
1599
1600 /*
1601 * Not much we can do on overflow, so scream and pretend we're
1602 * trying harder.
1603 */
1604 if (queue_sync_prod_in(q) == -EOVERFLOW)
1605 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1606 } while (!queue_empty(llq));
1607
1608 /* Sync our overflow flag, as we believe we're up to speed */
1609 queue_sync_cons_ovf(q);
1610 return IRQ_HANDLED;
1611 }
1612
1613 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1614 {
1615 u32 sid, ssid;
1616 u16 grpid;
1617 bool ssv, last;
1618
1619 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1620 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1621 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1622 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1623 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1624
1625 dev_info(smmu->dev, "unexpected PRI request received:\n");
1626 dev_info(smmu->dev,
1627 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1628 sid, ssid, grpid, last ? "L" : "",
1629 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1630 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1631 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1632 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1633 evt[1] & PRIQ_1_ADDR_MASK);
1634
1635 if (last) {
1636 struct arm_smmu_cmdq_ent cmd = {
1637 .opcode = CMDQ_OP_PRI_RESP,
1638 .substream_valid = ssv,
1639 .pri = {
1640 .sid = sid,
1641 .ssid = ssid,
1642 .grpid = grpid,
1643 .resp = PRI_RESP_DENY,
1644 },
1645 };
1646
1647 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1648 }
1649 }
1650
1651 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1652 {
1653 struct arm_smmu_device *smmu = dev;
1654 struct arm_smmu_queue *q = &smmu->priq.q;
1655 struct arm_smmu_ll_queue *llq = &q->llq;
1656 u64 evt[PRIQ_ENT_DWORDS];
1657
1658 do {
1659 while (!queue_remove_raw(q, evt))
1660 arm_smmu_handle_ppr(smmu, evt);
1661
1662 if (queue_sync_prod_in(q) == -EOVERFLOW)
1663 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1664 } while (!queue_empty(llq));
1665
1666 /* Sync our overflow flag, as we believe we're up to speed */
1667 queue_sync_cons_ovf(q);
1668 return IRQ_HANDLED;
1669 }
1670
1671 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1672
1673 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1674 {
1675 u32 gerror, gerrorn, active;
1676 struct arm_smmu_device *smmu = dev;
1677
1678 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1679 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1680
1681 active = gerror ^ gerrorn;
1682 if (!(active & GERROR_ERR_MASK))
1683 return IRQ_NONE; /* No errors pending */
1684
1685 dev_warn(smmu->dev,
1686 "unexpected global error reported (0x%08x), this could be serious\n",
1687 active);
1688
1689 if (active & GERROR_SFM_ERR) {
1690 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1691 arm_smmu_device_disable(smmu);
1692 }
1693
1694 if (active & GERROR_MSI_GERROR_ABT_ERR)
1695 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1696
1697 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1698 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1699
1700 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1701 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1702
1703 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1704 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1705
1706 if (active & GERROR_PRIQ_ABT_ERR)
1707 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1708
1709 if (active & GERROR_EVTQ_ABT_ERR)
1710 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1711
1712 if (active & GERROR_CMDQ_ERR)
1713 arm_smmu_cmdq_skip_err(smmu);
1714
1715 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1716 return IRQ_HANDLED;
1717 }
1718
1719 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1720 {
1721 struct arm_smmu_device *smmu = dev;
1722
1723 arm_smmu_evtq_thread(irq, dev);
1724 if (smmu->features & ARM_SMMU_FEAT_PRI)
1725 arm_smmu_priq_thread(irq, dev);
1726
1727 return IRQ_HANDLED;
1728 }
1729
1730 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1731 {
1732 arm_smmu_gerror_handler(irq, dev);
1733 return IRQ_WAKE_THREAD;
1734 }
1735
1736 static void
1737 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1738 struct arm_smmu_cmdq_ent *cmd)
1739 {
1740 size_t log2_span;
1741 size_t span_mask;
1742 	/* ATC invalidates are always on 4096-byte pages */
1743 size_t inval_grain_shift = 12;
1744 unsigned long page_start, page_end;
1745
1746 /*
1747 * ATS and PASID:
1748 *
1749 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1750 * prefix. In that case all ATC entries within the address range are
1751 * invalidated, including those that were requested with a PASID! There
1752 * is no way to invalidate only entries without PASID.
1753 *
1754 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1755 * traffic), translation requests without PASID create ATC entries
1756 * without PASID, which must be invalidated with substream_valid clear.
1757 * This has the unpleasant side-effect of invalidating all PASID-tagged
1758 * ATC entries within the address range.
1759 */
1760 *cmd = (struct arm_smmu_cmdq_ent) {
1761 .opcode = CMDQ_OP_ATC_INV,
1762 .substream_valid = (ssid != IOMMU_NO_PASID),
1763 .atc.ssid = ssid,
1764 };
1765
1766 if (!size) {
1767 cmd->atc.size = ATC_INV_SIZE_ALL;
1768 return;
1769 }
1770
1771 page_start = iova >> inval_grain_shift;
1772 page_end = (iova + size - 1) >> inval_grain_shift;
1773
1774 /*
1775 * In an ATS Invalidate Request, the address must be aligned on the
1776 * range size, which must be a power of two number of page sizes. We
1777 * thus have to choose between grossly over-invalidating the region, or
1778 * splitting the invalidation into multiple commands. For simplicity
1779 * we'll go with the first solution, but should refine it in the future
1780 * if multiple commands are shown to be more efficient.
1781 *
1782 * Find the smallest power of two that covers the range. The most
1783 * significant differing bit between the start and end addresses,
1784 * fls(start ^ end), indicates the required span. For example:
1785 *
1786 * We want to invalidate pages [8; 11]. This is already the ideal range:
1787 * x = 0b1000 ^ 0b1011 = 0b11
1788 * span = 1 << fls(x) = 4
1789 *
1790 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1791 * x = 0b0111 ^ 0b1010 = 0b1101
1792 * span = 1 << fls(x) = 16
1793 */
1794 log2_span = fls_long(page_start ^ page_end);
1795 span_mask = (1ULL << log2_span) - 1;
1796
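	/*
	 * Align the start page down to the span so that the resulting
	 * address is naturally aligned to the invalidation size.
	 */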
1797 page_start &= ~span_mask;
1798
1799 cmd->atc.addr = page_start << inval_grain_shift;
1800 cmd->atc.size = log2_span;
1801 }
1802
1803 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1804 {
1805 int i;
1806 struct arm_smmu_cmdq_ent cmd;
1807 struct arm_smmu_cmdq_batch cmds;
1808
1809 arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1810
1811 cmds.num = 0;
1812 for (i = 0; i < master->num_streams; i++) {
1813 cmd.atc.sid = master->streams[i].id;
1814 arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1815 }
1816
1817 return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1818 }
1819
1820 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1821 unsigned long iova, size_t size)
1822 {
1823 int i;
1824 unsigned long flags;
1825 struct arm_smmu_cmdq_ent cmd;
1826 struct arm_smmu_master *master;
1827 struct arm_smmu_cmdq_batch cmds;
1828
1829 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1830 return 0;
1831
1832 /*
1833 * Ensure that we've completed prior invalidation of the main TLBs
1834 * before we read 'nr_ats_masters' in case of a concurrent call to
1835 * arm_smmu_enable_ats():
1836 *
1837 * // unmap() // arm_smmu_enable_ats()
1838 * TLBI+SYNC atomic_inc(&nr_ats_masters);
1839 * smp_mb(); [...]
1840 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1841 *
1842 * Ensures that we always see the incremented 'nr_ats_masters' count if
1843 * ATS was enabled at the PCI device before completion of the TLBI.
1844 */
1845 smp_mb();
1846 if (!atomic_read(&smmu_domain->nr_ats_masters))
1847 return 0;
1848
1849 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1850
1851 cmds.num = 0;
1852
1853 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1854 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1855 if (!master->ats_enabled)
1856 continue;
1857
1858 for (i = 0; i < master->num_streams; i++) {
1859 cmd.atc.sid = master->streams[i].id;
1860 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1861 }
1862 }
1863 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1864
1865 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1866 }
1867
1868 /* IO_PGTABLE API */
1869 static void arm_smmu_tlb_inv_context(void *cookie)
1870 {
1871 struct arm_smmu_domain *smmu_domain = cookie;
1872 struct arm_smmu_device *smmu = smmu_domain->smmu;
1873 struct arm_smmu_cmdq_ent cmd;
1874
1875 /*
1876 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1877 * PTEs previously cleared by unmaps on the current CPU not yet visible
1878 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1879 * insertion to guarantee those are observed before the TLBI. Do be
1880 * careful, 007.
1881 */
1882 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1883 arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1884 } else {
1885 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1886 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1887 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1888 }
1889 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1890 }
1891
1892 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1893 unsigned long iova, size_t size,
1894 size_t granule,
1895 struct arm_smmu_domain *smmu_domain)
1896 {
1897 struct arm_smmu_device *smmu = smmu_domain->smmu;
1898 unsigned long end = iova + size, num_pages = 0, tg = 0;
1899 size_t inv_range = granule;
1900 struct arm_smmu_cmdq_batch cmds;
1901
1902 if (!size)
1903 return;
1904
1905 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1906 /* Get the leaf page size */
1907 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1908
1909 num_pages = size >> tg;
1910
1911 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1912 cmd->tlbi.tg = (tg - 10) / 2;
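		/* i.e. tg == 12 (4KiB) -> 1, tg == 14 (16KiB) -> 2, tg == 16 (64KiB) -> 3 */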
1913
1914 /*
1915 * Determine what level the granule is at. For non-leaf, both
1916 * io-pgtable and SVA pass a nominal last-level granule because
1917 * they don't know what level(s) actually apply, so ignore that
1918 * and leave TTL=0. However for various errata reasons we still
1919 * want to use a range command, so avoid the SVA corner case
1920 * where both scale and num could be 0 as well.
1921 */
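		/*
		 * Worked example: invalidating 2MiB leaf entries with 4KiB
		 * pages gives ttl = 4 - ((21 - 3) / (12 - 3)) = 2.
		 */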
1922 if (cmd->tlbi.leaf)
1923 cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1924 else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1925 num_pages++;
1926 }
1927
1928 cmds.num = 0;
1929
1930 while (iova < end) {
1931 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1932 /*
1933 * On each iteration of the loop, the range is 5 bits
1934 * worth of the aligned size remaining.
1935 * The range in pages is:
1936 *
1937 * range = (num_pages & (0x1f << __ffs(num_pages)))
1938 */
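			/*
			 * Worked example, assuming num_pages == 35 (0b100011):
			 * the first command covers 3 pages (scale 0, num 3),
			 * the second 32 pages (scale 5, num 1), so the whole
			 * range is done in two commands.
			 */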
1939 unsigned long scale, num;
1940
1941 /* Determine the power of 2 multiple number of pages */
1942 scale = __ffs(num_pages);
1943 cmd->tlbi.scale = scale;
1944
1945 /* Determine how many chunks of 2^scale size we have */
1946 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1947 cmd->tlbi.num = num - 1;
1948
1949 /* range is num * 2^scale * pgsize */
1950 inv_range = num << (scale + tg);
1951
1952 /* Clear out the lower order bits for the next iteration */
1953 num_pages -= num << scale;
1954 }
1955
1956 cmd->tlbi.addr = iova;
1957 arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1958 iova += inv_range;
1959 }
1960 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1961 }
1962
1963 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1964 size_t granule, bool leaf,
1965 struct arm_smmu_domain *smmu_domain)
1966 {
1967 struct arm_smmu_cmdq_ent cmd = {
1968 .tlbi = {
1969 .leaf = leaf,
1970 },
1971 };
1972
1973 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1974 cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1975 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1976 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1977 } else {
1978 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1979 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1980 }
1981 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1982
1983 /*
1984 * Unfortunately, this can't be leaf-only since we may have
1985 * zapped an entire table.
1986 */
1987 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1988 }
1989
1990 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1991 size_t granule, bool leaf,
1992 struct arm_smmu_domain *smmu_domain)
1993 {
1994 struct arm_smmu_cmdq_ent cmd = {
1995 .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1996 CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1997 .tlbi = {
1998 .asid = asid,
1999 .leaf = leaf,
2000 },
2001 };
2002
2003 __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2004 }
2005
2006 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2007 unsigned long iova, size_t granule,
2008 void *cookie)
2009 {
2010 struct arm_smmu_domain *smmu_domain = cookie;
2011 struct iommu_domain *domain = &smmu_domain->domain;
2012
2013 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2014 }
2015
2016 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2017 size_t granule, void *cookie)
2018 {
2019 arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2020 }
2021
2022 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2023 .tlb_flush_all = arm_smmu_tlb_inv_context,
2024 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2025 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
2026 };
2027
2028 /* IOMMU API */
2029 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2030 {
2031 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2032
2033 switch (cap) {
2034 case IOMMU_CAP_CACHE_COHERENCY:
2035 /* Assume that a coherent TCU implies coherent TBUs */
2036 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2037 case IOMMU_CAP_NOEXEC:
2038 case IOMMU_CAP_DEFERRED_FLUSH:
2039 return true;
2040 default:
2041 return false;
2042 }
2043 }
2044
2045 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2046 {
2047 struct arm_smmu_domain *smmu_domain;
2048
2049 if (type == IOMMU_DOMAIN_SVA)
2050 return arm_smmu_sva_domain_alloc();
2051
2052 if (type != IOMMU_DOMAIN_UNMANAGED &&
2053 type != IOMMU_DOMAIN_DMA &&
2054 type != IOMMU_DOMAIN_IDENTITY)
2055 return NULL;
2056
2057 /*
2058 * Allocate the domain and initialise some of its data structures.
2059 * We can't really do anything meaningful until we've added a
2060 * master.
2061 */
2062 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2063 if (!smmu_domain)
2064 return NULL;
2065
2066 mutex_init(&smmu_domain->init_mutex);
2067 INIT_LIST_HEAD(&smmu_domain->devices);
2068 spin_lock_init(&smmu_domain->devices_lock);
2069 INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2070
2071 return &smmu_domain->domain;
2072 }
2073
2074 static void arm_smmu_domain_free(struct iommu_domain *domain)
2075 {
2076 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2077 struct arm_smmu_device *smmu = smmu_domain->smmu;
2078
2079 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2080
2081 /* Free the CD and ASID, if we allocated them */
2082 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2083 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2084
2085 /* Prevent SVA from touching the CD while we're freeing it */
2086 mutex_lock(&arm_smmu_asid_lock);
2087 if (cfg->cdcfg.cdtab)
2088 arm_smmu_free_cd_tables(smmu_domain);
2089 arm_smmu_free_asid(&cfg->cd);
2090 mutex_unlock(&arm_smmu_asid_lock);
2091 } else {
2092 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2093 if (cfg->vmid)
2094 ida_free(&smmu->vmid_map, cfg->vmid);
2095 }
2096
2097 kfree(smmu_domain);
2098 }
2099
2100 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2101 struct arm_smmu_master *master,
2102 struct io_pgtable_cfg *pgtbl_cfg)
2103 {
2104 int ret;
2105 u32 asid;
2106 struct arm_smmu_device *smmu = smmu_domain->smmu;
2107 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2108 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2109
2110 refcount_set(&cfg->cd.refs, 1);
2111
2112 /* Prevent SVA from modifying the ASID until it is written to the CD */
2113 mutex_lock(&arm_smmu_asid_lock);
2114 ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2115 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2116 if (ret)
2117 goto out_unlock;
2118
2119 cfg->s1cdmax = master->ssid_bits;
2120
2121 smmu_domain->stall_enabled = master->stall_enabled;
2122
2123 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2124 if (ret)
2125 goto out_free_asid;
2126
2127 cfg->cd.asid = (u16)asid;
2128 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2129 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2130 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2131 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2132 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2133 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2134 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2135 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2136 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2137
2138 /*
2139 * Note that this will end up calling arm_smmu_sync_cd() before
2140 * the master has been added to the devices list for this domain.
2141 * This isn't an issue because the STE hasn't been installed yet.
2142 */
2143 ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
2144 if (ret)
2145 goto out_free_cd_tables;
2146
2147 mutex_unlock(&arm_smmu_asid_lock);
2148 return 0;
2149
2150 out_free_cd_tables:
2151 arm_smmu_free_cd_tables(smmu_domain);
2152 out_free_asid:
2153 arm_smmu_free_asid(&cfg->cd);
2154 out_unlock:
2155 mutex_unlock(&arm_smmu_asid_lock);
2156 return ret;
2157 }
2158
2159 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2160 struct arm_smmu_master *master,
2161 struct io_pgtable_cfg *pgtbl_cfg)
2162 {
2163 int vmid;
2164 struct arm_smmu_device *smmu = smmu_domain->smmu;
2165 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2166 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2167
2168 /* Reserve VMID 0 for stage-2 bypass STEs */
2169 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2170 GFP_KERNEL);
2171 if (vmid < 0)
2172 return vmid;
2173
2174 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2175 cfg->vmid = (u16)vmid;
2176 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2177 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2178 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2179 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2180 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2181 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2182 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2183 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2184 return 0;
2185 }
2186
2187 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2188 struct arm_smmu_master *master)
2189 {
2190 int ret;
2191 unsigned long ias, oas;
2192 enum io_pgtable_fmt fmt;
2193 struct io_pgtable_cfg pgtbl_cfg;
2194 struct io_pgtable_ops *pgtbl_ops;
2195 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2196 struct arm_smmu_master *,
2197 struct io_pgtable_cfg *);
2198 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2199 struct arm_smmu_device *smmu = smmu_domain->smmu;
2200
2201 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2202 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2203 return 0;
2204 }
2205
2206 /* Restrict the stage to what we can actually support */
2207 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2208 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2209 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2210 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2211
2212 switch (smmu_domain->stage) {
2213 case ARM_SMMU_DOMAIN_S1:
2214 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2215 ias = min_t(unsigned long, ias, VA_BITS);
2216 oas = smmu->ias;
2217 fmt = ARM_64_LPAE_S1;
2218 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2219 break;
2220 case ARM_SMMU_DOMAIN_NESTED:
2221 case ARM_SMMU_DOMAIN_S2:
2222 ias = smmu->ias;
2223 oas = smmu->oas;
2224 fmt = ARM_64_LPAE_S2;
2225 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2226 break;
2227 default:
2228 return -EINVAL;
2229 }
2230
2231 pgtbl_cfg = (struct io_pgtable_cfg) {
2232 .pgsize_bitmap = smmu->pgsize_bitmap,
2233 .ias = ias,
2234 .oas = oas,
2235 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2236 .tlb = &arm_smmu_flush_ops,
2237 .iommu_dev = smmu->dev,
2238 };
2239
2240 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2241 if (!pgtbl_ops)
2242 return -ENOMEM;
2243
2244 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2245 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2246 domain->geometry.force_aperture = true;
2247
2248 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2249 if (ret < 0) {
2250 free_io_pgtable_ops(pgtbl_ops);
2251 return ret;
2252 }
2253
2254 smmu_domain->pgtbl_ops = pgtbl_ops;
2255 return 0;
2256 }
2257
2258 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2259 {
2260 __le64 *step;
2261 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2262
2263 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2264 struct arm_smmu_strtab_l1_desc *l1_desc;
2265 int idx;
2266
2267 /* Two-level walk */
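		/*
		 * The upper SID bits select the L1 descriptor; the low
		 * STRTAB_SPLIT bits index the STE within its L2 table.
		 */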
2268 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2269 l1_desc = &cfg->l1_desc[idx];
2270 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2271 step = &l1_desc->l2ptr[idx];
2272 } else {
2273 /* Simple linear lookup */
2274 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2275 }
2276
2277 return step;
2278 }
2279
2280 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2281 {
2282 int i, j;
2283 struct arm_smmu_device *smmu = master->smmu;
2284
2285 for (i = 0; i < master->num_streams; ++i) {
2286 u32 sid = master->streams[i].id;
2287 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2288
2289 /* Bridged PCI devices may end up with duplicated IDs */
2290 for (j = 0; j < i; j++)
2291 if (master->streams[j].id == sid)
2292 break;
2293 if (j < i)
2294 continue;
2295
2296 arm_smmu_write_strtab_ent(master, sid, step);
2297 }
2298 }
2299
2300 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2301 {
2302 struct device *dev = master->dev;
2303 struct arm_smmu_device *smmu = master->smmu;
2304 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2305
2306 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2307 return false;
2308
2309 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2310 return false;
2311
2312 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2313 }
2314
2315 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2316 {
2317 size_t stu;
2318 struct pci_dev *pdev;
2319 struct arm_smmu_device *smmu = master->smmu;
2320 struct arm_smmu_domain *smmu_domain = master->domain;
2321
2322 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2323 if (!master->ats_enabled)
2324 return;
2325
2326 /* Smallest Translation Unit: log2 of the smallest supported granule */
2327 stu = __ffs(smmu->pgsize_bitmap);
2328 pdev = to_pci_dev(master->dev);
2329
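	/*
	 * Publish the new ATS master and flush its ATC before the endpoint
	 * can issue Translation Requests; this pairs with the smp_mb() in
	 * arm_smmu_atc_inv_domain().
	 */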
2330 atomic_inc(&smmu_domain->nr_ats_masters);
2331 arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2332 if (pci_enable_ats(pdev, stu))
2333 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2334 }
2335
2336 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2337 {
2338 struct arm_smmu_domain *smmu_domain = master->domain;
2339
2340 if (!master->ats_enabled)
2341 return;
2342
2343 pci_disable_ats(to_pci_dev(master->dev));
2344 /*
2345 * Ensure ATS is disabled at the endpoint before we issue the
2346 * ATC invalidation via the SMMU.
2347 */
2348 wmb();
2349 arm_smmu_atc_inv_master(master);
2350 atomic_dec(&smmu_domain->nr_ats_masters);
2351 }
2352
2353 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2354 {
2355 int ret;
2356 int features;
2357 int num_pasids;
2358 struct pci_dev *pdev;
2359
2360 if (!dev_is_pci(master->dev))
2361 return -ENODEV;
2362
2363 pdev = to_pci_dev(master->dev);
2364
2365 features = pci_pasid_features(pdev);
2366 if (features < 0)
2367 return features;
2368
2369 num_pasids = pci_max_pasids(pdev);
2370 if (num_pasids <= 0)
2371 return num_pasids;
2372
2373 ret = pci_enable_pasid(pdev, features);
2374 if (ret) {
2375 dev_err(&pdev->dev, "Failed to enable PASID\n");
2376 return ret;
2377 }
2378
2379 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2380 master->smmu->ssid_bits);
2381 return 0;
2382 }
2383
2384 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2385 {
2386 struct pci_dev *pdev;
2387
2388 if (!dev_is_pci(master->dev))
2389 return;
2390
2391 pdev = to_pci_dev(master->dev);
2392
2393 if (!pdev->pasid_enabled)
2394 return;
2395
2396 master->ssid_bits = 0;
2397 pci_disable_pasid(pdev);
2398 }
2399
2400 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2401 {
2402 unsigned long flags;
2403 struct arm_smmu_domain *smmu_domain = master->domain;
2404
2405 if (!smmu_domain)
2406 return;
2407
2408 arm_smmu_disable_ats(master);
2409
2410 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2411 list_del(&master->domain_head);
2412 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2413
2414 master->domain = NULL;
2415 master->ats_enabled = false;
2416 arm_smmu_install_ste_for_dev(master);
2417 }
2418
2419 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2420 {
2421 int ret = 0;
2422 unsigned long flags;
2423 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2424 struct arm_smmu_device *smmu;
2425 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2426 struct arm_smmu_master *master;
2427
2428 if (!fwspec)
2429 return -ENOENT;
2430
2431 master = dev_iommu_priv_get(dev);
2432 smmu = master->smmu;
2433
2434 /*
2435 * Checking that SVA is disabled ensures that this device isn't bound to
2436 * any mm, and can be safely detached from its old domain. Bonds cannot
2437 * be removed concurrently since we're holding the group mutex.
2438 */
2439 if (arm_smmu_master_sva_enabled(master)) {
2440 dev_err(dev, "cannot attach - SVA enabled\n");
2441 return -EBUSY;
2442 }
2443
2444 arm_smmu_detach_dev(master);
2445
2446 mutex_lock(&smmu_domain->init_mutex);
2447
2448 if (!smmu_domain->smmu) {
2449 smmu_domain->smmu = smmu;
2450 ret = arm_smmu_domain_finalise(domain, master);
2451 if (ret) {
2452 smmu_domain->smmu = NULL;
2453 goto out_unlock;
2454 }
2455 } else if (smmu_domain->smmu != smmu) {
2456 ret = -EINVAL;
2457 goto out_unlock;
2458 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2459 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2460 ret = -EINVAL;
2461 goto out_unlock;
2462 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2463 smmu_domain->stall_enabled != master->stall_enabled) {
2464 ret = -EINVAL;
2465 goto out_unlock;
2466 }
2467
2468 master->domain = smmu_domain;
2469
2470 /*
2471 * The SMMU does not support enabling ATS with bypass. When the STE is
2472 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2473 * Translated transactions are denied as though ATS is disabled for the
2474 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2475 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2476 */
2477 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2478 master->ats_enabled = arm_smmu_ats_supported(master);
2479
2480 arm_smmu_install_ste_for_dev(master);
2481
2482 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2483 list_add(&master->domain_head, &smmu_domain->devices);
2484 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2485
2486 arm_smmu_enable_ats(master);
2487
2488 out_unlock:
2489 mutex_unlock(&smmu_domain->init_mutex);
2490 return ret;
2491 }
2492
2493 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2494 phys_addr_t paddr, size_t pgsize, size_t pgcount,
2495 int prot, gfp_t gfp, size_t *mapped)
2496 {
2497 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2498
2499 if (!ops)
2500 return -ENODEV;
2501
2502 return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2503 }
2504
2505 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2506 size_t pgsize, size_t pgcount,
2507 struct iommu_iotlb_gather *gather)
2508 {
2509 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2510 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2511
2512 if (!ops)
2513 return 0;
2514
2515 return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2516 }
2517
2518 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2519 {
2520 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2521
2522 if (smmu_domain->smmu)
2523 arm_smmu_tlb_inv_context(smmu_domain);
2524 }
2525
2526 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2527 struct iommu_iotlb_gather *gather)
2528 {
2529 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2530
2531 if (!gather->pgsize)
2532 return;
2533
2534 arm_smmu_tlb_inv_range_domain(gather->start,
2535 gather->end - gather->start + 1,
2536 gather->pgsize, true, smmu_domain);
2537 }
2538
2539 static phys_addr_t
2540 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2541 {
2542 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2543
2544 if (!ops)
2545 return 0;
2546
2547 return ops->iova_to_phys(ops, iova);
2548 }
2549
2550 static struct platform_driver arm_smmu_driver;
2551
2552 static
2553 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2554 {
2555 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2556 fwnode);
2557 put_device(dev);
2558 return dev ? dev_get_drvdata(dev) : NULL;
2559 }
2560
2561 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2562 {
2563 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2564
2565 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2566 limit *= 1UL << STRTAB_SPLIT;
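		/* Each L1 entry fans out to 2^STRTAB_SPLIT STEs in a 2-level table */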
2567
2568 return sid < limit;
2569 }
2570
2571 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2572 {
2573 /* Check the SIDs are in range of the SMMU and our stream table */
2574 if (!arm_smmu_sid_in_range(smmu, sid))
2575 return -ERANGE;
2576
2577 /* Ensure l2 strtab is initialised */
2578 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2579 return arm_smmu_init_l2_strtab(smmu, sid);
2580
2581 return 0;
2582 }
2583
2584 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2585 struct arm_smmu_master *master)
2586 {
2587 int i;
2588 int ret = 0;
2589 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2590
2591 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2592 GFP_KERNEL);
2593 if (!master->streams)
2594 return -ENOMEM;
2595 master->num_streams = fwspec->num_ids;
2596
2597 mutex_lock(&smmu->streams_mutex);
2598 for (i = 0; i < fwspec->num_ids; i++) {
2599 struct arm_smmu_stream *new_stream = &master->streams[i];
2600 struct rb_node *existing;
2601 u32 sid = fwspec->ids[i];
2602
2603 new_stream->id = sid;
2604 new_stream->master = master;
2605
2606 ret = arm_smmu_init_sid_strtab(smmu, sid);
2607 if (ret)
2608 break;
2609
2610 /* Insert into SID tree */
2611 existing = rb_find_add(&new_stream->node, &smmu->streams,
2612 arm_smmu_streams_cmp_node);
2613 if (existing) {
2614 struct arm_smmu_master *existing_master =
2615 rb_entry(existing, struct arm_smmu_stream, node)
2616 ->master;
2617
2618 /* Bridged PCI devices may end up with duplicated IDs */
2619 if (existing_master == master)
2620 continue;
2621
2622 dev_warn(master->dev,
2623 "stream %u already in tree from dev %s\n", sid,
2624 dev_name(existing_master->dev));
2625 ret = -EINVAL;
2626 break;
2627 }
2628 }
2629
2630 if (ret) {
2631 for (i--; i >= 0; i--)
2632 rb_erase(&master->streams[i].node, &smmu->streams);
2633 kfree(master->streams);
2634 }
2635 mutex_unlock(&smmu->streams_mutex);
2636
2637 return ret;
2638 }
2639
2640 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2641 {
2642 int i;
2643 struct arm_smmu_device *smmu = master->smmu;
2644 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2645
2646 if (!smmu || !master->streams)
2647 return;
2648
2649 mutex_lock(&smmu->streams_mutex);
2650 for (i = 0; i < fwspec->num_ids; i++)
2651 rb_erase(&master->streams[i].node, &smmu->streams);
2652 mutex_unlock(&smmu->streams_mutex);
2653
2654 kfree(master->streams);
2655 }
2656
2657 static struct iommu_ops arm_smmu_ops;
2658
2659 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2660 {
2661 int ret;
2662 struct arm_smmu_device *smmu;
2663 struct arm_smmu_master *master;
2664 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2665
2666 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2667 return ERR_PTR(-ENODEV);
2668
2669 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2670 return ERR_PTR(-EBUSY);
2671
2672 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2673 if (!smmu)
2674 return ERR_PTR(-ENODEV);
2675
2676 master = kzalloc(sizeof(*master), GFP_KERNEL);
2677 if (!master)
2678 return ERR_PTR(-ENOMEM);
2679
2680 master->dev = dev;
2681 master->smmu = smmu;
2682 INIT_LIST_HEAD(&master->bonds);
2683 dev_iommu_priv_set(dev, master);
2684
2685 ret = arm_smmu_insert_master(smmu, master);
2686 if (ret)
2687 goto err_free_master;
2688
2689 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2690 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2691
2692 /*
2693 * Note that PASID must be enabled before, and disabled after ATS:
2694 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2695 *
2696 * Behavior is undefined if this bit is Set and the value of the PASID
2697 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2698 * are changed.
2699 */
2700 arm_smmu_enable_pasid(master);
2701
2702 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2703 master->ssid_bits = min_t(u8, master->ssid_bits,
2704 CTXDESC_LINEAR_CDMAX);
2705
2706 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2707 device_property_read_bool(dev, "dma-can-stall")) ||
2708 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2709 master->stall_enabled = true;
2710
2711 return &smmu->iommu;
2712
2713 err_free_master:
2714 kfree(master);
2715 dev_iommu_priv_set(dev, NULL);
2716 return ERR_PTR(ret);
2717 }
2718
2719 static void arm_smmu_release_device(struct device *dev)
2720 {
2721 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2722
2723 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2724 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2725 arm_smmu_detach_dev(master);
2726 arm_smmu_disable_pasid(master);
2727 arm_smmu_remove_master(master);
2728 kfree(master);
2729 }
2730
2731 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2732 {
2733 struct iommu_group *group;
2734
2735 /*
2736 * We don't support devices sharing stream IDs other than PCI RID
2737 * aliases, since the necessary ID-to-device lookup becomes rather
2738 * impractical given a potential sparse 32-bit stream ID space.
2739 */
2740 if (dev_is_pci(dev))
2741 group = pci_device_group(dev);
2742 else
2743 group = generic_device_group(dev);
2744
2745 return group;
2746 }
2747
2748 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2749 {
2750 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2751 int ret = 0;
2752
2753 mutex_lock(&smmu_domain->init_mutex);
2754 if (smmu_domain->smmu)
2755 ret = -EPERM;
2756 else
2757 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2758 mutex_unlock(&smmu_domain->init_mutex);
2759
2760 return ret;
2761 }
2762
2763 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2764 {
2765 return iommu_fwspec_add_ids(dev, args->args, 1);
2766 }
2767
2768 static void arm_smmu_get_resv_regions(struct device *dev,
2769 struct list_head *head)
2770 {
2771 struct iommu_resv_region *region;
2772 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2773
2774 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2775 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2776 if (!region)
2777 return;
2778
2779 	list_add_tail(&region->list, head);
2780
2781 iommu_dma_get_resv_regions(dev, head);
2782 }
2783
2784 static int arm_smmu_dev_enable_feature(struct device *dev,
2785 enum iommu_dev_features feat)
2786 {
2787 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2788
2789 if (!master)
2790 return -ENODEV;
2791
2792 switch (feat) {
2793 case IOMMU_DEV_FEAT_IOPF:
2794 if (!arm_smmu_master_iopf_supported(master))
2795 return -EINVAL;
2796 if (master->iopf_enabled)
2797 return -EBUSY;
2798 master->iopf_enabled = true;
2799 return 0;
2800 case IOMMU_DEV_FEAT_SVA:
2801 if (!arm_smmu_master_sva_supported(master))
2802 return -EINVAL;
2803 if (arm_smmu_master_sva_enabled(master))
2804 return -EBUSY;
2805 return arm_smmu_master_enable_sva(master);
2806 default:
2807 return -EINVAL;
2808 }
2809 }
2810
2811 static int arm_smmu_dev_disable_feature(struct device *dev,
2812 enum iommu_dev_features feat)
2813 {
2814 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2815
2816 if (!master)
2817 return -EINVAL;
2818
2819 switch (feat) {
2820 case IOMMU_DEV_FEAT_IOPF:
2821 if (!master->iopf_enabled)
2822 return -EINVAL;
2823 if (master->sva_enabled)
2824 return -EBUSY;
2825 master->iopf_enabled = false;
2826 return 0;
2827 case IOMMU_DEV_FEAT_SVA:
2828 if (!arm_smmu_master_sva_enabled(master))
2829 return -EINVAL;
2830 return arm_smmu_master_disable_sva(master);
2831 default:
2832 return -EINVAL;
2833 }
2834 }
2835
2836 /*
2837 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
2838 * PCIe link and save the data to memory by DMA. The hardware is restricted to
2839 * use identity mapping only.
2840 */
2841 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2842 (pdev)->device == 0xa12e)
2843
2844 static int arm_smmu_def_domain_type(struct device *dev)
2845 {
2846 if (dev_is_pci(dev)) {
2847 struct pci_dev *pdev = to_pci_dev(dev);
2848
2849 if (IS_HISI_PTT_DEVICE(pdev))
2850 return IOMMU_DOMAIN_IDENTITY;
2851 }
2852
2853 return 0;
2854 }
2855
2856 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2857 {
2858 struct iommu_domain *domain;
2859
2860 domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2861 if (WARN_ON(IS_ERR(domain)) || !domain)
2862 return;
2863
2864 arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2865 }
2866
2867 static struct iommu_ops arm_smmu_ops = {
2868 .capable = arm_smmu_capable,
2869 .domain_alloc = arm_smmu_domain_alloc,
2870 .probe_device = arm_smmu_probe_device,
2871 .release_device = arm_smmu_release_device,
2872 .device_group = arm_smmu_device_group,
2873 .of_xlate = arm_smmu_of_xlate,
2874 .get_resv_regions = arm_smmu_get_resv_regions,
2875 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
2876 .dev_enable_feat = arm_smmu_dev_enable_feature,
2877 .dev_disable_feat = arm_smmu_dev_disable_feature,
2878 .page_response = arm_smmu_page_response,
2879 .def_domain_type = arm_smmu_def_domain_type,
2880 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2881 .owner = THIS_MODULE,
2882 .default_domain_ops = &(const struct iommu_domain_ops) {
2883 .attach_dev = arm_smmu_attach_dev,
2884 .map_pages = arm_smmu_map_pages,
2885 .unmap_pages = arm_smmu_unmap_pages,
2886 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2887 .iotlb_sync = arm_smmu_iotlb_sync,
2888 .iova_to_phys = arm_smmu_iova_to_phys,
2889 .enable_nesting = arm_smmu_enable_nesting,
2890 .free = arm_smmu_domain_free,
2891 }
2892 };
2893
2894 /* Probing and initialisation functions */
2895 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2896 struct arm_smmu_queue *q,
2897 void __iomem *page,
2898 unsigned long prod_off,
2899 unsigned long cons_off,
2900 size_t dwords, const char *name)
2901 {
2902 size_t qsz;
2903
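	/*
	 * Start from the advertised maximum queue size and keep halving it
	 * until the coherent allocation succeeds, or give up once the queue
	 * is already smaller than a page.
	 */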
2904 do {
2905 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2906 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2907 GFP_KERNEL);
2908 if (q->base || qsz < PAGE_SIZE)
2909 break;
2910
2911 q->llq.max_n_shift--;
2912 } while (1);
2913
2914 if (!q->base) {
2915 dev_err(smmu->dev,
2916 "failed to allocate queue (0x%zx bytes) for %s\n",
2917 qsz, name);
2918 return -ENOMEM;
2919 }
2920
2921 if (!WARN_ON(q->base_dma & (qsz - 1))) {
2922 dev_info(smmu->dev, "allocated %u entries for %s\n",
2923 1 << q->llq.max_n_shift, name);
2924 }
2925
2926 q->prod_reg = page + prod_off;
2927 q->cons_reg = page + cons_off;
2928 q->ent_dwords = dwords;
2929
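	/* Encode the base register: read-allocate hint, DMA address, log2 size */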
2930 q->q_base = Q_BASE_RWA;
2931 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2932 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2933
2934 q->llq.prod = q->llq.cons = 0;
2935 return 0;
2936 }
2937
2938 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2939 {
2940 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2941 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2942
2943 atomic_set(&cmdq->owner_prod, 0);
2944 atomic_set(&cmdq->lock, 0);
2945
2946 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2947 GFP_KERNEL);
2948 if (!cmdq->valid_map)
2949 return -ENOMEM;
2950
2951 return 0;
2952 }
2953
2954 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2955 {
2956 int ret;
2957
2958 /* cmdq */
2959 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2960 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2961 CMDQ_ENT_DWORDS, "cmdq");
2962 if (ret)
2963 return ret;
2964
2965 ret = arm_smmu_cmdq_init(smmu);
2966 if (ret)
2967 return ret;
2968
2969 /* evtq */
2970 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2971 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2972 EVTQ_ENT_DWORDS, "evtq");
2973 if (ret)
2974 return ret;
2975
2976 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2977 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2978 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2979 if (!smmu->evtq.iopf)
2980 return -ENOMEM;
2981 }
2982
2983 /* priq */
2984 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2985 return 0;
2986
2987 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2988 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2989 PRIQ_ENT_DWORDS, "priq");
2990 }
2991
2992 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2993 {
2994 unsigned int i;
2995 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2996 void *strtab = smmu->strtab_cfg.strtab;
2997
2998 cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2999 sizeof(*cfg->l1_desc), GFP_KERNEL);
3000 if (!cfg->l1_desc)
3001 return -ENOMEM;
3002
3003 for (i = 0; i < cfg->num_l1_ents; ++i) {
3004 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3005 strtab += STRTAB_L1_DESC_DWORDS << 3;
3006 }
3007
3008 return 0;
3009 }
3010
3011 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3012 {
3013 void *strtab;
3014 u64 reg;
3015 u32 size, l1size;
3016 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3017
3018 /* Calculate the L1 size, capped to the SIDSIZE. */
3019 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3020 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3021 cfg->num_l1_ents = 1 << size;
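	/*
	 * Each L1 descriptor covers 2^STRTAB_SPLIT STEs, so 2^size L1 entries
	 * span (size + STRTAB_SPLIT) bits of SID; warn below if that still
	 * falls short of SIDSIZE.
	 */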
3022
3023 size += STRTAB_SPLIT;
3024 if (size < smmu->sid_bits)
3025 dev_warn(smmu->dev,
3026 "2-level strtab only covers %u/%u bits of SID\n",
3027 size, smmu->sid_bits);
3028
3029 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3030 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3031 GFP_KERNEL);
3032 if (!strtab) {
3033 dev_err(smmu->dev,
3034 "failed to allocate l1 stream table (%u bytes)\n",
3035 l1size);
3036 return -ENOMEM;
3037 }
3038 cfg->strtab = strtab;
3039
3040 /* Configure strtab_base_cfg for 2 levels */
3041 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3042 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3043 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3044 cfg->strtab_base_cfg = reg;
3045
3046 return arm_smmu_init_l1_strtab(smmu);
3047 }
3048
3049 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3050 {
3051 void *strtab;
3052 u64 reg;
3053 u32 size;
3054 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3055
3056 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3057 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3058 GFP_KERNEL);
3059 if (!strtab) {
3060 dev_err(smmu->dev,
3061 "failed to allocate linear stream table (%u bytes)\n",
3062 size);
3063 return -ENOMEM;
3064 }
3065 cfg->strtab = strtab;
3066 cfg->num_l1_ents = 1 << smmu->sid_bits;
3067
3068 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3069 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3070 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3071 cfg->strtab_base_cfg = reg;
3072
3073 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3074 return 0;
3075 }
3076
3077 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3078 {
3079 u64 reg;
3080 int ret;
3081
3082 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3083 ret = arm_smmu_init_strtab_2lvl(smmu);
3084 else
3085 ret = arm_smmu_init_strtab_linear(smmu);
3086
3087 if (ret)
3088 return ret;
3089
3090 /* Set the strtab base address */
3091 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3092 reg |= STRTAB_BASE_RA;
3093 smmu->strtab_cfg.strtab_base = reg;
3094
3095 ida_init(&smmu->vmid_map);
3096
3097 return 0;
3098 }
3099
3100 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3101 {
3102 int ret;
3103
3104 mutex_init(&smmu->streams_mutex);
3105 smmu->streams = RB_ROOT;
3106
3107 ret = arm_smmu_init_queues(smmu);
3108 if (ret)
3109 return ret;
3110
3111 return arm_smmu_init_strtab(smmu);
3112 }
3113
3114 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3115 unsigned int reg_off, unsigned int ack_off)
3116 {
3117 u32 reg;
3118
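	/* Write the register, then poll its ACK register until the value sticks */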
3119 writel_relaxed(val, smmu->base + reg_off);
3120 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3121 1, ARM_SMMU_POLL_TIMEOUT_US);
3122 }
3123
3124 /* GBPA is "special" */
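/*
 * Updates must poll GBPA.UPDATE until it is clear, write the new value with
 * UPDATE set, then poll again until the SMMU clears it to signal completion.
 */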
3125 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3126 {
3127 int ret;
3128 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3129
3130 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3131 1, ARM_SMMU_POLL_TIMEOUT_US);
3132 if (ret)
3133 return ret;
3134
3135 reg &= ~clr;
3136 reg |= set;
3137 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3138 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3139 1, ARM_SMMU_POLL_TIMEOUT_US);
3140
3141 if (ret)
3142 dev_err(smmu->dev, "GBPA not responding to update\n");
3143 return ret;
3144 }
3145
3146 static void arm_smmu_free_msis(void *data)
3147 {
3148 struct device *dev = data;
3149 platform_msi_domain_free_irqs(dev);
3150 }
3151
3152 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3153 {
3154 phys_addr_t doorbell;
3155 struct device *dev = msi_desc_to_dev(desc);
3156 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3157 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3158
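	/*
	 * Each MSI source has a CFG0/CFG1/CFG2 triple: 64-bit doorbell
	 * address, 32-bit payload and memory attributes for the write.
	 */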
3159 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3160 doorbell &= MSI_CFG0_ADDR_MASK;
3161
3162 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3163 writel_relaxed(msg->data, smmu->base + cfg[1]);
3164 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3165 }
3166
3167 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3168 {
3169 int ret, nvec = ARM_SMMU_MAX_MSIS;
3170 struct device *dev = smmu->dev;
3171
3172 /* Clear the MSI address regs */
3173 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3174 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3175
3176 if (smmu->features & ARM_SMMU_FEAT_PRI)
3177 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3178 else
3179 nvec--;
3180
3181 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3182 return;
3183
3184 if (!dev->msi.domain) {
3185 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3186 return;
3187 }
3188
3189 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3190 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3191 if (ret) {
3192 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3193 return;
3194 }
3195
3196 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3197 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3198 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3199
3200 /* Add callback to free MSIs on teardown */
3201 devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3202 }
3203
3204 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3205 {
3206 int irq, ret;
3207
3208 arm_smmu_setup_msis(smmu);
3209
3210 /* Request interrupt lines */
3211 irq = smmu->evtq.q.irq;
3212 if (irq) {
3213 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3214 arm_smmu_evtq_thread,
3215 IRQF_ONESHOT,
3216 "arm-smmu-v3-evtq", smmu);
3217 if (ret < 0)
3218 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3219 } else {
3220 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3221 }
3222
3223 irq = smmu->gerr_irq;
3224 if (irq) {
3225 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3226 0, "arm-smmu-v3-gerror", smmu);
3227 if (ret < 0)
3228 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3229 } else {
3230 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3231 }
3232
3233 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3234 irq = smmu->priq.q.irq;
3235 if (irq) {
3236 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3237 arm_smmu_priq_thread,
3238 IRQF_ONESHOT,
3239 "arm-smmu-v3-priq",
3240 smmu);
3241 if (ret < 0)
3242 dev_warn(smmu->dev,
3243 "failed to enable priq irq\n");
3244 } else {
3245 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3246 }
3247 }
3248 }
3249
3250 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3251 {
3252 int ret, irq;
3253 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3254
3255 /* Disable IRQs first */
3256 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3257 ARM_SMMU_IRQ_CTRLACK);
3258 if (ret) {
3259 dev_err(smmu->dev, "failed to disable irqs\n");
3260 return ret;
3261 }
3262
3263 irq = smmu->combined_irq;
3264 if (irq) {
3265 /*
3266 * Cavium ThunderX2 implementation doesn't support unique irq
3267 * lines. Use a single irq line for all the SMMUv3 interrupts.
3268 */
3269 ret = devm_request_threaded_irq(smmu->dev, irq,
3270 arm_smmu_combined_irq_handler,
3271 arm_smmu_combined_irq_thread,
3272 IRQF_ONESHOT,
3273 "arm-smmu-v3-combined-irq", smmu);
3274 if (ret < 0)
3275 dev_warn(smmu->dev, "failed to enable combined irq\n");
3276 } else
3277 arm_smmu_setup_unique_irqs(smmu);
3278
3279 if (smmu->features & ARM_SMMU_FEAT_PRI)
3280 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3281
3282 /* Enable interrupt generation on the SMMU */
3283 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3284 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3285 if (ret)
3286 dev_warn(smmu->dev, "failed to enable irqs\n");
3287
3288 return 0;
3289 }
3290
3291 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3292 {
3293 int ret;
3294
3295 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3296 if (ret)
3297 dev_err(smmu->dev, "failed to clear cr0\n");
3298
3299 return ret;
3300 }
3301
3302 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3303 {
3304 int ret;
3305 u32 reg, enables;
3306 struct arm_smmu_cmdq_ent cmd;
3307
3308 /* Clear CR0 and sync (disables SMMU and queue processing) */
3309 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3310 if (reg & CR0_SMMUEN) {
3311 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3312 WARN_ON(is_kdump_kernel() && !disable_bypass);
3313 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3314 }
3315
3316 ret = arm_smmu_device_disable(smmu);
3317 if (ret)
3318 return ret;
3319
3320 /* CR1 (table and queue memory attributes) */
3321 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3322 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3323 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3324 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3325 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3326 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3327 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3328
3329 /* CR2 (random crap) */
3330 reg = CR2_PTM | CR2_RECINVSID;
3331
3332 if (smmu->features & ARM_SMMU_FEAT_E2H)
3333 reg |= CR2_E2H;
3334
3335 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3336
3337 /* Stream table */
3338 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3339 smmu->base + ARM_SMMU_STRTAB_BASE);
3340 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3341 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3342
3343 /* Command queue */
3344 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3345 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3346 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3347
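	/*
	 * Bring the interface up piecewise: accumulate each CR0 enable bit
	 * into 'enables' and sync through CR0/CR0ACK before the next step.
	 */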
3348 enables = CR0_CMDQEN;
3349 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3350 ARM_SMMU_CR0ACK);
3351 if (ret) {
3352 dev_err(smmu->dev, "failed to enable command queue\n");
3353 return ret;
3354 }
3355
3356 /* Invalidate any cached configuration */
3357 cmd.opcode = CMDQ_OP_CFGI_ALL;
3358 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3359
3360 /* Invalidate any stale TLB entries */
3361 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3362 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3363 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3364 }
3365
3366 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3367 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3368
3369 /* Event queue */
3370 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3371 writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3372 writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3373
3374 enables |= CR0_EVTQEN;
3375 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3376 ARM_SMMU_CR0ACK);
3377 if (ret) {
3378 dev_err(smmu->dev, "failed to enable event queue\n");
3379 return ret;
3380 }
3381
3382 /* PRI queue */
3383 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3384 writeq_relaxed(smmu->priq.q.q_base,
3385 smmu->base + ARM_SMMU_PRIQ_BASE);
3386 writel_relaxed(smmu->priq.q.llq.prod,
3387 smmu->page1 + ARM_SMMU_PRIQ_PROD);
3388 writel_relaxed(smmu->priq.q.llq.cons,
3389 smmu->page1 + ARM_SMMU_PRIQ_CONS);
3390
3391 enables |= CR0_PRIQEN;
3392 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3393 ARM_SMMU_CR0ACK);
3394 if (ret) {
3395 dev_err(smmu->dev, "failed to enable PRI queue\n");
3396 return ret;
3397 }
3398 }
3399
3400 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3401 enables |= CR0_ATSCHK;
3402 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3403 ARM_SMMU_CR0ACK);
3404 if (ret) {
3405 dev_err(smmu->dev, "failed to enable ATS check\n");
3406 return ret;
3407 }
3408 }
3409
3410 ret = arm_smmu_setup_irqs(smmu);
3411 if (ret) {
3412 dev_err(smmu->dev, "failed to setup irqs\n");
3413 return ret;
3414 }
3415
3416 if (is_kdump_kernel())
3417 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3418
3419 /* Enable the SMMU interface, or ensure bypass */
3420 if (!bypass || disable_bypass) {
3421 enables |= CR0_SMMUEN;
3422 } else {
3423 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3424 if (ret)
3425 return ret;
3426 }
3427 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3428 ARM_SMMU_CR0ACK);
3429 if (ret) {
3430 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3431 return ret;
3432 }
3433
3434 return 0;
3435 }
3436
3437 #define IIDR_IMPLEMENTER_ARM 0x43b
3438 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
3439 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
3440
3441 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3442 {
3443 u32 reg;
3444 unsigned int implementer, productid, variant, revision;
3445
3446 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3447 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3448 productid = FIELD_GET(IIDR_PRODUCTID, reg);
3449 variant = FIELD_GET(IIDR_VARIANT, reg);
3450 revision = FIELD_GET(IIDR_REVISION, reg);
3451
3452 switch (implementer) {
3453 case IIDR_IMPLEMENTER_ARM:
3454 switch (productid) {
3455 case IIDR_PRODUCTID_ARM_MMU_600:
3456 /* Arm erratum 1076982 */
3457 if (variant == 0 && revision <= 2)
3458 smmu->features &= ~ARM_SMMU_FEAT_SEV;
3459 /* Arm erratum 1209401 */
3460 if (variant < 2)
3461 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3462 break;
3463 case IIDR_PRODUCTID_ARM_MMU_700:
3464 /* Arm erratum 2812531 */
3465 smmu->features &= ~ARM_SMMU_FEAT_BTM;
3466 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3467 /* Arm errata 2268618, 2812531 */
3468 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3469 break;
3470 }
3471 break;
3472 }
3473 }
3474
3475 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3476 {
3477 u32 reg;
3478 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3479
3480 /* IDR0 */
3481 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3482
3483 /* 2-level structures */
3484 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3485 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3486
3487 if (reg & IDR0_CD2L)
3488 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3489
3490 /*
3491 * Translation table endianness.
3492 * We currently require the same endianness as the CPU, but this
3493 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3494 */
3495 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3496 case IDR0_TTENDIAN_MIXED:
3497 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3498 break;
3499 #ifdef __BIG_ENDIAN
3500 case IDR0_TTENDIAN_BE:
3501 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3502 break;
3503 #else
3504 case IDR0_TTENDIAN_LE:
3505 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3506 break;
3507 #endif
3508 default:
3509 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3510 return -ENXIO;
3511 }
3512
3513 /* Boolean feature flags */
3514 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3515 smmu->features |= ARM_SMMU_FEAT_PRI;
3516
3517 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3518 smmu->features |= ARM_SMMU_FEAT_ATS;
3519
3520 if (reg & IDR0_SEV)
3521 smmu->features |= ARM_SMMU_FEAT_SEV;
3522
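	/*
	 * MSI-based completion: if the SMMU can generate MSIs and its memory
	 * accesses are coherent, CMD_SYNC completion can be signalled by an
	 * MSI write rather than by polling the queue, unless the
	 * disable_msipolling parameter forbids it.
	 */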
3523 if (reg & IDR0_MSI) {
3524 smmu->features |= ARM_SMMU_FEAT_MSI;
3525 if (coherent && !disable_msipolling)
3526 smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3527 }
3528
3529 if (reg & IDR0_HYP) {
3530 smmu->features |= ARM_SMMU_FEAT_HYP;
3531 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3532 smmu->features |= ARM_SMMU_FEAT_E2H;
3533 }
3534
3535 /*
3536 * The coherency feature as set by FW is used in preference to the ID
3537 * register, but warn on mismatch.
3538 */
3539 if (!!(reg & IDR0_COHACC) != coherent)
3540 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3541 coherent ? "true" : "false");
3542
3543 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3544 case IDR0_STALL_MODEL_FORCE:
3545 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3546 fallthrough;
3547 case IDR0_STALL_MODEL_STALL:
3548 smmu->features |= ARM_SMMU_FEAT_STALLS;
3549 }
3550
3551 if (reg & IDR0_S1P)
3552 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3553
3554 if (reg & IDR0_S2P)
3555 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3556
3557 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3558 dev_err(smmu->dev, "no translation support!\n");
3559 return -ENXIO;
3560 }
3561
3562 /* We only support the AArch64 table format at present */
3563 switch (FIELD_GET(IDR0_TTF, reg)) {
3564 case IDR0_TTF_AARCH32_64:
3565 smmu->ias = 40;
3566 fallthrough;
3567 case IDR0_TTF_AARCH64:
3568 break;
3569 default:
3570 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3571 return -ENXIO;
3572 }
3573
3574 /* ASID/VMID sizes */
3575 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3576 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3577
3578 /* IDR1 */
3579 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3580 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3581 dev_err(smmu->dev, "embedded implementation not supported\n");
3582 return -ENXIO;
3583 }
3584
3585 /* Queue sizes, capped to ensure natural alignment */
3586 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3587 FIELD_GET(IDR1_CMDQS, reg));
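	/*
	 * One full batch of commands plus a trailing CMD_SYNC must fit in the
	 * queue. Assuming CMDQ_BATCH_ENTRIES is BITS_PER_LONG (64 on arm64),
	 * ilog2() below yields 6, so queues smaller than 2^7 = 128 entries
	 * are rejected.
	 */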
3588 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3589 /*
3590 * We don't support splitting up batches, so one batch of
3591 * commands plus an extra sync needs to fit inside the command
3592 * queue. There's also no way we can handle the weird alignment
3593 * restrictions on the base pointer for a unit-length queue.
3594 */
3595 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3596 CMDQ_BATCH_ENTRIES);
3597 return -ENXIO;
3598 }
3599
3600 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3601 FIELD_GET(IDR1_EVTQS, reg));
3602 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3603 FIELD_GET(IDR1_PRIQS, reg));
3604
3605 /* SID/SSID sizes */
3606 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3607 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3608 smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3609
3610 /*
3611 * If the SMMU supports fewer bits than would fill a single L2 stream
3612 * table, use a linear table instead.
3613 */
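	/*
	 * For example, assuming the usual STRTAB_SPLIT of 8, one L2 table
	 * spans 2^8 = 256 stream IDs, so an SMMU with 8 or fewer SID bits
	 * gains nothing from the two-level format.
	 */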
3614 if (smmu->sid_bits <= STRTAB_SPLIT)
3615 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3616
3617 /* IDR3 */
3618 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3619 if (FIELD_GET(IDR3_RIL, reg))
3620 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3621
3622 /* IDR5 */
3623 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3624
3625 /* Maximum number of outstanding stalls */
3626 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3627
3628 /* Page sizes */
3629 if (reg & IDR5_GRAN64K)
3630 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3631 if (reg & IDR5_GRAN16K)
3632 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3633 if (reg & IDR5_GRAN4K)
3634 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3635
3636 /* Input address size */
3637 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3638 smmu->features |= ARM_SMMU_FEAT_VAX;
3639
3640 /* Output address size */
3641 switch (FIELD_GET(IDR5_OAS, reg)) {
3642 case IDR5_OAS_32_BIT:
3643 smmu->oas = 32;
3644 break;
3645 case IDR5_OAS_36_BIT:
3646 smmu->oas = 36;
3647 break;
3648 case IDR5_OAS_40_BIT:
3649 smmu->oas = 40;
3650 break;
3651 case IDR5_OAS_42_BIT:
3652 smmu->oas = 42;
3653 break;
3654 case IDR5_OAS_44_BIT:
3655 smmu->oas = 44;
3656 break;
3657 case IDR5_OAS_52_BIT:
3658 smmu->oas = 52;
3659 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3660 break;
3661 default:
3662 dev_info(smmu->dev,
3663 "unknown output address size. Truncating to 48-bit\n");
3664 fallthrough;
3665 case IDR5_OAS_48_BIT:
3666 smmu->oas = 48;
3667 }
3668
3669 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3670 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3671 else
3672 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3673
3674 /* Set the DMA mask for our table walker */
3675 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3676 dev_warn(smmu->dev,
3677 "failed to set DMA mask for table walker\n");
3678
3679 smmu->ias = max(smmu->ias, smmu->oas);
3680
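	/*
	 * Nested (stage 1 over stage 2) translation needs both stages; the
	 * IIDR-based erratum checks below may clear this again on affected
	 * implementations.
	 */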
3681 if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3682 (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3683 smmu->features |= ARM_SMMU_FEAT_NESTING;
3684
3685 arm_smmu_device_iidr_probe(smmu);
3686
3687 if (arm_smmu_sva_supported(smmu))
3688 smmu->features |= ARM_SMMU_FEAT_SVA;
3689
3690 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3691 smmu->ias, smmu->oas, smmu->features);
3692 return 0;
3693 }
3694
3695 #ifdef CONFIG_ACPI
3696 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3697 {
3698 switch (model) {
3699 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3700 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3701 break;
3702 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3703 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3704 break;
3705 }
3706
3707 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3708 }
3709
3710 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3711 struct arm_smmu_device *smmu)
3712 {
3713 struct acpi_iort_smmu_v3 *iort_smmu;
3714 struct device *dev = smmu->dev;
3715 struct acpi_iort_node *node;
3716
3717 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3718
3719 /* Retrieve SMMUv3 specific data */
3720 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3721
3722 acpi_smmu_get_options(iort_smmu->model, smmu);
3723
3724 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3725 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3726
3727 return 0;
3728 }
3729 #else
3730 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3731 struct arm_smmu_device *smmu)
3732 {
3733 return -ENODEV;
3734 }
3735 #endif
3736
3737 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3738 struct arm_smmu_device *smmu)
3739 {
3740 struct device *dev = &pdev->dev;
3741 u32 cells;
3742 int ret = -EINVAL;
3743
3744 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3745 dev_err(dev, "missing #iommu-cells property\n");
3746 else if (cells != 1)
3747 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3748 else
3749 ret = 0;
3750
3751 parse_driver_options(smmu);
3752
3753 if (of_dma_is_coherent(dev->of_node))
3754 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3755
3756 return ret;
3757 }
3758
3759 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3760 {
3761 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3762 return SZ_64K;
3763 else
3764 return SZ_128K;
3765 }
3766
3767 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3768 resource_size_t size)
3769 {
3770 struct resource res = DEFINE_RES_MEM(start, size);
3771
3772 return devm_ioremap_resource(dev, &res);
3773 }
3774
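/*
 * IORT RMR (Reserved Memory Range) nodes describe streams that may already be
 * live when the OS takes over, e.g. ongoing DMA set up by firmware. Install
 * bypass STEs for the listed stream IDs so that this traffic keeps working
 * once the SMMU is enabled.
 */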
3775 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3776 {
3777 struct list_head rmr_list;
3778 struct iommu_resv_region *e;
3779
3780 INIT_LIST_HEAD(&rmr_list);
3781 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3782
3783 list_for_each_entry(e, &rmr_list, list) {
3784 __le64 *step;
3785 struct iommu_iort_rmr_data *rmr;
3786 int ret, i;
3787
3788 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3789 for (i = 0; i < rmr->num_sids; i++) {
3790 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3791 if (ret) {
3792 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3793 rmr->sids[i]);
3794 continue;
3795 }
3796
3797 step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3798 arm_smmu_init_bypass_stes(step, 1, true);
3799 }
3800 }
3801
3802 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3803 }
3804
3805 static int arm_smmu_device_probe(struct platform_device *pdev)
3806 {
3807 int irq, ret;
3808 struct resource *res;
3809 resource_size_t ioaddr;
3810 struct arm_smmu_device *smmu;
3811 struct device *dev = &pdev->dev;
3812 bool bypass;
3813
3814 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3815 if (!smmu)
3816 return -ENOMEM;
3817 smmu->dev = dev;
3818
3819 if (dev->of_node) {
3820 ret = arm_smmu_device_dt_probe(pdev, smmu);
3821 } else {
3822 ret = arm_smmu_device_acpi_probe(pdev, smmu);
3823 if (ret == -ENODEV)
3824 return ret;
3825 }
3826
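	/*
	 * A non-zero ret from the firmware probe above (e.g. a missing or
	 * invalid "#iommu-cells" property in the DT case) is not fatal: it
	 * only requests bypass handling, and arm_smmu_device_reset() then
	 * either enables translation or programs global bypass, depending on
	 * disable_bypass.
	 */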
3827 /* Set bypass mode according to firmware probing result */
3828 bypass = !!ret;
3829
3830 /* Base address */
3831 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3832 if (!res)
3833 return -EINVAL;
3834 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3835 dev_err(dev, "MMIO region too small (%pr)\n", res);
3836 return -EINVAL;
3837 }
3838 ioaddr = res->start;
3839
3840 /*
3841 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3842 * the PMCG registers which are reserved by the PMU driver.
3843 */
3844 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3845 if (IS_ERR(smmu->base))
3846 return PTR_ERR(smmu->base);
3847
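	/*
	 * The SMMUv3 register map is split across two 64K pages, with the
	 * queue PROD/CONS registers used at runtime living in page 1.
	 * Implementations with the PAGE0_REGS_ONLY quirk (e.g. Cavium CN99xx)
	 * expose everything in page 0, in which case page1 simply aliases
	 * base.
	 */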
3848 if (arm_smmu_resource_size(smmu) > SZ_64K) {
3849 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3850 ARM_SMMU_REG_SZ);
3851 if (IS_ERR(smmu->page1))
3852 return PTR_ERR(smmu->page1);
3853 } else {
3854 smmu->page1 = smmu->base;
3855 }
3856
3857 /* Interrupt lines */
3858
3859 irq = platform_get_irq_byname_optional(pdev, "combined");
3860 if (irq > 0)
3861 smmu->combined_irq = irq;
3862 else {
3863 irq = platform_get_irq_byname_optional(pdev, "eventq");
3864 if (irq > 0)
3865 smmu->evtq.q.irq = irq;
3866
3867 irq = platform_get_irq_byname_optional(pdev, "priq");
3868 if (irq > 0)
3869 smmu->priq.q.irq = irq;
3870
3871 irq = platform_get_irq_byname_optional(pdev, "gerror");
3872 if (irq > 0)
3873 smmu->gerr_irq = irq;
3874 }
3875 /* Probe the h/w */
3876 ret = arm_smmu_device_hw_probe(smmu);
3877 if (ret)
3878 return ret;
3879
3880 /* Initialise in-memory data structures */
3881 ret = arm_smmu_init_structures(smmu);
3882 if (ret)
3883 goto err_free_iopf;
3884
3885 /* Record our private device structure */
3886 platform_set_drvdata(pdev, smmu);
3887
3888 /* Check for RMRs and install bypass STEs if any */
3889 arm_smmu_rmr_install_bypass_ste(smmu);
3890
3891 /* Reset the device */
3892 ret = arm_smmu_device_reset(smmu, bypass);
3893 if (ret)
3894 goto err_disable;
3895
3896 /* And we're up. Go go go! */
3897 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3898 "smmu3.%pa", &ioaddr);
3899 if (ret)
3900 goto err_disable;
3901
3902 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3903 if (ret) {
3904 dev_err(dev, "Failed to register iommu\n");
3905 goto err_free_sysfs;
3906 }
3907
3908 return 0;
3909
3910 err_free_sysfs:
3911 iommu_device_sysfs_remove(&smmu->iommu);
3912 err_disable:
3913 arm_smmu_device_disable(smmu);
3914 err_free_iopf:
3915 iopf_queue_free(smmu->evtq.iopf);
3916 return ret;
3917 }
3918
3919 static void arm_smmu_device_remove(struct platform_device *pdev)
3920 {
3921 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3922
3923 iommu_device_unregister(&smmu->iommu);
3924 iommu_device_sysfs_remove(&smmu->iommu);
3925 arm_smmu_device_disable(smmu);
3926 iopf_queue_free(smmu->evtq.iopf);
3927 ida_destroy(&smmu->vmid_map);
3928 }
3929
3930 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3931 {
3932 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3933
3934 arm_smmu_device_disable(smmu);
3935 }
3936
3937 static const struct of_device_id arm_smmu_of_match[] = {
3938 { .compatible = "arm,smmu-v3", },
3939 { },
3940 };
3941 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3942
3943 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3944 {
3945 arm_smmu_sva_notifier_synchronize();
3946 platform_driver_unregister(drv);
3947 }
3948
3949 static struct platform_driver arm_smmu_driver = {
3950 .driver = {
3951 .name = "arm-smmu-v3",
3952 .of_match_table = arm_smmu_of_match,
3953 .suppress_bind_attrs = true,
3954 },
3955 .probe = arm_smmu_device_probe,
3956 .remove_new = arm_smmu_device_remove,
3957 .shutdown = arm_smmu_device_shutdown,
3958 };
3959 module_driver(arm_smmu_driver, platform_driver_register,
3960 arm_smmu_driver_unregister);
3961
3962 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3963 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3964 MODULE_ALIAS("platform:arm-smmu-v3");
3965 MODULE_LICENSE("GPL v2");
3966