1 /*
2 * QEMU emulation of an RISC-V IOMMU
3 *
4 * Copyright (C) 2021-2023, Rivos Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "hw/pci/pci_bus.h"
22 #include "hw/pci/pci_device.h"
23 #include "hw/qdev-properties.h"
24 #include "hw/riscv/riscv_hart.h"
25 #include "migration/vmstate.h"
26 #include "qapi/error.h"
27 #include "qemu/timer.h"
28
29 #include "cpu_bits.h"
30 #include "riscv-iommu.h"
31 #include "riscv-iommu-bits.h"
32 #include "trace.h"
33
34 #define LIMIT_CACHE_CTX (1U << 7)
35 #define LIMIT_CACHE_IOT (1U << 20)
36
/* Physical page number conversions */
38 #define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
39 #define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
40
41 typedef struct RISCVIOMMUContext RISCVIOMMUContext;
42 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
43
44 /* Device assigned I/O address space */
45 struct RISCVIOMMUSpace {
46 IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */
47 AddressSpace iova_as; /* IOVA address space for attached device */
48 RISCVIOMMUState *iommu; /* Managing IOMMU device state */
49 uint32_t devid; /* Requester identifier, AKA device_id */
50 bool notifier; /* IOMMU unmap notifier enabled */
51 QLIST_ENTRY(RISCVIOMMUSpace) list;
52 };
53
/*
 * Device translation context state.
 *
 * The (devid, process_id) pair identifies the context; the remaining
 * members hold the translation settings associated with it.
 */
struct RISCVIOMMUContext {
    /* Requester Id, AKA device_id (24 bits) */
    uint64_t devid:24;
    /* Process ID, PASID for PCIe (20 bits) */
    uint64_t process_id:20;
    /* Translation Control */
    uint64_t tc;
    /* Translation Attributes */
    uint64_t ta;
    /* S-Stage address translation and protection */
    uint64_t satp;
    /* G-Stage address translation and protection */
    uint64_t gatp;
    /* MSI filtering - address mask */
    uint64_t msi_addr_mask;
    /* MSI filtering - address pattern */
    uint64_t msi_addr_pattern;
    /* MSI redirection page table pointer */
    uint64_t msiptp;
};
66
/*
 * Address translation cache entry.
 *
 * Bit-field widths are part of the layout and must not be reordered.
 */
struct RISCVIOMMUEntry {
    /* IOVA Page Number (44 bits) */
    uint64_t iova:44;
    /* Process Soft-Context identifier (20 bits) */
    uint64_t pscid:20;
    /* Physical Page Number (44 bits) */
    uint64_t phys:44;
    /* Guest Soft-Context identifier (16 bits) */
    uint64_t gscid:16;
    /* IOMMU_RW flags (2 bits) */
    uint64_t perm:2;
};
75
76 /* IOMMU index for transactions without process_id specified. */
77 #define RISCV_IOMMU_NOPROCID 0
78
riscv_iommu_get_icvec_vector(uint32_t icvec,uint32_t vec_type)79 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
80 {
81 switch (vec_type) {
82 case RISCV_IOMMU_INTR_CQ:
83 return icvec & RISCV_IOMMU_ICVEC_CIV;
84 case RISCV_IOMMU_INTR_FQ:
85 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
86 case RISCV_IOMMU_INTR_PM:
87 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
88 case RISCV_IOMMU_INTR_PQ:
89 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
90 default:
91 g_assert_not_reached();
92 }
93 }
94
/*
 * Signal an interrupt for the given interrupt source through the
 * s->notify callback.
 *
 * Nothing is done when FCTL.WSI is set or when no notify callback is
 * installed. Otherwise the source's bit is set in IPSR and the callback is
 * invoked with the vector taken from ICVEC, but only if that IPSR bit was
 * clear in the value returned by riscv_iommu_reg_mod32() (presumably the
 * register content before the update - confirm against the helper).
 *
 * @s        : IOMMU device state.
 * @vec_type : interrupt source (RISCV_IOMMU_INTR_*), also the IPSR bit index.
 */
static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
    const uint32_t source_bit = 1 << vec_type;
    uint32_t icvec, ipsr, vector;

    if ((fctl & RISCV_IOMMU_FCTL_WSI) || !s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, source_bit, 0);

    if (ipsr & source_bit) {
        /* Bit was already set in the returned IPSR value, nothing to send */
        return;
    }

    vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
    s->notify(s, vector);
    trace_riscv_iommu_notify_int_vector(vec_type, vector);
}
113
/*
 * Post a fault record to the in-memory fault queue.
 *
 * The record is always traced. It is then dropped silently when the queue
 * is not active (FQCSR.FQON clear) or when an overflow / memory fault
 * condition (FQOF / FQMF) is already flagged. Otherwise:
 *  - a full queue sets FQCSR.FQOF;
 *  - a failed write of the record sets FQCSR.FQMF;
 *  - a successful write advances the tail register (FQT).
 * In all three cases the fault-queue interrupt is raised when FQCSR.FIE
 * was set in the FQCSR value sampled on entry.
 *
 * @s  : IOMMU device state.
 * @ev : fault record to append.
 */
static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next_tail = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), ev->hdr, ev->iotval);

    /* Queue must be active ... */
    if (!(fqcsr & RISCV_IOMMU_FQCSR_FQON)) {
        return;
    }
    /* ... and free of pending overflow / memory fault conditions */
    if (fqcsr & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF)) {
        return;
    }

    if (head == next_tail) {
        /* No free slot left: flag overflow */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else if (dma_memory_write(s->target_as,
                                s->fq_addr + tail * sizeof(*ev),
                                ev, sizeof(*ev),
                                MEMTXATTRS_UNSPECIFIED) == MEMTX_OK) {
        /* Record stored: publish it by moving the tail */
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next_tail);
    } else {
        /* Record could not be written: flag memory fault */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQMF, 0);
    }

    if (fqcsr & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}
149
/*
 * Post a page request record to the in-memory page request queue.
 *
 * The record is always traced. It is then dropped silently when the queue
 * is not active (PQCSR.PQON clear) or when an overflow / memory fault
 * condition (PQOF / PQMF) is already flagged. Otherwise:
 *  - a full queue sets PQCSR.PQOF;
 *  - a failed write of the record sets PQCSR.PQMF;
 *  - a successful write advances the tail register (PQT).
 * In all three cases the page-request interrupt is raised when PQCSR.PIE
 * was set in the PQCSR value sampled on entry.
 *
 * @s  : IOMMU device state.
 * @pr : page request record to append.
 */
static void riscv_iommu_pri(RISCVIOMMUState *s,
                            struct riscv_iommu_pq_record *pr)
{
    uint32_t pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next_tail = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), pr->payload);

    /* Queue must be active ... */
    if (!(pqcsr & RISCV_IOMMU_PQCSR_PQON)) {
        return;
    }
    /* ... and free of pending overflow / memory fault conditions */
    if (pqcsr & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF)) {
        return;
    }

    if (head == next_tail) {
        /* No free slot left: flag overflow */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else if (dma_memory_write(s->target_as,
                                s->pq_addr + tail * sizeof(*pr),
                                pr, sizeof(*pr),
                                MEMTXATTRS_UNSPECIFIED) == MEMTX_OK) {
        /* Record stored: publish it by moving the tail */
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next_tail);
    } else {
        /* Record could not be written: flag memory fault */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQMF, 0);
    }

    if (pqcsr & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}
185
/*
 * Parallel bit extract.
 *
 * Keeps only the bits of 'val' whose position is set in the mask 'ext'
 * and packs them, in their original order, at the least-significant end
 * of the result. All remaining (most-significant) result bits are zero.
 *
 * Example:
 *
 *     val = a b c d e f g h
 *     ext = 1 0 1 0 0 1 1 0
 *     ret = 0 0 0 0 a c f g
 *
 * The operation comes from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", and is similar to the x86
 * bit manipulation instruction PEXT (Parallel bits extract).
 *
 * @val    : value to extract bits from.
 * @ext    : mask selecting the bits of @val to keep.
 * @return : selected bits of @val, packed towards bit 0.
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t packed = 0;
    unsigned out_pos = 0;

    /* Scan from bit 0 upwards until no selector bits are left */
    for (; ext != 0; ext >>= 1, val >>= 1) {
        if (!(ext & 1)) {
            continue;
        }
        packed |= (val & 1) << out_pos;
        out_pos++;
    }

    return packed;
}
222
223 /* Check if GPA matches MSI/MRIF pattern. */
riscv_iommu_msi_check(RISCVIOMMUState * s,RISCVIOMMUContext * ctx,dma_addr_t gpa)224 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
225 dma_addr_t gpa)
226 {
227 if (!s->enable_msi) {
228 return false;
229 }
230
231 if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
232 RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
233 return false; /* Invalid MSI/MRIF mode */
234 }
235
236 if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
237 return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
238 }
239
240 return true;
241 }
242
243 /*
244 * RISCV IOMMU Address Translation Lookup - Page Table Walk
245 *
246 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
247 * Both implementation can be merged into single helper function in future.
248 * Keeping them separate for now, as error reporting and flow specifics are
249 * sufficiently different for separate implementation.
250 *
251 * @s : IOMMU Device State
252 * @ctx : Translation context for device id and process address space id.
253 * @iotlb : translation data: physical address and access mode.
254 * @return : success or fault cause code.
255 */
riscv_iommu_spa_fetch(RISCVIOMMUState * s,RISCVIOMMUContext * ctx,IOMMUTLBEntry * iotlb)256 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
257 IOMMUTLBEntry *iotlb)
258 {
259 dma_addr_t addr, base;
260 uint64_t satp, gatp, pte;
261 bool en_s, en_g;
262 struct {
263 unsigned char step;
264 unsigned char levels;
265 unsigned char ptidxbits;
266 unsigned char ptesize;
267 } sc[2];
268 /* Translation stage phase */
269 enum {
270 S_STAGE = 0,
271 G_STAGE = 1,
272 } pass;
273 MemTxResult ret;
274
275 satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
276 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
277
278 en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
279 en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
280
281 /*
282 * Early check for MSI address match when IOVA == GPA.
283 * Note that the (!en_s) condition means that the MSI
284 * page table may only be used when guest pages are
285 * mapped using the g-stage page table, whether single-
286 * or two-stage paging is enabled. It's unavoidable though,
287 * because the spec mandates that we do a first-stage
288 * translation before we check the MSI page table, which
289 * means we can't do an early MSI check unless we have
290 * strictly !en_s.
291 */
292 if (!en_s && (iotlb->perm & IOMMU_WO) &&
293 riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
294 iotlb->target_as = &s->trap_as;
295 iotlb->translated_addr = iotlb->iova;
296 iotlb->addr_mask = ~TARGET_PAGE_MASK;
297 return 0;
298 }
299
300 /* Exit early for pass-through mode. */
301 if (!(en_s || en_g)) {
302 iotlb->translated_addr = iotlb->iova;
303 iotlb->addr_mask = ~TARGET_PAGE_MASK;
304 /* Allow R/W in pass-through mode */
305 iotlb->perm = IOMMU_RW;
306 return 0;
307 }
308
309 /* S/G translation parameters. */
310 for (pass = 0; pass < 2; pass++) {
311 uint32_t sv_mode;
312
313 sc[pass].step = 0;
314 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
315 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
316 /* 32bit mode for GXL/SXL == 1 */
317 switch (pass ? gatp : satp) {
318 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
319 sc[pass].levels = 0;
320 sc[pass].ptidxbits = 0;
321 sc[pass].ptesize = 0;
322 break;
323 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
324 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
325 if (!(s->cap & sv_mode)) {
326 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
327 }
328 sc[pass].levels = 2;
329 sc[pass].ptidxbits = 10;
330 sc[pass].ptesize = 4;
331 break;
332 default:
333 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
334 }
335 } else {
336 /* 64bit mode for GXL/SXL == 0 */
337 switch (pass ? gatp : satp) {
338 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
339 sc[pass].levels = 0;
340 sc[pass].ptidxbits = 0;
341 sc[pass].ptesize = 0;
342 break;
343 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
344 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
345 if (!(s->cap & sv_mode)) {
346 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
347 }
348 sc[pass].levels = 3;
349 sc[pass].ptidxbits = 9;
350 sc[pass].ptesize = 8;
351 break;
352 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
353 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
354 if (!(s->cap & sv_mode)) {
355 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
356 }
357 sc[pass].levels = 4;
358 sc[pass].ptidxbits = 9;
359 sc[pass].ptesize = 8;
360 break;
361 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
362 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
363 if (!(s->cap & sv_mode)) {
364 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
365 }
366 sc[pass].levels = 5;
367 sc[pass].ptidxbits = 9;
368 sc[pass].ptesize = 8;
369 break;
370 default:
371 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
372 }
373 }
374 };
375
376 /* S/G stages translation tables root pointers */
377 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
378 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
379 addr = (en_s && en_g) ? satp : iotlb->iova;
380 base = en_g ? gatp : satp;
381 pass = en_g ? G_STAGE : S_STAGE;
382
383 do {
384 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
385 const unsigned va_bits = widened + sc[pass].ptidxbits;
386 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
387 (sc[pass].levels - 1 - sc[pass].step);
388 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
389 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
390 const bool ade =
391 ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
392
393 /* Address range check before first level lookup */
394 if (!sc[pass].step) {
395 const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
396 if ((addr & va_mask) != addr) {
397 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
398 }
399 }
400
401 /* Read page table entry */
402 if (sc[pass].ptesize == 4) {
403 uint32_t pte32 = 0;
404 ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
405 MEMTXATTRS_UNSPECIFIED);
406 pte = pte32;
407 } else {
408 ret = ldq_le_dma(s->target_as, pte_addr, &pte,
409 MEMTXATTRS_UNSPECIFIED);
410 }
411 if (ret != MEMTX_OK) {
412 return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
413 : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
414 }
415
416 sc[pass].step++;
417 hwaddr ppn = pte >> PTE_PPN_SHIFT;
418
419 if (!(pte & PTE_V)) {
420 break; /* Invalid PTE */
421 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
422 base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
423 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
424 break; /* Reserved leaf PTE flags: PTE_W */
425 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
426 break; /* Reserved leaf PTE flags: PTE_W + PTE_X */
427 } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
428 break; /* Misaligned PPN */
429 } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
430 break; /* Read access check failed */
431 } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
432 break; /* Write access check failed */
433 } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
434 break; /* Access bit not set */
435 } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
436 break; /* Dirty bit not set */
437 } else {
438 /* Leaf PTE, translation completed. */
439 sc[pass].step = sc[pass].levels;
440 base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
441 /* Update address mask based on smallest translation granularity */
442 iotlb->addr_mask &= (1ULL << va_skip) - 1;
443 /* Continue with S-Stage translation? */
444 if (pass && sc[0].step != sc[0].levels) {
445 pass = S_STAGE;
446 addr = iotlb->iova;
447 continue;
448 }
449 /* Translation phase completed (GPA or SPA) */
450 iotlb->translated_addr = base;
451 iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
452 : IOMMU_RO;
453
454 /* Check MSI GPA address match */
455 if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
456 riscv_iommu_msi_check(s, ctx, base)) {
457 /* Trap MSI writes and return GPA address. */
458 iotlb->target_as = &s->trap_as;
459 iotlb->addr_mask = ~TARGET_PAGE_MASK;
460 return 0;
461 }
462
463 /* Continue with G-Stage translation? */
464 if (!pass && en_g) {
465 pass = G_STAGE;
466 addr = base;
467 base = gatp;
468 sc[pass].step = 0;
469 continue;
470 }
471
472 return 0;
473 }
474
475 if (sc[pass].step == sc[pass].levels) {
476 break; /* Can't find leaf PTE */
477 }
478
479 /* Continue with G-Stage translation? */
480 if (!pass && en_g) {
481 pass = G_STAGE;
482 addr = base;
483 base = gatp;
484 sc[pass].step = 0;
485 }
486 } while (1);
487
488 return (iotlb->perm & IOMMU_WO) ?
489 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
490 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
491 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
492 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
493 }
494
/*
 * Build a fault record and post it to the fault queue.
 *
 * When the context has DC.tc.DTF set, only the causes listed in the switch
 * below are still reported; every other cause is suppressed.
 *
 * @s          : IOMMU device state.
 * @ctx        : translation context of the faulting transaction; provides
 *               the device id and, when @pv is set, the process id.
 * @fault_type : transaction type stored in the record (TTYPE field).
 * @cause      : fault cause, RISCV_IOMMU_FQ_CAUSE_*.
 * @pv         : true when the transaction carries a valid process id.
 * @iotval     : value for the iotval field of the record.
 * @iotval2    : value for the iotval2 field of the record.
 */
static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);

    /*
     * PV tells the consumer whether the PID field is valid, so it must
     * follow @pv instead of being set unconditionally: a record for a
     * transaction without process id must not claim a valid PID.
     */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);
    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}
533
534 /* Redirect MSI write for given GPA. */
riscv_iommu_msi_write(RISCVIOMMUState * s,RISCVIOMMUContext * ctx,uint64_t gpa,uint64_t data,unsigned size,MemTxAttrs attrs)535 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
536 RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
537 unsigned size, MemTxAttrs attrs)
538 {
539 MemTxResult res;
540 dma_addr_t addr;
541 uint64_t intn;
542 uint32_t n190;
543 uint64_t pte[2];
544 int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
545 int cause;
546
547 /* Interrupt File Number */
548 intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
549 if (intn >= 256) {
550 /* Interrupt file number out of range */
551 res = MEMTX_ACCESS_ERROR;
552 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
553 goto err;
554 }
555
556 /* fetch MSI PTE */
557 addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
558 addr = addr | (intn * sizeof(pte));
559 res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
560 MEMTXATTRS_UNSPECIFIED);
561 if (res != MEMTX_OK) {
562 if (res == MEMTX_DECODE_ERROR) {
563 cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
564 } else {
565 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
566 }
567 goto err;
568 }
569
570 le64_to_cpus(&pte[0]);
571 le64_to_cpus(&pte[1]);
572
573 if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
574 /*
575 * The spec mentions that: "If msipte.C == 1, then further
576 * processing to interpret the PTE is implementation
577 * defined.". We'll abort with cause = 262 for this
578 * case too.
579 */
580 res = MEMTX_ACCESS_ERROR;
581 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
582 goto err;
583 }
584
585 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
586 case RISCV_IOMMU_MSI_PTE_M_BASIC:
587 /* MSI Pass-through mode */
588 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
589
590 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
591 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
592 gpa, addr);
593
594 res = dma_memory_write(s->target_as, addr, &data, size, attrs);
595 if (res != MEMTX_OK) {
596 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
597 goto err;
598 }
599
600 return MEMTX_OK;
601 case RISCV_IOMMU_MSI_PTE_M_MRIF:
602 /* MRIF mode, continue. */
603 break;
604 default:
605 res = MEMTX_ACCESS_ERROR;
606 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
607 goto err;
608 }
609
610 /*
611 * Report an error for interrupt identities exceeding the maximum allowed
612 * for an IMSIC interrupt file (2047) or destination address is not 32-bit
613 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
614 */
615 if ((data > 2047) || (gpa & 3)) {
616 res = MEMTX_ACCESS_ERROR;
617 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
618 goto err;
619 }
620
621 /* MSI MRIF mode, non atomic pending bit update */
622
623 /* MRIF pending bit address */
624 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
625 addr = addr | ((data & 0x7c0) >> 3);
626
627 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
628 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
629 gpa, addr);
630
631 /* MRIF pending bit mask */
632 data = 1ULL << (data & 0x03f);
633 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
634 if (res != MEMTX_OK) {
635 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
636 goto err;
637 }
638
639 intn = intn | data;
640 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
641 if (res != MEMTX_OK) {
642 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
643 goto err;
644 }
645
646 /* Get MRIF enable bits */
647 addr = addr + sizeof(intn);
648 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
649 if (res != MEMTX_OK) {
650 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
651 goto err;
652 }
653
654 if (!(intn & data)) {
655 /* notification disabled, MRIF update completed. */
656 return MEMTX_OK;
657 }
658
659 /* Send notification message */
660 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
661 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
662 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
663
664 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
665 if (res != MEMTX_OK) {
666 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
667 goto err;
668 }
669
670 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
671
672 return MEMTX_OK;
673
674 err:
675 riscv_iommu_report_fault(s, ctx, fault_type, cause,
676 !!ctx->process_id, 0, 0);
677 return res;
678 }
679
680 /*
681 * Check device context configuration as described by the
682 * riscv-iommu spec section "Device-context configuration
683 * checks".
684 */
riscv_iommu_validate_device_ctx(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)685 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
686 RISCVIOMMUContext *ctx)
687 {
688 uint32_t fsc_mode, msi_mode;
689 uint64_t gatp;
690
691 if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
692 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
693 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
694 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
695 return false;
696 }
697
698 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
699 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
700 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
701 return false;
702 }
703
704 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
705 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
706 return false;
707 }
708
709 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
710 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
711 return false;
712 }
713
714 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
715 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
716
717 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
718 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
719 return false;
720 }
721 }
722
723 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
724 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
725 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
726 return false;
727 }
728
729 fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
730
731 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
732 switch (fsc_mode) {
733 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
734 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
735 return false;
736 }
737 break;
738 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
739 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
740 return false;
741 }
742 break;
743 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
744 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
745 return false;
746 }
747 break;
748 }
749 } else {
750 /* DC.tc.PDTV is 0 */
751 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
752 return false;
753 }
754
755 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
756 if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
757 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
758 return false;
759 }
760 } else {
761 switch (fsc_mode) {
762 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
763 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
764 return false;
765 }
766 break;
767 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
768 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
769 return false;
770 }
771 break;
772 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
773 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
774 return false;
775 }
776 break;
777 }
778 }
779 }
780
781 /*
782 * CAP_END is always zero (only one endianess). FCTL_BE is
783 * always zero (little-endian accesses). Thus TC_SBE must
784 * always be LE, i.e. zero.
785 */
786 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
787 return false;
788 }
789
790 return true;
791 }
792
793 /*
794 * Validate process context (PC) according to section
795 * "Process-context configuration checks".
796 */
riscv_iommu_validate_process_ctx(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)797 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
798 RISCVIOMMUContext *ctx)
799 {
800 uint32_t mode;
801
802 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
803 return false;
804 }
805
806 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
807 return false;
808 }
809
810 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
811 switch (mode) {
812 case RISCV_IOMMU_DC_FSC_MODE_BARE:
813 /* sv39 and sv32 modes have the same value (8) */
814 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
815 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
816 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
817 break;
818 default:
819 return false;
820 }
821
822 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
823 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
824 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
825 return false;
826 }
827 } else {
828 switch (mode) {
829 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
830 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
831 return false;
832 }
833 break;
834 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
835 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
836 return false;
837 }
838 break;
839 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
840 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
841 return false;
842 }
843 break;
844 }
845 }
846
847 return true;
848 }
849
850 /*
851 * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
852 *
853 * @s : IOMMU Device State
854 * @ctx : Device Translation Context with devid and process_id set.
855 * @return : success or fault code.
856 */
riscv_iommu_ctx_fetch(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)857 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
858 {
859 const uint64_t ddtp = s->ddtp;
860 unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
861 dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
862 struct riscv_iommu_dc dc;
863 /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
864 const int dc_fmt = !s->enable_msi;
865 const size_t dc_len = sizeof(dc) >> dc_fmt;
866 int depth;
867 uint64_t de;
868
869 switch (mode) {
870 case RISCV_IOMMU_DDTP_MODE_OFF:
871 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
872
873 case RISCV_IOMMU_DDTP_MODE_BARE:
874 /* mock up pass-through translation context */
875 ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
876 RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
877 ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
878 RISCV_IOMMU_DC_FSC_MODE_BARE);
879
880 ctx->tc = RISCV_IOMMU_DC_TC_V;
881 if (s->enable_ats) {
882 ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
883 }
884
885 ctx->ta = 0;
886 ctx->msiptp = 0;
887 return 0;
888
889 case RISCV_IOMMU_DDTP_MODE_1LVL:
890 depth = 0;
891 break;
892
893 case RISCV_IOMMU_DDTP_MODE_2LVL:
894 depth = 1;
895 break;
896
897 case RISCV_IOMMU_DDTP_MODE_3LVL:
898 depth = 2;
899 break;
900
901 default:
902 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
903 }
904
905 /*
906 * Check supported device id width (in bits).
907 * See IOMMU Specification, Chapter 6. Software guidelines.
908 * - if extended device-context format is used:
909 * 1LVL: 6, 2LVL: 15, 3LVL: 24
910 * - if base device-context format is used:
911 * 1LVL: 7, 2LVL: 16, 3LVL: 24
912 */
913 if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
914 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
915 }
916
917 /* Device directory tree walk */
918 for (; depth-- > 0; ) {
919 /*
920 * Select device id index bits based on device directory tree level
921 * and device context format.
922 * See IOMMU Specification, Chapter 2. Data Structures.
923 * - if extended device-context format is used:
924 * device index: [23:15][14:6][5:0]
925 * - if base device-context format is used:
926 * device index: [23:16][15:7][6:0]
927 */
928 const int split = depth * 9 + 6 + dc_fmt;
929 addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
930 if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
931 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
932 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
933 }
934 le64_to_cpus(&de);
935 if (!(de & RISCV_IOMMU_DDTE_VALID)) {
936 /* invalid directory entry */
937 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
938 }
939 if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
940 /* reserved bits set */
941 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
942 }
943 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
944 }
945
946 /* index into device context entry page */
947 addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
948
949 memset(&dc, 0, sizeof(dc));
950 if (dma_memory_read(s->target_as, addr, &dc, dc_len,
951 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
952 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
953 }
954
955 /* Set translation context. */
956 ctx->tc = le64_to_cpu(dc.tc);
957 ctx->gatp = le64_to_cpu(dc.iohgatp);
958 ctx->satp = le64_to_cpu(dc.fsc);
959 ctx->ta = le64_to_cpu(dc.ta);
960 ctx->msiptp = le64_to_cpu(dc.msiptp);
961 ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
962 ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
963
964 if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
965 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
966 }
967
968 if (!riscv_iommu_validate_device_ctx(s, ctx)) {
969 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
970 }
971
972 /* FSC field checks */
973 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
974 addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
975
976 if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
977 if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
978 /* PID is disabled */
979 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
980 }
981 if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
982 /* Invalid translation mode */
983 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
984 }
985 return 0;
986 }
987
988 if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
989 if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
990 /* No default process_id enabled, set BARE mode */
991 ctx->satp = 0ULL;
992 return 0;
993 } else {
994 /* Use default process_id #0 */
995 ctx->process_id = 0;
996 }
997 }
998
999 if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1000 /* No S-Stage translation, done. */
1001 return 0;
1002 }
1003
1004 /* FSC.TC.PDTV enabled */
1005 if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1006 /* Invalid PDTP.MODE */
1007 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1008 }
1009
1010 for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1011 /*
1012 * Select process id index bits based on process directory tree
1013 * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1014 */
1015 const int split = depth * 9 + 8;
1016 addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1017 if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1018 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1019 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1020 }
1021 le64_to_cpus(&de);
1022 if (!(de & RISCV_IOMMU_PC_TA_V)) {
1023 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1024 }
1025 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
1026 }
1027
1028 /* Leaf entry in PDT */
1029 addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1030 if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
1031 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1032 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1033 }
1034
1035 /* Use FSC and TA from process directory entry. */
1036 ctx->ta = le64_to_cpu(dc.ta);
1037 ctx->satp = le64_to_cpu(dc.fsc);
1038
1039 if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1040 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1041 }
1042
1043 if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1044 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1045 }
1046
1047 return 0;
1048 }
1049
1050 /* Translation Context cache support */
riscv_iommu_ctx_equal(gconstpointer v1,gconstpointer v2)1051 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1052 {
1053 RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1054 RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1055 return c1->devid == c2->devid &&
1056 c1->process_id == c2->process_id;
1057 }
1058
riscv_iommu_ctx_hash(gconstpointer v)1059 static guint riscv_iommu_ctx_hash(gconstpointer v)
1060 {
1061 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1062 /*
1063 * Generate simple hash of (process_id, devid)
1064 * assuming 24-bit wide devid.
1065 */
1066 return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1067 }
1068
/*
 * g_hash_table_foreach() callback: invalidate a still-valid cached context
 * whose device id and process id both match the key passed in 'data'.
 * Invalidation only clears the TC valid bit; the entry stays in the table.
 */
static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *cached = value;
    const RISCVIOMMUContext *match = data;

    if (!(cached->tc & RISCV_IOMMU_DC_TC_V)) {
        /* Already invalid, nothing to do. */
        return;
    }
    if (cached->devid != match->devid ||
        cached->process_id != match->process_id) {
        return;
    }
    cached->tc &= ~RISCV_IOMMU_DC_TC_V;
}
1080
/*
 * g_hash_table_foreach() callback: invalidate a still-valid cached context
 * that belongs to the device id given in 'data', whatever its process id.
 */
static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *cached = value;
    const RISCVIOMMUContext *match = data;
    const bool is_valid = cached->tc & RISCV_IOMMU_DC_TC_V;

    if (is_valid && cached->devid == match->devid) {
        cached->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}
1091
/*
 * g_hash_table_foreach() callback: invalidate a cached context
 * unconditionally; 'key' and 'data' are not used.
 */
static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *cached = value;

    if (!(cached->tc & RISCV_IOMMU_DC_TC_V)) {
        return;
    }
    cached->tc &= ~RISCV_IOMMU_DC_TC_V;
}
1100
/*
 * Run the invalidation callback 'func' over every cached translation
 * context. 'devid' and 'process_id' are packed into a temporary context
 * that is handed to the callback as its match key.
 */
static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    RISCVIOMMUContext match = {
        .devid = devid,
        .process_id = process_id,
    };
    /* Hold our own reference on the table for the duration of the walk. */
    GHashTable *pinned = g_hash_table_ref(s->ctx_cache);

    g_hash_table_foreach(pinned, func, &match);
    g_hash_table_unref(pinned);
}
1113
1114 /* Find or allocate translation context for a given {device_id, process_id} */
riscv_iommu_ctx(RISCVIOMMUState * s,unsigned devid,unsigned process_id,void ** ref)1115 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1116 unsigned devid, unsigned process_id,
1117 void **ref)
1118 {
1119 GHashTable *ctx_cache;
1120 RISCVIOMMUContext *ctx;
1121 RISCVIOMMUContext key = {
1122 .devid = devid,
1123 .process_id = process_id,
1124 };
1125
1126 ctx_cache = g_hash_table_ref(s->ctx_cache);
1127 ctx = g_hash_table_lookup(ctx_cache, &key);
1128
1129 if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1130 *ref = ctx_cache;
1131 return ctx;
1132 }
1133
1134 ctx = g_new0(RISCVIOMMUContext, 1);
1135 ctx->devid = devid;
1136 ctx->process_id = process_id;
1137
1138 int fault = riscv_iommu_ctx_fetch(s, ctx);
1139 if (!fault) {
1140 if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1141 g_hash_table_unref(ctx_cache);
1142 ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1143 riscv_iommu_ctx_equal,
1144 g_free, NULL);
1145 g_hash_table_ref(ctx_cache);
1146 g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1147 }
1148 g_hash_table_add(ctx_cache, ctx);
1149 *ref = ctx_cache;
1150 return ctx;
1151 }
1152
1153 g_hash_table_unref(ctx_cache);
1154 *ref = NULL;
1155
1156 riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1157 fault, !!process_id, 0, 0);
1158
1159 g_free(ctx);
1160 return NULL;
1161 }
1162
/*
 * Release the cache-table reference handed out by riscv_iommu_ctx().
 * A NULL 'ref' (lookup failed) is accepted and ignored.
 */
static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    GHashTable *held = ref;

    if (held == NULL) {
        return;
    }
    g_hash_table_unref(held);
}
1169
1170 /* Find or allocate address space for a given device */
riscv_iommu_space(RISCVIOMMUState * s,uint32_t devid)1171 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1172 {
1173 RISCVIOMMUSpace *as;
1174
1175 /* FIXME: PCIe bus remapping for attached endpoints. */
1176 devid |= s->bus << 8;
1177
1178 QLIST_FOREACH(as, &s->spaces, list) {
1179 if (as->devid == devid) {
1180 break;
1181 }
1182 }
1183
1184 if (as == NULL) {
1185 char name[64];
1186 as = g_new0(RISCVIOMMUSpace, 1);
1187
1188 as->iommu = s;
1189 as->devid = devid;
1190
1191 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1192 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1193
1194 /* IOVA address space, untranslated addresses */
1195 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1196 TYPE_RISCV_IOMMU_MEMORY_REGION,
1197 OBJECT(as), "riscv_iommu", UINT64_MAX);
1198 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1199
1200 QLIST_INSERT_HEAD(&s->spaces, as, list);
1201
1202 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1203 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1204 }
1205 return &as->iova_as;
1206 }
1207
1208 /* Translation Object cache support */
riscv_iommu_iot_equal(gconstpointer v1,gconstpointer v2)1209 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1210 {
1211 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1212 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1213 return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1214 t1->iova == t2->iova;
1215 }
1216
riscv_iommu_iot_hash(gconstpointer v)1217 static guint riscv_iommu_iot_hash(gconstpointer v)
1218 {
1219 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1220 return (guint)t->iova;
1221 }
1222
1223 /* GV: 1 PSCV: 1 AV: 1 */
riscv_iommu_iot_inval_pscid_iova(gpointer key,gpointer value,gpointer data)1224 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1225 gpointer data)
1226 {
1227 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1228 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1229 if (iot->gscid == arg->gscid &&
1230 iot->pscid == arg->pscid &&
1231 iot->iova == arg->iova) {
1232 iot->perm = IOMMU_NONE;
1233 }
1234 }
1235
1236 /* GV: 1 PSCV: 1 AV: 0 */
riscv_iommu_iot_inval_pscid(gpointer key,gpointer value,gpointer data)1237 static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
1238 gpointer data)
1239 {
1240 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1241 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1242 if (iot->gscid == arg->gscid &&
1243 iot->pscid == arg->pscid) {
1244 iot->perm = IOMMU_NONE;
1245 }
1246 }
1247
1248 /* GV: 1 GVMA: 1 */
riscv_iommu_iot_inval_gscid_gpa(gpointer key,gpointer value,gpointer data)1249 static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
1250 gpointer data)
1251 {
1252 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1253 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1254 if (iot->gscid == arg->gscid) {
1255 /* simplified cache, no GPA matching */
1256 iot->perm = IOMMU_NONE;
1257 }
1258 }
1259
1260 /* GV: 1 GVMA: 0 */
riscv_iommu_iot_inval_gscid(gpointer key,gpointer value,gpointer data)1261 static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
1262 gpointer data)
1263 {
1264 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1265 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1266 if (iot->gscid == arg->gscid) {
1267 iot->perm = IOMMU_NONE;
1268 }
1269 }
1270
1271 /* GV: 0 */
riscv_iommu_iot_inval_all(gpointer key,gpointer value,gpointer data)1272 static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
1273 gpointer data)
1274 {
1275 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1276 iot->perm = IOMMU_NONE;
1277 }
1278
1279 /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_lookup(RISCVIOMMUContext * ctx,GHashTable * iot_cache,hwaddr iova)1280 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1281 GHashTable *iot_cache, hwaddr iova)
1282 {
1283 RISCVIOMMUEntry key = {
1284 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1285 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1286 .iova = PPN_DOWN(iova),
1287 };
1288 return g_hash_table_lookup(iot_cache, &key);
1289 }
1290
1291 /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_update(RISCVIOMMUState * s,GHashTable * iot_cache,RISCVIOMMUEntry * iot)1292 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1293 GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1294 {
1295 if (!s->iot_limit) {
1296 return;
1297 }
1298
1299 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1300 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1301 riscv_iommu_iot_equal,
1302 g_free, NULL);
1303 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1304 }
1305 g_hash_table_add(iot_cache, iot);
1306 }
1307
/*
 * Run the invalidation callback 'func' over every cached translation.
 * 'gscid', 'pscid' and the page number of 'iova' are packed into a
 * temporary entry that is handed to the callback as its match key.
 */
static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t gscid, uint32_t pscid, hwaddr iova)
{
    RISCVIOMMUEntry match = {
        .iova = PPN_DOWN(iova),
        .pscid = pscid,
        .gscid = gscid,
    };
    /* Hold our own reference on the table for the duration of the walk. */
    GHashTable *pinned = g_hash_table_ref(s->iot_cache);

    g_hash_table_foreach(pinned, func, &match);
    g_hash_table_unref(pinned);
}
1322
/*
 * Translate iotlb->iova for the translation context 'ctx'.
 *
 * The IOT cache is consulted first; on a miss the translation is fetched
 * with riscv_iommu_spa_fetch() and, when 'enable_cache' is set and the
 * result is not identity-mapped, stored in the cache. A request with
 * iotlb->perm == IOMMU_NONE is treated as an ATS request.
 *
 * Returns 0 on success, otherwise the fault cause. A failing ATS request
 * with page-request generation enabled emits a page request; any other
 * failure is reported through riscv_iommu_report_fault().
 */
static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        /* Cache hit with live permissions: answer from the cache. */
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
        iot = g_new0(RISCVIOMMUEntry, 1);
        iot->iova = PPN_DOWN(iotlb->iova);
        iot->phys = PPN_DOWN(iotlb->translated_addr);
        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
        iot->perm = iotlb->perm;
        riscv_iommu_iot_update(s, iot_cache, iot);
    }

done:
    g_hash_table_unref(iot_cache);

    if (enable_pri && fault) {
        struct riscv_iommu_pq_record pr = {0};
        if (enable_pid) {
            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
        }
        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
                     RISCV_IOMMU_PREQ_PAYLOAD_M;
        riscv_iommu_pri(s, &pr);
        return fault;
    }

    if (fault) {
        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;

        /*
         * Classify by the write bit first. Testing against IOMMU_RW would
         * also match read-only requests, because IOMMU_RW contains the
         * IOMMU_RO bit, and would report read faults as write faults.
         */
        if (iotlb->perm & IOMMU_WO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}
1417
1418 /* IOMMU Command Interface */
riscv_iommu_iofence(RISCVIOMMUState * s,bool notify,uint64_t addr,uint32_t data)1419 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1420 uint64_t addr, uint32_t data)
1421 {
1422 /*
1423 * ATS processing in this implementation of the IOMMU is synchronous,
1424 * no need to wait for completions here.
1425 */
1426 if (!notify) {
1427 return MEMTX_OK;
1428 }
1429
1430 return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1431 MEMTXATTRS_UNSPECIFIED);
1432 }
1433
/*
 * Common handler for ATS commands.
 *
 * Decodes the target device id (and process id) from the command, finds
 * the matching device address space and sends an IOMMU event of type
 * 'flag' with permissions 'perm' to each registered notifier, covering
 * the notifier's whole range. With the PV bit set only notifiers whose
 * iommu_idx equals the PID are signalled. 'trace_fn' is called once per
 * notification. Nothing happens when the device has no address space or
 * no notifier enabled.
 */
static void riscv_iommu_ats(RISCVIOMMUState *s,
                            struct riscv_iommu_command *cmd,
                            IOMMUNotifierFlag flag,
                            IOMMUAccessFlags perm,
                            void (*trace_fn)(const char *id))
{
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *n;
    /*
     * Zero-initialise the event so fields never assigned below (such as
     * entry.translated_addr) do not reach notifiers as indeterminate
     * stack contents.
     */
    IOMMUTLBEvent event = {
        .type = flag,
        .entry = {
            .target_as = s->target_as,
            .perm = perm,
        },
    };
    uint32_t pid;
    uint32_t devid;
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;

    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    } else {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (!as || !as->notifier) {
        return;
    }

    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
        if (!pv || n->iommu_idx == pid) {
            event.entry.iova = n->start;
            event.entry.addr_mask = n->end - n->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(n, &event);
        }
    }
}
1479
/*
 * Handle an ATS invalidation command: notify the device's notifiers with
 * a device-IOTLB unmap event carrying no permissions.
 */
static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
                                  struct riscv_iommu_command *cmd)
{
    /* Plain call: a void function must not return an expression. */
    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                    trace_riscv_iommu_ats_inval);
}
1486
/*
 * Handle an ATS page-request group response command: notify the device's
 * notifiers with a map event. A zero response code is signalled as
 * IOMMU_RW, any other response code as IOMMU_NONE.
 */
static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
                                 struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;

    /* Plain call: a void function must not return an expression. */
    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                    trace_riscv_iommu_ats_prgr);
}
1498
/*
 * Process a guest write to the DDTP register.
 *
 * The requested mode is checked against the allowed transitions. If the
 * transition is legal the sanitized value (PPN and MODE only) becomes the
 * active DDTP; otherwise the previous value is kept. Either way the
 * resulting value is written back to the register.
 */
static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    const uint64_t prev = s->ddtp;
    const uint64_t requested = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    const unsigned new_mode = get_field(requested, RISCV_IOMMU_DDTP_MODE);
    const unsigned old_mode = get_field(prev, RISCV_IOMMU_DDTP_MODE);

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    const bool to_idle = new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
                         new_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    const bool to_tree = new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
                         new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
                         new_mode == RISCV_IOMMU_DDTP_MODE_3LVL;
    const bool from_idle = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
                           old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    const bool accept = new_mode == old_mode || to_idle ||
                        (to_tree && from_idle);

    if (accept) {
        /* clear reserved and busy bits, report back sanitized version */
        s->ddtp = set_field(requested & RISCV_IOMMU_DDTP_PPN,
                            RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        s->ddtp = prev;
    }

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);
}
1534
/*
 * Command function and opcode field.
 *
 * Builds the combined value that riscv_iommu_process_cq_tail() compares
 * against the OPCODE | FUNC bits extracted from a command's dword0:
 * 'op' fills the low bits and 'func' is placed from bit 7 upwards.
 */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
1537
/*
 * Process commands queued between the command-queue head and tail.
 *
 * Does nothing when the queue is not active or an error (CMD_ILL / CQMF)
 * is pending. Each command is read from guest memory, converted from
 * little-endian to host byte order, decoded and executed; the head
 * register is advanced after every completed command. A memory fault
 * sets CQMF and an illegal command sets CMD_ILL; in both cases
 * processing stops without advancing the head and, if CIE is enabled,
 * the command-queue interrupt is raised.
 */
static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        /*
         * Commands are stored little-endian in guest memory; convert to
         * host byte order before decoding any field, as is done for the
         * directory entries read elsewhere in this file.
         */
        le64_to_cpus(&cmd.dword0);
        le64_to_cpus(&cmd.dword1);

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID */
                func = riscv_iommu_iot_inval_gscid;
            } else {
                /* invalidate cache matching GSCID and ADDR (GPA) */
                func = riscv_iommu_iot_inval_gscid_gpa;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings, simplified model */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
                /* invalidate cache matching GSCID, simplified model */
                func = riscv_iommu_iot_inval_gscid;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID and PSCID */
                func = riscv_iommu_iot_inval_pscid;
            } else {
                /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
                func = riscv_iommu_iot_inval_pscid_iova;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid instruction, do not advance instruction index. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}
1692
/*
 * Process a guest write to the command-queue control/status register.
 *
 * Enabling an inactive queue latches its base address and index mask
 * from CQB, resets head and tail, turns CQON on and clears stale error
 * bits. Disabling an active queue turns CQON off. BUSY is cleared in
 * every case.
 */
static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    const bool want_on = csr & RISCV_IOMMU_CQCSR_CQEN;
    const bool is_on = csr & RISCV_IOMMU_CQCSR_CQON;
    uint32_t set_bits = 0;
    uint32_t clr_bits = RISCV_IOMMU_CQCSR_BUSY;

    if (want_on && !is_on) {
        const uint64_t cqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);

        s->cq_mask = (2ULL << get_field(cqb, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(cqb, RISCV_IOMMU_CQB_PPN));
        /* Index bits beyond the queue size become read-only in CQT. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        set_bits = RISCV_IOMMU_CQCSR_CQON;
        clr_bits |= RISCV_IOMMU_CQCSR_CQMF |
                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!want_on && is_on) {
        /* Queue off: the whole tail register becomes read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        clr_bits |= RISCV_IOMMU_CQCSR_CQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, set_bits, clr_bits);
}
1723
/*
 * Process a guest write to the fault-queue control/status register.
 *
 * Enabling an inactive queue latches its base address and index mask
 * from FQB, resets head and tail, turns FQON on and clears the memory
 * fault and overflow bits. Disabling an active queue turns FQON off.
 * BUSY is cleared in every case.
 */
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    const bool want_on = csr & RISCV_IOMMU_FQCSR_FQEN;
    const bool is_on = csr & RISCV_IOMMU_FQCSR_FQON;
    uint32_t set_bits = 0;
    uint32_t clr_bits = RISCV_IOMMU_FQCSR_BUSY;

    if (want_on && !is_on) {
        const uint64_t fqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);

        s->fq_mask = (2ULL << get_field(fqb, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(fqb, RISCV_IOMMU_FQB_PPN));
        /* Index bits beyond the queue size become read-only in FQH. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        set_bits = RISCV_IOMMU_FQCSR_FQON;
        clr_bits |= RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF;
    } else if (!want_on && is_on) {
        /* Queue off: the whole head register becomes read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        clr_bits |= RISCV_IOMMU_FQCSR_FQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, set_bits, clr_bits);
}
1753
/*
 * Process a guest write to the page-request-queue control/status register.
 *
 * Enabling an inactive queue latches its base address and index mask
 * from PQB, resets head and tail, turns PQON on and clears the memory
 * fault and overflow bits. Disabling an active queue turns PQON off.
 * BUSY is cleared in every case.
 */
static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    const bool want_on = csr & RISCV_IOMMU_PQCSR_PQEN;
    const bool is_on = csr & RISCV_IOMMU_PQCSR_PQON;
    uint32_t set_bits = 0;
    uint32_t clr_bits = RISCV_IOMMU_PQCSR_BUSY;

    if (want_on && !is_on) {
        const uint64_t pqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);

        s->pq_mask = (2ULL << get_field(pqb, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(pqb, RISCV_IOMMU_PQB_PPN));
        /* Index bits beyond the queue size become read-only in PQH. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        set_bits = RISCV_IOMMU_PQCSR_PQON;
        clr_bits |= RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF;
    } else if (!want_on && is_on) {
        /* Queue off: the whole head register becomes read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        clr_bits |= RISCV_IOMMU_PQCSR_PQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, set_bits, clr_bits);
}
1783
/*
 * Process a translation request issued through the debug interface.
 *
 * Runs only when GO_BUSY is set in TR_REQ_CTL. The device/process ids and
 * access type are taken from TR_REQ_CTL and the address from TR_REQ_IOVA;
 * the translation is performed without populating the translation cache.
 * TR_RESPONSE receives either the FAULT flag plus the cause shifted to
 * bit 10, or the translated page number placed in the PPN field.
 * GO_BUSY is cleared when done.
 */
static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                                 RISCV_IOMMU_TR_RESPONSE_FAULT |
                                 (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            /*
             * Place the whole physical page number into the PPN field.
             * Masking the shifted-down page number with the field mask
             * would drop PPN bits that lie below the field position.
             *
             * The response is built from zero, so the superpage (S) bit
             * stays clear: superpages (> 4 KiB) are not supported for now.
             */
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
        RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    /* ref is NULL when the context lookup failed; ctx_put accepts that. */
    riscv_iommu_ctx_put(s, ref);
}
1827
/*
 * Signature shared by the register-processing handlers
 * (riscv_iommu_process_*) that riscv_iommu_mmio_write() selects for an
 * actionable register write.
 */
typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
1829
/*
 * Update the ICVEC register from a guest-written value.
 *
 * Each vector field (CIV, FIV, PMIV, PIV) is clamped to the smaller of
 * the requested value and the matching field of s->icvec_avail_vectors.
 */
static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    const uint64_t fields[] = {
        RISCV_IOMMU_ICVEC_CIV,
        RISCV_IOMMU_ICVEC_FIV,
        RISCV_IOMMU_ICVEC_PMIV,
        RISCV_IOMMU_ICVEC_PIV,
    };
    uint64_t clamped = 0;

    for (size_t i = 0; i < ARRAY_SIZE(fields); i++) {
        const uint64_t wanted = data & fields[i];
        const uint64_t avail = s->icvec_avail_vectors & fields[i];

        clamped |= MIN(wanted, avail);
    }

    trace_riscv_iommu_icvec_write(data, clamped);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, clamped);
}
1850
/*
 * Update the IPSR register from a guest-written value.
 *
 * For each of the CIP, FIP and PIP bits: the bit is set only if it is set
 * in 'data', the corresponding queue has its interrupt enabled, and at
 * least one of that queue's interrupt cause bits is pending. Otherwise
 * the bit is cleared.
 */
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    bool cq_pending = false;
    bool fq_pending = false;
    bool pq_pending = false;
    uint32_t set_mask = 0;
    uint32_t clr_mask = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        const uint32_t cqcsr = riscv_iommu_reg_get32(s,
                                                     RISCV_IOMMU_REG_CQCSR);
        const uint32_t causes = RISCV_IOMMU_CQCSR_FENCE_W_IP |
                                RISCV_IOMMU_CQCSR_CMD_ILL |
                                RISCV_IOMMU_CQCSR_CMD_TO |
                                RISCV_IOMMU_CQCSR_CQMF;

        cq_pending = (cqcsr & RISCV_IOMMU_CQCSR_CIE) && (cqcsr & causes);
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        const uint32_t fqcsr = riscv_iommu_reg_get32(s,
                                                     RISCV_IOMMU_REG_FQCSR);
        const uint32_t causes = RISCV_IOMMU_FQCSR_FQOF |
                                RISCV_IOMMU_FQCSR_FQMF;

        fq_pending = (fqcsr & RISCV_IOMMU_FQCSR_FIE) && (fqcsr & causes);
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        const uint32_t pqcsr = riscv_iommu_reg_get32(s,
                                                     RISCV_IOMMU_REG_PQCSR);
        const uint32_t causes = RISCV_IOMMU_PQCSR_PQOF |
                                RISCV_IOMMU_PQCSR_PQMF;

        pq_pending = (pqcsr & RISCV_IOMMU_PQCSR_PIE) && (pqcsr & causes);
    }

    if (cq_pending) {
        set_mask |= RISCV_IOMMU_IPSR_CIP;
    } else {
        clr_mask |= RISCV_IOMMU_IPSR_CIP;
    }

    if (fq_pending) {
        set_mask |= RISCV_IOMMU_IPSR_FIP;
    } else {
        clr_mask |= RISCV_IOMMU_IPSR_FIP;
    }

    if (pq_pending) {
        set_mask |= RISCV_IOMMU_IPSR_PIP;
    } else {
        clr_mask |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, set_mask, clr_mask);
}
1903
1904 /*
1905 * Write the resulting value of 'data' for the reg specified
1906 * by 'reg_addr', after considering read-only/read-write/write-clear
1907 * bits, in the pointer 'dest'.
1908 *
1909 * The result is written in little-endian.
1910 */
riscv_iommu_write_reg_val(RISCVIOMMUState * s,void * dest,hwaddr reg_addr,int size,uint64_t data)1911 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
1912 void *dest, hwaddr reg_addr,
1913 int size, uint64_t data)
1914 {
1915 uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
1916 uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
1917 uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
1918
1919 stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
1920 }
1921
riscv_iommu_mmio_write(void * opaque,hwaddr addr,uint64_t data,unsigned size,MemTxAttrs attrs)1922 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
1923 uint64_t data, unsigned size,
1924 MemTxAttrs attrs)
1925 {
1926 riscv_iommu_process_fn *process_fn = NULL;
1927 RISCVIOMMUState *s = opaque;
1928 uint32_t regb = addr & ~3;
1929 uint32_t busy = 0;
1930 uint64_t val = 0;
1931
1932 if ((addr & (size - 1)) != 0) {
1933 /* Unsupported MMIO alignment or access size */
1934 return MEMTX_ERROR;
1935 }
1936
1937 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
1938 /* Unsupported MMIO access location. */
1939 return MEMTX_ACCESS_ERROR;
1940 }
1941
1942 /* Track actionable MMIO write. */
1943 switch (regb) {
1944 case RISCV_IOMMU_REG_DDTP:
1945 case RISCV_IOMMU_REG_DDTP + 4:
1946 process_fn = riscv_iommu_process_ddtp;
1947 regb = RISCV_IOMMU_REG_DDTP;
1948 busy = RISCV_IOMMU_DDTP_BUSY;
1949 break;
1950
1951 case RISCV_IOMMU_REG_CQT:
1952 process_fn = riscv_iommu_process_cq_tail;
1953 break;
1954
1955 case RISCV_IOMMU_REG_CQCSR:
1956 process_fn = riscv_iommu_process_cq_control;
1957 busy = RISCV_IOMMU_CQCSR_BUSY;
1958 break;
1959
1960 case RISCV_IOMMU_REG_FQCSR:
1961 process_fn = riscv_iommu_process_fq_control;
1962 busy = RISCV_IOMMU_FQCSR_BUSY;
1963 break;
1964
1965 case RISCV_IOMMU_REG_PQCSR:
1966 process_fn = riscv_iommu_process_pq_control;
1967 busy = RISCV_IOMMU_PQCSR_BUSY;
1968 break;
1969
1970 case RISCV_IOMMU_REG_ICVEC:
1971 case RISCV_IOMMU_REG_IPSR:
1972 /*
1973 * ICVEC and IPSR have special read/write procedures. We'll
1974 * call their respective helpers and exit.
1975 */
1976 riscv_iommu_write_reg_val(s, &val, addr, size, data);
1977
1978 /*
1979 * 'val' is stored as LE. Switch to host endianess
1980 * before using it.
1981 */
1982 val = le64_to_cpu(val);
1983
1984 if (regb == RISCV_IOMMU_REG_ICVEC) {
1985 riscv_iommu_update_icvec(s, val);
1986 } else {
1987 riscv_iommu_update_ipsr(s, val);
1988 }
1989
1990 return MEMTX_OK;
1991
1992 case RISCV_IOMMU_REG_TR_REQ_CTL:
1993 process_fn = riscv_iommu_process_dbg;
1994 regb = RISCV_IOMMU_REG_TR_REQ_CTL;
1995 busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
1996 break;
1997
1998 default:
1999 break;
2000 }
2001
2002 /*
2003 * Registers update might be not synchronized with core logic.
2004 * If system software updates register when relevant BUSY bit
2005 * is set IOMMU behavior of additional writes to the register
2006 * is UNSPECIFIED.
2007 */
2008 riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2009
2010 /* Busy flag update, MSB 4-byte register. */
2011 if (busy) {
2012 uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2013 stl_le_p(&s->regs_rw[regb], rw | busy);
2014 }
2015
2016 if (process_fn) {
2017 process_fn(s);
2018 }
2019
2020 return MEMTX_OK;
2021 }
2022
riscv_iommu_mmio_read(void * opaque,hwaddr addr,uint64_t * data,unsigned size,MemTxAttrs attrs)2023 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2024 uint64_t *data, unsigned size, MemTxAttrs attrs)
2025 {
2026 RISCVIOMMUState *s = opaque;
2027 uint64_t val = -1;
2028 uint8_t *ptr;
2029
2030 if ((addr & (size - 1)) != 0) {
2031 /* Unsupported MMIO alignment. */
2032 return MEMTX_ERROR;
2033 }
2034
2035 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2036 return MEMTX_ACCESS_ERROR;
2037 }
2038
2039 ptr = &s->regs_rw[addr];
2040 val = ldn_le_p(ptr, size);
2041
2042 *data = val;
2043
2044 return MEMTX_OK;
2045 }
2046
2047 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2048 .read_with_attrs = riscv_iommu_mmio_read,
2049 .write_with_attrs = riscv_iommu_mmio_write,
2050 .endianness = DEVICE_NATIVE_ENDIAN,
2051 .impl = {
2052 .min_access_size = 4,
2053 .max_access_size = 8,
2054 .unaligned = false,
2055 },
2056 .valid = {
2057 .min_access_size = 4,
2058 .max_access_size = 8,
2059 }
2060 };
2061
2062 /*
2063 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
2064 * memory region as untranslated address, for additional MSI/MRIF interception
2065 * by IOMMU interrupt remapping implementation.
2066 * Note: Device emulation code generating an MSI is expected to provide a valid
2067 * memory transaction attributes with requested_id set.
2068 */
riscv_iommu_trap_write(void * opaque,hwaddr addr,uint64_t data,unsigned size,MemTxAttrs attrs)2069 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2070 uint64_t data, unsigned size, MemTxAttrs attrs)
2071 {
2072 RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
2073 RISCVIOMMUContext *ctx;
2074 MemTxResult res;
2075 void *ref;
2076 uint32_t devid = attrs.requester_id;
2077
2078 if (attrs.unspecified) {
2079 return MEMTX_ACCESS_ERROR;
2080 }
2081
2082 /* FIXME: PCIe bus remapping for attached endpoints. */
2083 devid |= s->bus << 8;
2084
2085 ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2086 if (ctx == NULL) {
2087 res = MEMTX_ACCESS_ERROR;
2088 } else {
2089 res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2090 }
2091 riscv_iommu_ctx_put(s, ref);
2092 return res;
2093 }
2094
riscv_iommu_trap_read(void * opaque,hwaddr addr,uint64_t * data,unsigned size,MemTxAttrs attrs)2095 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
2096 uint64_t *data, unsigned size, MemTxAttrs attrs)
2097 {
2098 return MEMTX_ACCESS_ERROR;
2099 }
2100
2101 static const MemoryRegionOps riscv_iommu_trap_ops = {
2102 .read_with_attrs = riscv_iommu_trap_read,
2103 .write_with_attrs = riscv_iommu_trap_write,
2104 .endianness = DEVICE_LITTLE_ENDIAN,
2105 .impl = {
2106 .min_access_size = 4,
2107 .max_access_size = 8,
2108 .unaligned = true,
2109 },
2110 .valid = {
2111 .min_access_size = 4,
2112 .max_access_size = 8,
2113 }
2114 };
2115
/*
 * Realize callback of the core RISC-V IOMMU device.
 *
 * Builds the capability word from the configured properties, allocates
 * the three register backing arrays (regs_rw, regs_ro, regs_wc), programs
 * the power-on register values and access masks, sets up the downstream
 * and trap address spaces, and creates the device context and IOT caches.
 *
 * 'errp' is not written by this function.
 */
static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    /* Capabilities: version field plus one group of bits per property. */
    s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }
    /* Enable translation debug interface */
    s->cap |= RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /*
     * Out-of-reset translation mode: OFF (DMA disabled) when enable_off
     * is set, BARE (passthrough) otherwise.
     */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /*
     * Register storage: current values (rw), read-only bit masks (ro) and
     * write-1-to-clear bit masks (wc), each RISCV_IOMMU_REG_SIZE bytes and
     * zero-initialized.
     */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /*
     * Mark all registers read-only. The per-register stores into regs_ro
     * below then clear the mask bits software is allowed to write, so
     * this memset has to come first.
     */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
        "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    /* For the regs_ro stores: only the bits named inside ~(...) are writable. */
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
        ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
        ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
        ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
        ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    /*
     * Queue CSRs: the listed status bits are write-1-to-clear, and in the
     * low 32 bits only the queue-on and BUSY bits stay read-only.
     */
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
        RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
        RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
        RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
        RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
        RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
        RISCV_IOMMU_PQCSR_BUSY);
    /* Flag every bit of the 32-bit IPSR word in the write-clear mask. */
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    /* Clear the read-only mask for the low 32 bits of ICVEC. */
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        /* TR_REQ_IOVA fully writable; in TR_REQ_CTL only GO_BUSY is RO. */
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
            RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        /* Heap-allocated address space wrapping the supplied region. */
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
            "riscv-iommu-downstream");
    } else {
        /* Fallback to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
            "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    /*
     * Device translation context cache; g_free is installed as the key
     * destroy function and no value destroy function is set.
     */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    /* IOT cache, created with the same destroy-function arrangement. */
    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    /* Start unlinked from any other IOMMU, with no attached spaces. */
    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}
2229
/*
 * Unrealize callback: release what riscv_iommu_realize() allocated.
 *
 * Drops the references held on the IOT and device context caches and
 * frees the three register backing arrays (regs_rw, regs_ro, regs_wc).
 * The array pointers are reset to NULL so a stale pointer cannot be
 * dereferenced or freed a second time.
 *
 * NOTE(review): the address spaces initialised in realize (trap_as and,
 * when a downstream memory region is configured, the heap-allocated
 * target_as) are not torn down here - confirm whether this device needs
 * that.
 */
static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);

    /* Register storage allocated with g_new0() in realize. */
    g_free(s->regs_wc);
    s->regs_wc = NULL;
    g_free(s->regs_ro);
    s->regs_ro = NULL;
    g_free(s->regs_rw);
    s->regs_rw = NULL;
}
2237
2238 static Property riscv_iommu_properties[] = {
2239 DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
2240 RISCV_IOMMU_SPEC_DOT_VER),
2241 DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
2242 DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
2243 LIMIT_CACHE_IOT),
2244 DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
2245 DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
2246 DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
2247 DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
2248 DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
2249 DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
2250 TYPE_MEMORY_REGION, MemoryRegion *),
2251 DEFINE_PROP_END_OF_LIST(),
2252 };
2253
/*
 * Class initializer of the core IOMMU device: installs the realize and
 * unrealize callbacks, registers the property table and marks the device
 * as not user-creatable.
 */
static void riscv_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);

    device_class->realize = riscv_iommu_realize;
    device_class->unrealize = riscv_iommu_unrealize;
    device_class_set_props(device_class, riscv_iommu_properties);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    device_class->user_creatable = false;
}
2264
2265 static const TypeInfo riscv_iommu_info = {
2266 .name = TYPE_RISCV_IOMMU,
2267 .parent = TYPE_DEVICE,
2268 .instance_size = sizeof(RISCVIOMMUState),
2269 .class_init = riscv_iommu_class_init,
2270 };
2271
/*
 * Printable names for the two low access-permission bits; the translate
 * callback indexes this table with (flag & IOMMU_RW) when tracing.
 */
static const char *IOMMU_FLAG_STR[] = {
    [0] = "NA",
    [1] = "RO",
    [2] = "WR",
    [3] = "RW",
};
2278
2279 /* RISC-V IOMMU Memory Region - Address Translation Space */
riscv_iommu_memory_region_translate(IOMMUMemoryRegion * iommu_mr,hwaddr addr,IOMMUAccessFlags flag,int iommu_idx)2280 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2281 IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2282 IOMMUAccessFlags flag, int iommu_idx)
2283 {
2284 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2285 RISCVIOMMUContext *ctx;
2286 void *ref;
2287 IOMMUTLBEntry iotlb = {
2288 .iova = addr,
2289 .target_as = as->iommu->target_as,
2290 .addr_mask = ~0ULL,
2291 .perm = flag,
2292 };
2293
2294 ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2295 if (ctx == NULL) {
2296 /* Translation disabled or invalid. */
2297 iotlb.addr_mask = 0;
2298 iotlb.perm = IOMMU_NONE;
2299 } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2300 /* Translation disabled or fault reported. */
2301 iotlb.addr_mask = 0;
2302 iotlb.perm = IOMMU_NONE;
2303 }
2304
2305 /* Trace all dma translations with original access flags. */
2306 trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2307 PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2308 IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2309 iotlb.translated_addr);
2310
2311 riscv_iommu_ctx_put(as->iommu, ref);
2312
2313 return iotlb;
2314 }
2315
riscv_iommu_memory_region_notify(IOMMUMemoryRegion * iommu_mr,IOMMUNotifierFlag old,IOMMUNotifierFlag new,Error ** errp)2316 static int riscv_iommu_memory_region_notify(
2317 IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2318 IOMMUNotifierFlag new, Error **errp)
2319 {
2320 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2321
2322 if (old == IOMMU_NOTIFIER_NONE) {
2323 as->notifier = true;
2324 trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2325 } else if (new == IOMMU_NOTIFIER_NONE) {
2326 as->notifier = false;
2327 trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2328 }
2329
2330 return 0;
2331 }
2332
/*
 * Tell whether the 16-bit value read at PCI_CLASS_DEVICE in the device's
 * config space equals 0x0806, the class code treated here as "IOMMU".
 */
static inline bool pci_is_iommu(PCIDevice *pdev)
{
    const uint16_t class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);

    return class_id == 0x0806;
}
2337
riscv_iommu_find_as(PCIBus * bus,void * opaque,int devfn)2338 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2339 {
2340 RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2341 PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2342 AddressSpace *as = NULL;
2343
2344 if (pdev && pci_is_iommu(pdev)) {
2345 return s->target_as;
2346 }
2347
2348 /* Find first registered IOMMU device */
2349 while (s->iommus.le_prev) {
2350 s = *(s->iommus.le_prev);
2351 }
2352
2353 /* Find first matching IOMMU */
2354 while (s != NULL && as == NULL) {
2355 as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2356 s = s->iommus.le_next;
2357 }
2358
2359 return as ? as : &address_space_memory;
2360 }
2361
2362 static const PCIIOMMUOps riscv_iommu_ops = {
2363 .get_address_space = riscv_iommu_find_as,
2364 };
2365
riscv_iommu_pci_setup_iommu(RISCVIOMMUState * iommu,PCIBus * bus,Error ** errp)2366 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2367 Error **errp)
2368 {
2369 if (bus->iommu_ops &&
2370 bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2371 /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2372 RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2373 QLIST_INSERT_AFTER(last, iommu, iommus);
2374 } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2375 pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2376 } else {
2377 error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2378 pci_bus_num(bus));
2379 }
2380 }
2381
/*
 * IOMMUMemoryRegionClass.attrs_to_index callback: the index is the pid
 * carried in the transaction attributes, or RISCV_IOMMU_NOPROCID when the
 * attributes are unspecified.
 */
static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    if (attrs.unspecified) {
        return RISCV_IOMMU_NOPROCID;
    }

    return (int)attrs.pid;
}
2387
/*
 * IOMMUMemoryRegionClass.num_indexes callback: returns 2^pid_bits, with
 * pid_bits taken from the IOMMU managing this region's address space.
 */
static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *space = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUState *iommu = space->iommu;

    return 1 << iommu->pid_bits;
}
2393
/*
 * Class initializer of the RISC-V IOMMU memory region type: wires the
 * translate, notifier-change and index callbacks into the class.
 */
static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *mr_class = IOMMU_MEMORY_REGION_CLASS(klass);

    mr_class->translate = riscv_iommu_memory_region_translate;
    mr_class->notify_flag_changed = riscv_iommu_memory_region_notify;

    /* Index handling: attributes to index, and number of indexes. */
    mr_class->attrs_to_index = riscv_iommu_memory_region_index;
    mr_class->num_indexes = riscv_iommu_memory_region_index_len;
}
2403
2404 static const TypeInfo riscv_iommu_memory_region_info = {
2405 .parent = TYPE_IOMMU_MEMORY_REGION,
2406 .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2407 .class_init = riscv_iommu_memory_region_init,
2408 };
2409
riscv_iommu_register_mr_types(void)2410 static void riscv_iommu_register_mr_types(void)
2411 {
2412 type_register_static(&riscv_iommu_memory_region_info);
2413 type_register_static(&riscv_iommu_info);
2414 }
2415
2416 type_init(riscv_iommu_register_mr_types);
2417