1 /*
2 * QEMU emulation of an RISC-V IOMMU
3 *
4 * Copyright (C) 2021-2023, Rivos Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "exec/target_page.h"
22 #include "hw/pci/pci_bus.h"
23 #include "hw/pci/pci_device.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/riscv/riscv_hart.h"
26 #include "migration/vmstate.h"
27 #include "qapi/error.h"
28 #include "qemu/timer.h"
29
30 #include "cpu_bits.h"
31 #include "riscv-iommu.h"
32 #include "riscv-iommu-bits.h"
33 #include "riscv-iommu-hpm.h"
34 #include "trace.h"
35
/*
 * Cache size limits.
 * NOTE(review): presumably the maximum number of cached translation
 * contexts (CTX) and IOTLB entries (IOT); the users of these macros are
 * outside this view - confirm against them.
 */
#define LIMIT_CACHE_CTX               (1U << 7)
#define LIMIT_CACHE_IOT               (1U << 20)

/*
 * Physical page number conversions: PPN_PHYS() turns a page number into
 * a byte address, PPN_DOWN() turns a byte address into a page number.
 * Both shift by TARGET_PAGE_BITS in the type of their argument.
 */
#define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
42
/* Forward declaration; the structure itself is defined below. */
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/*
 * Device assigned I/O address space.
 * One instance ties a requester (devid) to the IOMMU instance managing it
 * and to the memory region / address space the device issues DMA through.
 */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list; /* Linkage in a QLIST of spaces */
};
54
/*
 * Translation type tag, stored in RISCVIOMMUEntry.tag to tell apart
 * cache entries produced by different translation setups.
 */
typedef enum RISCVIOMMUTransTag {
    RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
    RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
    RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
    RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
} RISCVIOMMUTransTag;
61
/*
 * Address translation cache entry.
 * The bit-fields are sized so that iova + pscid fill one 64-bit unit and
 * phys + gscid + perm use 62 bits of a second one.
 */
struct RISCVIOMMUEntry {
    RISCVIOMMUTransTag tag;     /* Translation Tag */
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0
74
riscv_iommu_get_icvec_vector(uint32_t icvec,uint32_t vec_type)75 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
76 {
77 switch (vec_type) {
78 case RISCV_IOMMU_INTR_CQ:
79 return icvec & RISCV_IOMMU_ICVEC_CIV;
80 case RISCV_IOMMU_INTR_FQ:
81 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
82 case RISCV_IOMMU_INTR_PM:
83 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
84 case RISCV_IOMMU_INTR_PQ:
85 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
86 default:
87 g_assert_not_reached();
88 }
89 }
90
/*
 * Mark an interrupt source as pending in IPSR and, if the value returned
 * by the IPSR update did not already have that bit set, deliver the
 * interrupt through the s->notify callback using the vector configured
 * in ICVEC.
 *
 * @s        : IOMMU Device State
 * @vec_type : interrupt source (RISCV_IOMMU_INTR_*), also the IPSR bit index
 *
 * Does nothing when no notify callback is installed.
 */
void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    uint32_t icvec_val, ipsr_val, pending_bit, vector;

    if (!s->notify) {
        return;
    }

    pending_bit = 1 << vec_type;
    icvec_val = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr_val = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, pending_bit, 0);

    if (ipsr_val & pending_bit) {
        /* Already flagged as pending: no new notification. */
        return;
    }

    vector = riscv_iommu_get_icvec_vector(icvec_val, vec_type);
    s->notify(s, vector);
    trace_riscv_iommu_notify_int_vector(vec_type, vector);
}
108
/*
 * Append a fault record to the in-memory fault queue.
 *
 * @s  : IOMMU Device State
 * @ev : fault record to store
 *
 * The record is dropped when the queue is not active (FQON clear) or when
 * an overflow (FQOF) or memory fault (FQMF) condition is already latched.
 * A full queue latches FQOF, a failed memory write latches FQMF, and a
 * successful write advances the tail register. In all of these three
 * cases the fault-queue interrupt is raised if FIE was set.
 */
static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    const uint32_t fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    const uint32_t fq_head =
        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    const uint32_t fq_tail =
        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    const uint32_t fq_next = (fq_tail + 1) & s->fq_mask;
    const uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), ev->hdr, ev->iotval);

    if (!(fqcsr & RISCV_IOMMU_FQCSR_FQON)) {
        return;
    }
    if (fqcsr & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF)) {
        return;
    }

    if (fq_head != fq_next) {
        /* There is room: store the record in the slot at the tail. */
        const dma_addr_t slot = s->fq_addr + fq_tail * sizeof(*ev);

        if (dma_memory_write(s->target_as, slot, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) == MEMTX_OK) {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, fq_next);
        } else {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        }
    } else {
        /* Queue full */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    }

    if (fqcsr & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}
144
/*
 * Append a page request record to the in-memory page request queue.
 *
 * @s  : IOMMU Device State
 * @pr : page request record to store
 *
 * The record is dropped when the queue is not active (PQON clear) or when
 * an overflow (PQOF) or memory fault (PQMF) condition is already latched.
 * A full queue latches PQOF, a failed memory write latches PQMF, and a
 * successful write advances the tail register. In all of these three
 * cases the page-queue interrupt is raised if PIE was set.
 */
static void riscv_iommu_pri(RISCVIOMMUState *s,
                            struct riscv_iommu_pq_record *pr)
{
    const uint32_t pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    const uint32_t pq_head =
        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    const uint32_t pq_tail =
        riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    const uint32_t pq_next = (pq_tail + 1) & s->pq_mask;
    const uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), pr->payload);

    if (!(pqcsr & RISCV_IOMMU_PQCSR_PQON)) {
        return;
    }
    if (pqcsr & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF)) {
        return;
    }

    if (pq_head != pq_next) {
        /* There is room: store the record in the slot at the tail. */
        const dma_addr_t slot = s->pq_addr + pq_tail * sizeof(*pr);

        if (dma_memory_write(s->target_as, slot, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) == MEMTX_OK) {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, pq_next);
        } else {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        }
    } else {
        /* Queue full */
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    }

    if (pqcsr & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}
180
/*
 * Parallel bit extract: keep only the bits of 'val' whose position is
 * set in the mask 'ext' and pack them, in their original order, at the
 * least-significant end of the result. All remaining high bits of the
 * result are zero.
 *
 * Example:
 *
 *     val = a b c d e f g h
 *     ext = 1 0 1 0 0 1 1 0
 *     ret = 0 0 0 0 a c f g
 *
 * The operation comes from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", and corresponds to the x86
 * bit manipulation instruction PEXT.
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t packed = 0;
    unsigned out_pos = 0;

    /* Walk both words from bit 0 upwards until no mask bits remain. */
    for (; ext != 0; ext >>= 1, val >>= 1) {
        if (!(ext & 1)) {
            continue;
        }
        packed |= (val & 1) << out_pos;
        out_pos++;
    }

    return packed;
}
217
218 /* Check if GPA matches MSI/MRIF pattern. */
riscv_iommu_msi_check(RISCVIOMMUState * s,RISCVIOMMUContext * ctx,dma_addr_t gpa)219 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
220 dma_addr_t gpa)
221 {
222 if (!s->enable_msi) {
223 return false;
224 }
225
226 if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
227 RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
228 return false; /* Invalid MSI/MRIF mode */
229 }
230
231 if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
232 return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
233 }
234
235 return true;
236 }
237
/*
 * RISCV IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementation can be merged into single helper function in future.
 * Keeping them separate for now, as error reporting and flow specifics are
 * sufficiently different for separate implementation.
 *
 * The walk handles first-stage only, G-stage only and nested translation
 * in a single loop: 'pass' selects the stage currently being walked and
 * sc[pass] holds that stage's geometry and progress. With both stages
 * enabled, every guest-physical pointer met during the first-stage walk
 * (the root pointer and each next-level table pointer) is itself pushed
 * through a G-stage walk before it is dereferenced.
 *
 * @s      : IOMMU Device State
 * @ctx    : Translation context for device id and process address space id.
 * @iotlb  : translation data: physical address and access mode.
 *           On entry iova and perm describe the request; on success
 *           translated_addr, addr_mask, perm and possibly target_as
 *           are updated.
 * @return : success (0) or fault cause code (RISCV_IOMMU_FQ_CAUSE_*).
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    /*
     * Per-stage walk state, indexed by 'pass':
     *   step      - number of table levels already visited
     *   levels    - depth of the page table for the selected mode
     *   ptidxbits - index bits consumed per level
     *   ptesize   - size of one page table entry in bytes
     */
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    /* At this point satp/gatp hold only the MODE fields. */
    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /*
     * S/G translation parameters.
     * The same switch serves both stages: the case labels use the
     * IOHGATP mode names while sv_mode picks the first-stage or the
     * G-stage (x4) capability bit depending on 'pass'. An unsupported
     * or unknown mode is reported as a misconfigured device context.
     */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
                   (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize   = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    };

    /*
     * S/G stages translation tables root pointers.
     * From here on satp/gatp hold root table addresses, not modes.
     * For nested translation the first thing walked is the S-stage root
     * pointer itself (a guest physical address) through the G-stage.
     */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;

    do {
        /* The G-stage root level indexes with 2 extra bits. */
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        /* Number of address bits below the index of the current level. */
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        /* When set, missing PTE A/D bits do not fail the access checks. */
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_len = va_skip + va_bits;
            const uint64_t va_mask = (1ULL << va_len) - 1;

            if (pass == S_STAGE && va_len > 32) {
                /*
                 * First-stage address wider than 32 bits: all bits from
                 * (va_len - 1) upwards must be equal, either all zeros
                 * or all ones.
                 */
                target_ulong mask, masked_msbs;

                mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
                masked_msbs = (addr >> (va_len - 1)) & mask;

                if (masked_msbs != 0 && masked_msbs != mask) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
                }
            } else {
                /* Otherwise no bits above va_len may be set. */
                if ((addr & va_mask) != addr) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
                }
            }
        }


        /* Count one walk event per visited level of the current stage. */
        if (pass == S_STAGE) {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
        } else {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        /*
         * Every 'break' below leaves the loop and ends in the page fault
         * return at the bottom of the function.
         */
        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /*
             * Continue with S-Stage translation?
             * 'base' now holds the host address of the S-stage table
             * that was just resolved through the G-stage.
             */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                                         : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /*
             * Continue with G-Stage translation?
             * The S-stage result is a GPA; restart the G-stage walk
             * from its root to obtain the final address.
             */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /*
         * Continue with G-Stage translation?
         * An inner S-stage PTE was read: its next-level table pointer
         * (now in 'base') is a GPA and has to go through the G-stage.
         */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    /* Page fault: cause depends on the access type and the failing stage. */
    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}
513
/*
 * Build a fault record and queue it in the fault queue.
 *
 * @s          : IOMMU Device State
 * @ctx        : translation context the fault belongs to (provides tc,
 *               devid and process_id)
 * @fault_type : transaction type, stored in the TTYPE header field
 * @cause      : fault cause code, stored in the CAUSE header field
 * @pv         : true when ctx->process_id is valid for this transaction
 * @iotval     : first fault value stored in the record
 * @iotval2    : second fault value stored in the record
 *
 * When the device context has DTF (disable translation fault reporting)
 * set, only the causes listed in the switch below are still reported;
 * every other cause is silently dropped.
 */
static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
    /*
     * PV tells software whether the PID field is meaningful, so it must
     * follow 'pv' rather than being set unconditionally: otherwise a
     * fault without a process id is reported as belonging to PID 0.
     */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);

    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}
552
/*
 * Redirect MSI write for given GPA.
 *
 * Looks up the MSI page table entry selected by @gpa and then either
 * forwards the write to the address held in the PTE (basic mode) or
 * updates a memory-resident interrupt file and optionally sends a
 * notification write (MRIF mode).
 *
 * @s     : IOMMU Device State
 * @ctx   : translation context (provides msiptp, msi_addr_mask, devid,
 *          process_id)
 * @gpa   : guest physical address targeted by the MSI write
 * @data  : value being written (the interrupt identity in MRIF mode)
 * @size  : size of the write in bytes
 * @attrs : memory transaction attributes for the forwarded accesses
 * @return : MEMTX_OK on success. On any failure a fault record is
 *           reported via riscv_iommu_report_fault() and the failing
 *           MemTxResult is returned.
 */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    size_t offset;
    uint32_t n190;
    uint64_t pte[2];    /* One MSI PTE is two 64-bit words */
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    offset = intn * sizeof(pte);

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    /*
     * The offset is OR-ed into the table base below, so any bit shared
     * between the two means the entry would not lie where the OR puts it.
     */
    if (addr & offset) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    addr |= offset;
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
            MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    /* The PTE is stored little-endian in memory. */
    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        /* Forward the original write to the page named by the PTE. */
        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047) or destination address is not 32-bit
     * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /* MSI MRIF mode, non atomic pending bit update */

    /*
     * MRIF pending bit address.
     * (data & 0x7c0) >> 3 equals (data / 64) * 16: each group of 64
     * interrupt identities owns a 16-byte slot, a pending word followed
     * by the enable word read further down.
     */
    addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
    addr = addr | ((data & 0x7c0) >> 3);

    trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                          PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                          gpa, addr);

    /*
     * MRIF pending bit mask.
     * From here on 'data' is the single-bit mask and 'intn' is reused as
     * scratch for the 64-bit words read from the interrupt file.
     * NOTE(review): unlike pte[], these words are not byte-swapped after
     * being read or before being written - confirm behaviour on
     * big-endian hosts.
     */
    data = 1ULL << (data & 0x03f);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    intn = intn | data;
    res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    /* Get MRIF enable bits */
    addr = addr + sizeof(intn);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    if (!(intn & data)) {
        /* notification disabled, MRIF update completed. */
        return MEMTX_OK;
    }

    /* Send notification message */
    addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
    /* The notification identity is split: low bits in NID, bit 10 in NID_MSB. */
    n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
          (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);

    res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);

    return MEMTX_OK;

err:
    /* Common failure exit: 'res' and 'cause' were set before the jump. */
    riscv_iommu_report_fault(s, ctx, fault_type, cause,
                             !!ctx->process_id, 0, 0);
    return res;
}
701
702 /*
703 * Check device context configuration as described by the
704 * riscv-iommu spec section "Device-context configuration
705 * checks".
706 */
riscv_iommu_validate_device_ctx(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)707 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
708 RISCVIOMMUContext *ctx)
709 {
710 uint32_t fsc_mode, msi_mode;
711 uint64_t gatp;
712
713 if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
714 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
715 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
716 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
717 return false;
718 }
719
720 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
721 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
722 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
723 return false;
724 }
725
726 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
727 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
728 return false;
729 }
730
731 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
732 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
733 return false;
734 }
735
736 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
737 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
738
739 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
740 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
741 return false;
742 }
743 }
744
745 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
746 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
747 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
748 return false;
749 }
750
751 fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
752
753 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
754 switch (fsc_mode) {
755 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
756 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
757 return false;
758 }
759 break;
760 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
761 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
762 return false;
763 }
764 break;
765 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
766 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
767 return false;
768 }
769 break;
770 }
771 } else {
772 /* DC.tc.PDTV is 0 */
773 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
774 return false;
775 }
776
777 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
778 if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
779 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
780 return false;
781 }
782 } else {
783 switch (fsc_mode) {
784 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
785 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
786 return false;
787 }
788 break;
789 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
790 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
791 return false;
792 }
793 break;
794 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
795 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
796 return false;
797 }
798 break;
799 }
800 }
801 }
802
803 /*
804 * CAP_END is always zero (only one endianess). FCTL_BE is
805 * always zero (little-endian accesses). Thus TC_SBE must
806 * always be LE, i.e. zero.
807 */
808 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
809 return false;
810 }
811
812 return true;
813 }
814
815 /*
816 * Validate process context (PC) according to section
817 * "Process-context configuration checks".
818 */
riscv_iommu_validate_process_ctx(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)819 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
820 RISCVIOMMUContext *ctx)
821 {
822 uint32_t mode;
823
824 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
825 return false;
826 }
827
828 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
829 return false;
830 }
831
832 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
833 switch (mode) {
834 case RISCV_IOMMU_DC_FSC_MODE_BARE:
835 /* sv39 and sv32 modes have the same value (8) */
836 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
837 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
838 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
839 break;
840 default:
841 return false;
842 }
843
844 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
845 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
846 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
847 return false;
848 }
849 } else {
850 switch (mode) {
851 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
852 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
853 return false;
854 }
855 break;
856 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
857 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
858 return false;
859 }
860 break;
861 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
862 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
863 return false;
864 }
865 break;
866 }
867 }
868
869 return true;
870 }
871
/**
 * pdt_memory_read: PDT wrapper of dma_memory_read.
 *
 * Reads @len bytes starting at @addr. When the context's G-stage mode is
 * Bare the read is done at @addr directly; otherwise @addr is first
 * translated with a G-stage page table walk rooted at ctx->gatp and the
 * read is done at the resulting address.
 *
 * The walk only checks PTE validity, reserved permission encodings and
 * superpage alignment; it performs no R/W permission or A/D bit checks.
 * NOTE(review): only the start address is translated, so the caller
 * presumably never passes a buffer that crosses a page boundary - confirm.
 *
 * @s: IOMMU Device State
 * @ctx: Device Translation Context with devid and pasid set
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the data transferred
 * @attrs: memory transaction attributes
 *
 * Returns the result of the final dma_memory_read(), the error of a
 * failed PTE load, or MEMTX_ACCESS_ERROR when the G-stage mode is
 * unsupported or the walk fails.
 */
static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
                                   RISCVIOMMUContext *ctx,
                                   dma_addr_t addr,
                                   void *buf, dma_addr_t len,
                                   MemTxAttrs attrs)
{
    uint64_t gatp_mode, pte;
    /*
     * Walk state:
     *   step      - number of table levels already visited
     *   levels    - depth of the page table for the selected mode
     *   ptidxbits - index bits consumed per level
     *   ptesize   - size of one page table entry in bytes
     */
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc;
    MemTxResult ret;
    /* Without G-stage translation the read address is @addr itself. */
    dma_addr_t base = addr;

    /* G stages translation mode */
    gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
    if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
        goto out;
    }

    /* G stages translation tables root pointer */
    base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));

    /* Start at step 0 */
    sc.step = 0;

    if (s->fctl & RISCV_IOMMU_FCTL_GXL) {
        /* 32bit mode for GXL == 1 */
        switch (gatp_mode) {
        case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
            if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) {
                return MEMTX_ACCESS_ERROR;
            }
            sc.levels    = 2;
            sc.ptidxbits = 10;
            sc.ptesize   = 4;
            break;
        default:
            return MEMTX_ACCESS_ERROR;
        }
    } else {
        /* 64bit mode for GXL == 0 */
        switch (gatp_mode) {
        case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
            if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) {
                return MEMTX_ACCESS_ERROR;
            }
            sc.levels    = 3;
            sc.ptidxbits = 9;
            sc.ptesize   = 8;
            break;
        case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
            if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) {
                return MEMTX_ACCESS_ERROR;
            }
            sc.levels    = 4;
            sc.ptidxbits = 9;
            sc.ptesize   = 8;
            break;
        case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
            if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) {
                return MEMTX_ACCESS_ERROR;
            }
            sc.levels    = 5;
            sc.ptidxbits = 9;
            sc.ptesize   = 8;
            break;
        default:
            return MEMTX_ACCESS_ERROR;
        }
    }

    do {
        /* The root level indexes with 2 extra bits. */
        const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
        /* Number of address bits below the index of the current level. */
        const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
                                 (sc.levels - 1 - sc.step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc.ptesize;

        /* Address range check before first level lookup */
        if (!sc.step) {
            const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
            if ((addr & va_mask) != addr) {
                return MEMTX_ACCESS_ERROR;
            }
        }

        /* Read page table entry */
        if (sc.ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
        }
        if (ret != MEMTX_OK) {
            return ret;
        }

        sc.step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            return MEMTX_ACCESS_ERROR; /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
        } else {
            /* Leaf PTE, translation completed. */
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            break;
        }

        if (sc.step == sc.levels) {
            return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
        }
    } while (1);

out:
    /* 'base' is either @addr (Bare) or the translated address. */
    return dma_memory_read(s->target_as, base, buf, len, attrs);
}
1010
1011 /*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
1013 *
1014 * @s : IOMMU Device State
1015 * @ctx : Device Translation Context with devid and process_id set.
1016 * @return : success or fault code.
1017 */
riscv_iommu_ctx_fetch(RISCVIOMMUState * s,RISCVIOMMUContext * ctx)1018 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
1019 {
1020 const uint64_t ddtp = s->ddtp;
1021 unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
1022 dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
1023 struct riscv_iommu_dc dc;
1024 /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
1025 const int dc_fmt = !s->enable_msi;
1026 const size_t dc_len = sizeof(dc) >> dc_fmt;
1027 int depth;
1028 uint64_t de;
1029
1030 switch (mode) {
1031 case RISCV_IOMMU_DDTP_MODE_OFF:
1032 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
1033
1034 case RISCV_IOMMU_DDTP_MODE_BARE:
1035 /* mock up pass-through translation context */
1036 ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
1037 RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
1038 ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
1039 RISCV_IOMMU_DC_FSC_MODE_BARE);
1040
1041 ctx->tc = RISCV_IOMMU_DC_TC_V;
1042 if (s->enable_ats) {
1043 ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
1044 }
1045
1046 ctx->ta = 0;
1047 ctx->msiptp = 0;
1048 return 0;
1049
1050 case RISCV_IOMMU_DDTP_MODE_1LVL:
1051 depth = 0;
1052 break;
1053
1054 case RISCV_IOMMU_DDTP_MODE_2LVL:
1055 depth = 1;
1056 break;
1057
1058 case RISCV_IOMMU_DDTP_MODE_3LVL:
1059 depth = 2;
1060 break;
1061
1062 default:
1063 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1064 }
1065
1066 /*
1067 * Check supported device id width (in bits).
1068 * See IOMMU Specification, Chapter 6. Software guidelines.
1069 * - if extended device-context format is used:
1070 * 1LVL: 6, 2LVL: 15, 3LVL: 24
1071 * - if base device-context format is used:
1072 * 1LVL: 7, 2LVL: 16, 3LVL: 24
1073 */
1074 if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
1075 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1076 }
1077
1078 /* Device directory tree walk */
1079 for (; depth-- > 0; ) {
1080 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
1081 /*
1082 * Select device id index bits based on device directory tree level
1083 * and device context format.
1084 * See IOMMU Specification, Chapter 2. Data Structures.
1085 * - if extended device-context format is used:
1086 * device index: [23:15][14:6][5:0]
1087 * - if base device-context format is used:
1088 * device index: [23:16][15:7][6:0]
1089 */
1090 const int split = depth * 9 + 6 + dc_fmt;
1091 addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
1092 if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1093 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1094 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
1095 }
1096 le64_to_cpus(&de);
1097 if (!(de & RISCV_IOMMU_DDTE_VALID)) {
1098 /* invalid directory entry */
1099 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1100 }
1101 if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
1102 /* reserved bits set */
1103 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1104 }
1105 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
1106 }
1107
1108 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
1109
1110 /* index into device context entry page */
1111 addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
1112
1113 memset(&dc, 0, sizeof(dc));
1114 if (dma_memory_read(s->target_as, addr, &dc, dc_len,
1115 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1116 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
1117 }
1118
1119 /* Set translation context. */
1120 ctx->tc = le64_to_cpu(dc.tc);
1121 ctx->gatp = le64_to_cpu(dc.iohgatp);
1122 ctx->satp = le64_to_cpu(dc.fsc);
1123 ctx->ta = le64_to_cpu(dc.ta);
1124 ctx->msiptp = le64_to_cpu(dc.msiptp);
1125 ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
1126 ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
1127
1128 if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1129 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1130 }
1131
1132 if (!riscv_iommu_validate_device_ctx(s, ctx)) {
1133 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1134 }
1135
1136 /* FSC field checks */
1137 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
1138 addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
1139
1140 if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
1141 if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
1142 /* PID is disabled */
1143 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1144 }
1145 if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
1146 /* Invalid translation mode */
1147 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1148 }
1149 return 0;
1150 }
1151
1152 if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
1153 if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
1154 /* No default process_id enabled, set BARE mode */
1155 ctx->satp = 0ULL;
1156 return 0;
1157 } else {
1158 /* Use default process_id #0 */
1159 ctx->process_id = 0;
1160 }
1161 }
1162
1163 if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1164 /* No S-Stage translation, done. */
1165 return 0;
1166 }
1167
1168 /* FSC.TC.PDTV enabled */
1169 if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1170 /* Invalid PDTP.MODE */
1171 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1172 }
1173
1174 for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1175 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1176
1177 /*
1178 * Select process id index bits based on process directory tree
1179 * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1180 */
1181 const int split = depth * 9 + 8;
1182 addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1183 if (pdt_memory_read(s, ctx, addr, &de, sizeof(de),
1184 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1185 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1186 }
1187 le64_to_cpus(&de);
1188 if (!(de & RISCV_IOMMU_PDTE_VALID)) {
1189 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1190 }
1191 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
1192 }
1193
1194 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1195
1196 /* Leaf entry in PDT */
1197 addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1198 if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2,
1199 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1200 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1201 }
1202
1203 /* Use FSC and TA from process directory entry. */
1204 ctx->ta = le64_to_cpu(dc.ta);
1205 ctx->satp = le64_to_cpu(dc.fsc);
1206
1207 if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1208 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1209 }
1210
1211 if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1212 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1213 }
1214
1215 return 0;
1216 }
1217
1218 /* Translation Context cache support */
riscv_iommu_ctx_equal(gconstpointer v1,gconstpointer v2)1219 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1220 {
1221 RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1222 RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1223 return c1->devid == c2->devid &&
1224 c1->process_id == c2->process_id;
1225 }
1226
riscv_iommu_ctx_hash(gconstpointer v)1227 static guint riscv_iommu_ctx_hash(gconstpointer v)
1228 {
1229 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1230 /*
1231 * Generate simple hash of (process_id, devid)
1232 * assuming 24-bit wide devid.
1233 */
1234 return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1235 }
1236
/*
 * g_hash_table_foreach() callback: clear the valid bit (TC.V) of a cached
 * context whose device id and process id both match the ones in @data.
 * Entries that are already invalid are left untouched.
 */
static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *cached = value;
    const RISCVIOMMUContext *match = data;

    if (!(cached->tc & RISCV_IOMMU_DC_TC_V)) {
        return;
    }
    if (cached->devid != match->devid) {
        return;
    }
    if (cached->process_id != match->process_id) {
        return;
    }
    cached->tc &= ~RISCV_IOMMU_DC_TC_V;
}
1248
/*
 * g_hash_table_foreach() callback: clear the valid bit (TC.V) of a cached
 * context whose device id matches the one in @data, for any process id.
 */
static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *cached = value;
    const RISCVIOMMUContext *match = data;

    if (!(cached->tc & RISCV_IOMMU_DC_TC_V)) {
        return;
    }
    if (cached->devid != match->devid) {
        return;
    }
    cached->tc &= ~RISCV_IOMMU_DC_TC_V;
}
1259
/*
 * g_hash_table_foreach() callback: clear the valid bit (TC.V) of every
 * cached context that is still valid. @key and @data are not used.
 */
static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *cached = value;

    if (!(cached->tc & RISCV_IOMMU_DC_TC_V)) {
        return;
    }
    cached->tc &= ~RISCV_IOMMU_DC_TC_V;
}
1268
/*
 * Run the invalidation callback @func over every entry of the context
 * cache, passing a template context carrying @devid and @process_id as
 * the callback data. A reference on the cache is held while iterating.
 */
static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    RISCVIOMMUContext match = {
        .devid = devid,
        .process_id = process_id,
    };
    GHashTable *cache = g_hash_table_ref(s->ctx_cache);

    g_hash_table_foreach(cache, func, &match);
    g_hash_table_unref(cache);
}
1281
1282 /* Find or allocate translation context for a given {device_id, process_id} */
riscv_iommu_ctx(RISCVIOMMUState * s,unsigned devid,unsigned process_id,void ** ref)1283 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1284 unsigned devid, unsigned process_id,
1285 void **ref)
1286 {
1287 GHashTable *ctx_cache;
1288 RISCVIOMMUContext *ctx;
1289 RISCVIOMMUContext key = {
1290 .devid = devid,
1291 .process_id = process_id,
1292 };
1293
1294 ctx_cache = g_hash_table_ref(s->ctx_cache);
1295 ctx = g_hash_table_lookup(ctx_cache, &key);
1296
1297 if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1298 *ref = ctx_cache;
1299 return ctx;
1300 }
1301
1302 ctx = g_new0(RISCVIOMMUContext, 1);
1303 ctx->devid = devid;
1304 ctx->process_id = process_id;
1305
1306 int fault = riscv_iommu_ctx_fetch(s, ctx);
1307 if (!fault) {
1308 if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1309 g_hash_table_unref(ctx_cache);
1310 ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1311 riscv_iommu_ctx_equal,
1312 g_free, NULL);
1313 g_hash_table_ref(ctx_cache);
1314 g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1315 }
1316 g_hash_table_add(ctx_cache, ctx);
1317 *ref = ctx_cache;
1318 return ctx;
1319 }
1320
1321 g_hash_table_unref(ctx_cache);
1322 *ref = NULL;
1323
1324 riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1325 fault, !!process_id, 0, 0);
1326
1327 g_free(ctx);
1328 return NULL;
1329 }
1330
/*
 * Release the context cache reference handed out through the 'ref'
 * output of riscv_iommu_ctx(). A NULL @ref is accepted and ignored.
 */
static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    GHashTable *cache = (GHashTable *)ref;

    if (cache == NULL) {
        return;
    }
    g_hash_table_unref(cache);
}
1337
1338 /* Find or allocate address space for a given device */
riscv_iommu_space(RISCVIOMMUState * s,uint32_t devid)1339 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1340 {
1341 RISCVIOMMUSpace *as;
1342
1343 /* FIXME: PCIe bus remapping for attached endpoints. */
1344 devid |= s->bus << 8;
1345
1346 QLIST_FOREACH(as, &s->spaces, list) {
1347 if (as->devid == devid) {
1348 break;
1349 }
1350 }
1351
1352 if (as == NULL) {
1353 char name[64];
1354 as = g_new0(RISCVIOMMUSpace, 1);
1355
1356 as->iommu = s;
1357 as->devid = devid;
1358
1359 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1360 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1361
1362 /* IOVA address space, untranslated addresses */
1363 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1364 TYPE_RISCV_IOMMU_MEMORY_REGION,
1365 OBJECT(as), "riscv_iommu", UINT64_MAX);
1366 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1367
1368 QLIST_INSERT_HEAD(&s->spaces, as, list);
1369
1370 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1371 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1372 }
1373 return &as->iova_as;
1374 }
1375
1376 /* Translation Object cache support */
riscv_iommu_iot_equal(gconstpointer v1,gconstpointer v2)1377 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1378 {
1379 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1380 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1381 return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1382 t1->iova == t2->iova && t1->tag == t2->tag;
1383 }
1384
riscv_iommu_iot_hash(gconstpointer v)1385 static guint riscv_iommu_iot_hash(gconstpointer v)
1386 {
1387 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1388 return (guint)t->iova;
1389 }
1390
1391 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
1392 /* GV: 0 AV: 0 GVMA: 1 */
1393 static
riscv_iommu_iot_inval_all(gpointer key,gpointer value,gpointer data)1394 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
1395 {
1396 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1397 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1398 if (iot->tag == arg->tag) {
1399 iot->perm = IOMMU_NONE;
1400 }
1401 }
1402
1403 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
1404 static
riscv_iommu_iot_inval_pscid(gpointer key,gpointer value,gpointer data)1405 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
1406 {
1407 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1408 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1409 if (iot->tag == arg->tag &&
1410 iot->pscid == arg->pscid) {
1411 iot->perm = IOMMU_NONE;
1412 }
1413 }
1414
1415 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
1416 static
riscv_iommu_iot_inval_iova(gpointer key,gpointer value,gpointer data)1417 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
1418 {
1419 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1420 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1421 if (iot->tag == arg->tag &&
1422 iot->iova == arg->iova) {
1423 iot->perm = IOMMU_NONE;
1424 }
1425 }
1426
1427 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
riscv_iommu_iot_inval_pscid_iova(gpointer key,gpointer value,gpointer data)1428 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1429 gpointer data)
1430 {
1431 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1432 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1433 if (iot->tag == arg->tag &&
1434 iot->pscid == arg->pscid &&
1435 iot->iova == arg->iova) {
1436 iot->perm = IOMMU_NONE;
1437 }
1438 }
1439
1440 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
1441 /* GV: 1 AV: 0 GVMA: 1 */
1442 static
riscv_iommu_iot_inval_gscid(gpointer key,gpointer value,gpointer data)1443 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
1444 {
1445 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1446 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1447 if (iot->tag == arg->tag &&
1448 iot->gscid == arg->gscid) {
1449 iot->perm = IOMMU_NONE;
1450 }
1451 }
1452
1453 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
riscv_iommu_iot_inval_gscid_pscid(gpointer key,gpointer value,gpointer data)1454 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
1455 gpointer data)
1456 {
1457 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1458 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1459 if (iot->tag == arg->tag &&
1460 iot->gscid == arg->gscid &&
1461 iot->pscid == arg->pscid) {
1462 iot->perm = IOMMU_NONE;
1463 }
1464 }
1465
1466 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
1467 /* GV: 1 AV: 1 GVMA: 1 */
riscv_iommu_iot_inval_gscid_iova(gpointer key,gpointer value,gpointer data)1468 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
1469 gpointer data)
1470 {
1471 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1472 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1473 if (iot->tag == arg->tag &&
1474 iot->gscid == arg->gscid &&
1475 iot->iova == arg->iova) {
1476 iot->perm = IOMMU_NONE;
1477 }
1478 }
1479
1480 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key,gpointer value,gpointer data)1481 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
1482 gpointer data)
1483 {
1484 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1485 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1486 if (iot->tag == arg->tag &&
1487 iot->gscid == arg->gscid &&
1488 iot->pscid == arg->pscid &&
1489 iot->iova == arg->iova) {
1490 iot->perm = IOMMU_NONE;
1491 }
1492 }
1493
1494 /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_lookup(RISCVIOMMUContext * ctx,GHashTable * iot_cache,hwaddr iova,RISCVIOMMUTransTag transtag)1495 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1496 GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
1497 {
1498 RISCVIOMMUEntry key = {
1499 .tag = transtag,
1500 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1501 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1502 .iova = PPN_DOWN(iova),
1503 };
1504 return g_hash_table_lookup(iot_cache, &key);
1505 }
1506
1507 /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_update(RISCVIOMMUState * s,GHashTable * iot_cache,RISCVIOMMUEntry * iot)1508 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1509 GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1510 {
1511 if (!s->iot_limit) {
1512 return;
1513 }
1514
1515 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1516 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1517 riscv_iommu_iot_equal,
1518 g_free, NULL);
1519 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1520 }
1521 g_hash_table_add(iot_cache, iot);
1522 }
1523
/*
 * Run the invalidation callback @func over every cached translation,
 * passing a template entry built from @transtag, @gscid, @pscid and the
 * page number of @iova as the callback data. A reference on the cache is
 * held while iterating.
 */
static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
    uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
{
    RISCVIOMMUEntry match = { 0 };
    GHashTable *cache;

    match.tag = transtag;
    match.gscid = gscid;
    match.pscid = pscid;
    match.iova = PPN_DOWN(iova);

    cache = g_hash_table_ref(s->iot_cache);
    g_hash_table_foreach(cache, func, &match);
    g_hash_table_unref(cache);
}
1539
riscv_iommu_get_transtag(RISCVIOMMUContext * ctx)1540 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
1541 {
1542 uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
1543 uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
1544
1545 if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1546 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1547 RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
1548 } else {
1549 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1550 RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
1551 }
1552 }
1553
/*
 * Translate the request described by @iotlb using context @ctx.
 *
 * A request with iotlb->perm == IOMMU_NONE is handled as an ATS request.
 * The translation cache is consulted first; on a miss the translation is
 * resolved through riscv_iommu_spa_fetch() and, when @enable_cache is set
 * and the result is neither identity-mapped nor a trapped MSI, a new
 * cache entry is created.
 *
 * On failure either a page request is queued (ATS request with the
 * custom TC[32] bit set) or a fault record is reported.
 *
 * @return : 0 on success, otherwise the fault cause.
 */
static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);

    /* Keep the cache table alive for the duration of this translation. */
    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    /* Cache hit: entries with perm == IOMMU_NONE count as invalidated. */
    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
        iot = g_new0(RISCVIOMMUEntry, 1);
        iot->iova = PPN_DOWN(iotlb->iova);
        iot->phys = PPN_DOWN(iotlb->translated_addr);
        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
        iot->perm = iotlb->perm;
        iot->tag = transtag;
        /* The entry is handed over to the cache update helper. */
        riscv_iommu_iot_update(s, iot_cache, iot);
    }

done:
    g_hash_table_unref(iot_cache);

    if (enable_pri && fault) {
        /* Queue a page request instead of reporting a fault. */
        struct riscv_iommu_pq_record pr = {0};
        if (enable_pid) {
            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
        }
        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
                     RISCV_IOMMU_PREQ_PAYLOAD_M;
        riscv_iommu_pri(s, &pr);
        return fault;
    }

    if (fault) {
        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;

        /*
         * Classify by the write bit first. Testing against IOMMU_RW here
         * would also match read-only requests (IOMMU_RW includes the
         * read bit), making the read fault type unreachable.
         */
        if (iotlb->perm & IOMMU_WO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}
1655
1656 /* IOMMU Command Interface */
riscv_iommu_iofence(RISCVIOMMUState * s,bool notify,uint64_t addr,uint32_t data)1657 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1658 uint64_t addr, uint32_t data)
1659 {
1660 /*
1661 * ATS processing in this implementation of the IOMMU is synchronous,
1662 * no need to wait for completions here.
1663 */
1664 if (!notify) {
1665 return MEMTX_OK;
1666 }
1667
1668 return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1669 MEMTXATTRS_UNSPECIFIED);
1670 }
1671
/*
 * Common handler for ATS commands.
 *
 * Decodes the target device id (and process id) from @cmd, finds the
 * matching device address space and sends an IOMMU event of type @flag
 * with permission @perm to its registered notifiers. When the command has
 * PV set, only notifiers whose iommu_idx equals the process id are
 * notified. @trace_fn is invoked once per notified notifier.
 */
static void riscv_iommu_ats(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
    IOMMUAccessFlags perm,
    void (*trace_fn)(const char *id))
{
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *notifier;
    IOMMUTLBEvent event;
    uint32_t devid;
    uint32_t pid;

    if (!(cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV)) {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    } else {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    /* Unknown device: nothing to notify. */
    if (!as) {
        return;
    }
    /* Device space without an enabled notifier: nothing to notify. */
    if (!as->notifier) {
        return;
    }

    event.type = flag;
    event.entry.perm = perm;
    event.entry.target_as = s->target_as;

    IOMMU_NOTIFIER_FOREACH(notifier, &as->iova_mr) {
        if (!pv || notifier->iommu_idx == pid) {
            /* Cover the whole range the notifier is registered for. */
            event.entry.iova = notifier->start;
            event.entry.addr_mask = notifier->end - notifier->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(notifier, &event);
        }
    }
}
1717
/*
 * ATS invalidation command: forward to the common ATS handler as a
 * device-IOTLB unmap event carrying no access permission.
 */
static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
                                  struct riscv_iommu_command *cmd)
{
    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                    trace_riscv_iommu_ats_inval);
}
1724
/*
 * ATS page request group response command: forward to the common ATS
 * handler as a map event. The response code from the command is conveyed
 * through the permission: IOMMU_RW for code 0, IOMMU_NONE otherwise.
 */
static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
                                 struct riscv_iommu_command *cmd)
{
    const unsigned resp_code = get_field(cmd->dword1,
                                         RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm;

    if (resp_code) {
        perm = IOMMU_NONE;
    } else {
        perm = IOMMU_RW;
    }

    riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                    trace_riscv_iommu_ats_prgr);
}
1736
/*
 * Handle an update of the DDTP register.
 *
 * The requested mode transition is validated against the current mode.
 * An accepted value is sanitized (only the PPN and MODE fields are kept);
 * a rejected one is replaced by the previous DDTP value. The result is
 * stored in s->ddtp and written back to the register.
 */
static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    const uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    const unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    const unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);

    const bool to_off_or_bare = new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
                                new_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    const bool to_table = new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
                          new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
                          new_mode == RISCV_IOMMU_DDTP_MODE_3LVL;
    const bool from_off_or_bare = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
                                  old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    bool accepted = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode || to_off_or_bare) {
        accepted = true;
    } else if (to_table) {
        accepted = from_off_or_bare;
    }

    if (!accepted) {
        new_ddtp = old_ddtp;
    } else {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    }

    s->ddtp = new_ddtp;
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}
1772
1773 /* Command function and opcode field. */
1774 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
1775
riscv_iommu_process_cq_tail(RISCVIOMMUState * s)1776 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
1777 {
1778 struct riscv_iommu_command cmd;
1779 MemTxResult res;
1780 dma_addr_t addr;
1781 uint32_t tail, head, ctrl;
1782 uint64_t cmd_opcode;
1783 GHFunc func;
1784
1785 ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1786 tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
1787 head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
1788
1789 /* Check for pending error or queue processing disabled */
1790 if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
1791 !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
1792 return;
1793 }
1794
1795 while (tail != head) {
1796 addr = s->cq_addr + head * sizeof(cmd);
1797 res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
1798 MEMTXATTRS_UNSPECIFIED);
1799
1800 if (res != MEMTX_OK) {
1801 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1802 RISCV_IOMMU_CQCSR_CQMF, 0);
1803 goto fault;
1804 }
1805
1806 trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
1807
1808 cmd_opcode = get_field(cmd.dword0,
1809 RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
1810
1811 switch (cmd_opcode) {
1812 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
1813 RISCV_IOMMU_CMD_IOFENCE_OPCODE):
1814 res = riscv_iommu_iofence(s,
1815 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
1816 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
1817
1818 if (res != MEMTX_OK) {
1819 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1820 RISCV_IOMMU_CQCSR_CQMF, 0);
1821 goto fault;
1822 }
1823 break;
1824
1825 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
1826 RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1827 {
1828 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1829 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1830 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1831 uint32_t gscid = get_field(cmd.dword0,
1832 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1833 uint32_t pscid = get_field(cmd.dword0,
1834 RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1835 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1836
1837 if (pscv) {
1838 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
1839 goto cmd_ill;
1840 }
1841
1842 func = riscv_iommu_iot_inval_all;
1843
1844 if (gv) {
1845 func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1846 riscv_iommu_iot_inval_gscid;
1847 }
1848
1849 riscv_iommu_iot_inval(
1850 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);
1851
1852 riscv_iommu_iot_inval(
1853 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
1854 break;
1855 }
1856
1857 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
1858 RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1859 {
1860 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1861 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1862 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1863 uint32_t gscid = get_field(cmd.dword0,
1864 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1865 uint32_t pscid = get_field(cmd.dword0,
1866 RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1867 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1868 RISCVIOMMUTransTag transtag;
1869
1870 if (gv) {
1871 transtag = RISCV_IOMMU_TRANS_TAG_VN;
1872 if (pscv) {
1873 func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
1874 riscv_iommu_iot_inval_gscid_pscid;
1875 } else {
1876 func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1877 riscv_iommu_iot_inval_gscid;
1878 }
1879 } else {
1880 transtag = RISCV_IOMMU_TRANS_TAG_SS;
1881 if (pscv) {
1882 func = (av) ? riscv_iommu_iot_inval_pscid_iova :
1883 riscv_iommu_iot_inval_pscid;
1884 } else {
1885 func = (av) ? riscv_iommu_iot_inval_iova :
1886 riscv_iommu_iot_inval_all;
1887 }
1888 }
1889
1890 riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
1891 break;
1892 }
1893
1894 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
1895 RISCV_IOMMU_CMD_IODIR_OPCODE):
1896 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1897 /* invalidate all device context cache mappings */
1898 func = riscv_iommu_ctx_inval_all;
1899 } else {
1900 /* invalidate all device context matching DID */
1901 func = riscv_iommu_ctx_inval_devid;
1902 }
1903 riscv_iommu_ctx_inval(s, func,
1904 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
1905 break;
1906
1907 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
1908 RISCV_IOMMU_CMD_IODIR_OPCODE):
1909 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1910 /* illegal command arguments IODIR_PDT & DV == 0 */
1911 goto cmd_ill;
1912 } else {
1913 func = riscv_iommu_ctx_inval_devid_procid;
1914 }
1915 riscv_iommu_ctx_inval(s, func,
1916 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
1917 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
1918 break;
1919
1920 /* ATS commands */
1921 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
1922 RISCV_IOMMU_CMD_ATS_OPCODE):
1923 if (!s->enable_ats) {
1924 goto cmd_ill;
1925 }
1926
1927 riscv_iommu_ats_inval(s, &cmd);
1928 break;
1929
1930 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
1931 RISCV_IOMMU_CMD_ATS_OPCODE):
1932 if (!s->enable_ats) {
1933 goto cmd_ill;
1934 }
1935
1936 riscv_iommu_ats_prgr(s, &cmd);
1937 break;
1938
1939 default:
1940 cmd_ill:
1941 /* Invalid instruction, do not advance instruction index. */
1942 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1943 RISCV_IOMMU_CQCSR_CMD_ILL, 0);
1944 goto fault;
1945 }
1946
1947 /* Advance and update head pointer after command completes. */
1948 head = (head + 1) & s->cq_mask;
1949 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
1950 }
1951 return;
1952
1953 fault:
1954 if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
1955 riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
1956 }
1957 }
1958
/*
 * Handle a change of the command queue enable bit in CQCSR.
 *
 * - enable requested while inactive: latch queue size mask and base
 *   address from CQB, reset head and tail, set CQON and clear the busy
 *   and error/status bits;
 * - disable requested while active: clear CQON and the busy bit;
 * - otherwise: only clear the busy bit.
 */
static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    const uint32_t cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    const bool enable = !!(cqcsr & RISCV_IOMMU_CQCSR_CQEN);
    const bool active = !!(cqcsr & RISCV_IOMMU_CQCSR_CQON);
    uint32_t ctrl_set = 0;
    uint32_t ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;

    if (enable && !active) {
        const uint64_t base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);

        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        /* Tail register: bits outside the queue index mask are read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr |= RISCV_IOMMU_CQCSR_CQMF |
                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        /* Queue off: the whole tail register becomes read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_clr |= RISCV_IOMMU_CQCSR_CQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}
1989
/*
 * Handle a write to the fault queue control/status register (FQCSR).
 *
 * The requested enable bit (FQEN) is compared with the current active bit
 * (FQON):
 *  - enable requested while inactive: latch queue size and base address
 *    from FQB, zero head and tail, set FQON and clear FQMF and FQOF;
 *  - disable requested while active: mark every FQH bit read-only and
 *    clear FQON;
 *  - otherwise nothing changes.
 * BUSY is cleared in all three cases.
 */
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    const bool want_on = (csr & RISCV_IOMMU_FQCSR_FQEN) != 0;
    const bool is_on = (csr & RISCV_IOMMU_FQCSR_FQON) != 0;
    uint32_t set_bits = 0;
    uint32_t clr_bits = RISCV_IOMMU_FQCSR_BUSY;

    if (want_on && !is_on) {
        const uint64_t fqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);

        s->fq_mask = (2ULL << get_field(fqb, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(fqb, RISCV_IOMMU_FQB_PPN));

        /* Head bits outside the index mask become read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);

        set_bits = RISCV_IOMMU_FQCSR_FQON;
        clr_bits |= RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF;
    } else if (!want_on && is_on) {
        /* Queue is going down: the whole head register is read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        clr_bits |= RISCV_IOMMU_FQCSR_FQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, set_bits, clr_bits);
}
2019
/*
 * Handle a write to the page-request queue control/status register (PQCSR).
 *
 * The requested enable bit (PQEN) is compared with the current active bit
 * (PQON):
 *  - enable requested while inactive: latch queue size and base address
 *    from PQB, zero head and tail, set PQON and clear PQMF and PQOF;
 *  - disable requested while active: mark every PQH bit read-only and
 *    clear PQON;
 *  - otherwise nothing changes.
 * BUSY is cleared in all three cases.
 */
static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    const uint32_t csr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    const bool want_on = (csr & RISCV_IOMMU_PQCSR_PQEN) != 0;
    const bool is_on = (csr & RISCV_IOMMU_PQCSR_PQON) != 0;
    uint32_t set_bits = 0;
    uint32_t clr_bits = RISCV_IOMMU_PQCSR_BUSY;

    if (want_on && !is_on) {
        const uint64_t pqb = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);

        s->pq_mask = (2ULL << get_field(pqb, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(pqb, RISCV_IOMMU_PQB_PPN));

        /* Head bits outside the index mask become read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);

        set_bits = RISCV_IOMMU_PQCSR_PQON;
        clr_bits |= RISCV_IOMMU_PQCSR_PQMF | RISCV_IOMMU_PQCSR_PQOF;
    } else if (!want_on && is_on) {
        /* Queue is going down: the whole head register is read-only. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        clr_bits |= RISCV_IOMMU_PQCSR_PQON;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, set_bits, clr_bits);
}
2049
/*
 * Service a request posted through the translation debug registers.
 *
 * Nothing happens unless GO_BUSY is set in TR_REQ_CTL. Otherwise the
 * device/process context named by the DID and PID fields is looked up and
 * the IOVA in TR_REQ_IOVA is translated. TR_RESPONSE receives either the
 * translated page number or the FAULT flag with the cause code placed at
 * bit 10. GO_BUSY is cleared afterwards and the context reference dropped.
 */
static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t req_iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t req_ctl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(req_ctl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(req_ctl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    uint64_t response;
    void *ref;

    if (!(req_ctl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        /* No usable context: report a DMA-disabled fault. */
        response = RISCV_IOMMU_TR_RESPONSE_FAULT |
                   (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10);
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = req_iova,
            .perm = req_ctl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);

        if (fault) {
            response = RISCV_IOMMU_TR_RESPONSE_FAULT |
                       (((uint64_t) fault) << 10);
        } else {
            uint64_t phys = iotlb.translated_addr & ~iotlb.addr_mask;

            response = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN,
                                 PPN_DOWN(phys));
        }
    }
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, response);

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}
2089
/* Signature of the per-register handlers invoked after an MMIO write. */
typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
2091
/*
 * Update ICVEC from a software-written value.
 *
 * Each vector field (CIV, FIV, PMIV, PIV) of 'data' is clamped to the
 * corresponding field of s->icvec_avail_vectors before the register is
 * stored. The requested and resulting values are traced.
 */
static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    const uint64_t fields[] = {
        RISCV_IOMMU_ICVEC_CIV,
        RISCV_IOMMU_ICVEC_FIV,
        RISCV_IOMMU_ICVEC_PMIV,
        RISCV_IOMMU_ICVEC_PIV,
    };
    uint64_t clamped = 0;

    for (size_t i = 0; i < ARRAY_SIZE(fields); i++) {
        const uint64_t field = fields[i];

        clamped |= MIN(data & field, s->icvec_avail_vectors & field);
    }

    trace_riscv_iommu_icvec_write(data, clamped);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, clamped);
}
2112
/*
 * Recompute the CIP, FIP and PIP bits of IPSR after a software write.
 *
 * For each of the three bits: if the bit is set in 'data', the matching
 * queue CSR is read and the bit is set in IPSR only when that queue's
 * interrupt-enable bit and at least one of its status bits are set.
 * In every other case the bit is cleared.
 */
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t set_bits = 0;
    uint32_t clr_bits = 0;
    bool cq_pending = false;
    bool fq_pending = false;
    bool pq_pending = false;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        const uint32_t cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
        const uint32_t causes = RISCV_IOMMU_CQCSR_FENCE_W_IP |
                                RISCV_IOMMU_CQCSR_CMD_ILL |
                                RISCV_IOMMU_CQCSR_CMD_TO |
                                RISCV_IOMMU_CQCSR_CQMF;

        cq_pending = (cqcsr & RISCV_IOMMU_CQCSR_CIE) && (cqcsr & causes);
    }
    if (cq_pending) {
        set_bits |= RISCV_IOMMU_IPSR_CIP;
    } else {
        clr_bits |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        const uint32_t fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
        const uint32_t causes = RISCV_IOMMU_FQCSR_FQOF |
                                RISCV_IOMMU_FQCSR_FQMF;

        fq_pending = (fqcsr & RISCV_IOMMU_FQCSR_FIE) && (fqcsr & causes);
    }
    if (fq_pending) {
        set_bits |= RISCV_IOMMU_IPSR_FIP;
    } else {
        clr_bits |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        const uint32_t pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
        const uint32_t causes = RISCV_IOMMU_PQCSR_PQOF |
                                RISCV_IOMMU_PQCSR_PQMF;

        pq_pending = (pqcsr & RISCV_IOMMU_PQCSR_PIE) && (pqcsr & causes);
    }
    if (pq_pending) {
        set_bits |= RISCV_IOMMU_IPSR_PIP;
    } else {
        clr_bits |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, set_bits, clr_bits);
}
2165
/*
 * Dispatch a write to a performance-monitor register to its HPM helper.
 *
 * 'regb' is the 4-byte aligned register offset that was written and
 * 'prev_cy_inh' is the value the IOCOUNTINH CY bit had before the write.
 * Offsets that match none of the handled registers are ignored.
 */
static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
                                           uint32_t regb,
                                           bool prev_cy_inh)
{
    if (regb == RISCV_IOMMU_REG_IOCOUNTINH) {
        riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
    } else if (regb == RISCV_IOMMU_REG_IOHPMCYCLES ||
               regb == RISCV_IOMMU_REG_IOHPMCYCLES + 4) {
        /* Either half of the 64-bit cycle counter. */
        riscv_iommu_process_hpmcycle_write(s);
    } else if (regb >= RISCV_IOMMU_REG_IOHPMEVT_BASE &&
               regb <= RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4) {
        /* Hand the helper the 8-byte aligned offset of the event register. */
        riscv_iommu_process_hpmevt_write(s, regb & ~7);
    }
}
2186
2187 /*
2188 * Write the resulting value of 'data' for the reg specified
2189 * by 'reg_addr', after considering read-only/read-write/write-clear
2190 * bits, in the pointer 'dest'.
2191 *
2192 * The result is written in little-endian.
2193 */
riscv_iommu_write_reg_val(RISCVIOMMUState * s,void * dest,hwaddr reg_addr,int size,uint64_t data)2194 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
2195 void *dest, hwaddr reg_addr,
2196 int size, uint64_t data)
2197 {
2198 uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
2199 uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
2200 uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
2201
2202 stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
2203 }
2204
riscv_iommu_mmio_write(void * opaque,hwaddr addr,uint64_t data,unsigned size,MemTxAttrs attrs)2205 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
2206 uint64_t data, unsigned size,
2207 MemTxAttrs attrs)
2208 {
2209 riscv_iommu_process_fn *process_fn = NULL;
2210 RISCVIOMMUState *s = opaque;
2211 uint32_t regb = addr & ~3;
2212 uint32_t busy = 0;
2213 uint64_t val = 0;
2214 bool cy_inh = false;
2215
2216 if ((addr & (size - 1)) != 0) {
2217 /* Unsupported MMIO alignment or access size */
2218 return MEMTX_ERROR;
2219 }
2220
2221 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2222 /* Unsupported MMIO access location. */
2223 return MEMTX_ACCESS_ERROR;
2224 }
2225
2226 /* Track actionable MMIO write. */
2227 switch (regb) {
2228 case RISCV_IOMMU_REG_DDTP:
2229 case RISCV_IOMMU_REG_DDTP + 4:
2230 process_fn = riscv_iommu_process_ddtp;
2231 regb = RISCV_IOMMU_REG_DDTP;
2232 busy = RISCV_IOMMU_DDTP_BUSY;
2233 break;
2234
2235 case RISCV_IOMMU_REG_CQT:
2236 process_fn = riscv_iommu_process_cq_tail;
2237 break;
2238
2239 case RISCV_IOMMU_REG_CQCSR:
2240 process_fn = riscv_iommu_process_cq_control;
2241 busy = RISCV_IOMMU_CQCSR_BUSY;
2242 break;
2243
2244 case RISCV_IOMMU_REG_FQCSR:
2245 process_fn = riscv_iommu_process_fq_control;
2246 busy = RISCV_IOMMU_FQCSR_BUSY;
2247 break;
2248
2249 case RISCV_IOMMU_REG_PQCSR:
2250 process_fn = riscv_iommu_process_pq_control;
2251 busy = RISCV_IOMMU_PQCSR_BUSY;
2252 break;
2253
2254 case RISCV_IOMMU_REG_ICVEC:
2255 case RISCV_IOMMU_REG_IPSR:
2256 /*
2257 * ICVEC and IPSR have special read/write procedures. We'll
2258 * call their respective helpers and exit.
2259 */
2260 riscv_iommu_write_reg_val(s, &val, addr, size, data);
2261
2262 /*
2263 * 'val' is stored as LE. Switch to host endianess
2264 * before using it.
2265 */
2266 val = le64_to_cpu(val);
2267
2268 if (regb == RISCV_IOMMU_REG_ICVEC) {
2269 riscv_iommu_update_icvec(s, val);
2270 } else {
2271 riscv_iommu_update_ipsr(s, val);
2272 }
2273
2274 return MEMTX_OK;
2275
2276 case RISCV_IOMMU_REG_TR_REQ_CTL:
2277 process_fn = riscv_iommu_process_dbg;
2278 regb = RISCV_IOMMU_REG_TR_REQ_CTL;
2279 busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
2280 break;
2281
2282 case RISCV_IOMMU_REG_IOCOUNTINH:
2283 if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
2284 break;
2285 }
2286 /* Store previous value of CY bit. */
2287 cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
2288 RISCV_IOMMU_IOCOUNTINH_CY);
2289 break;
2290
2291
2292 default:
2293 break;
2294 }
2295
2296 /*
2297 * Registers update might be not synchronized with core logic.
2298 * If system software updates register when relevant BUSY bit
2299 * is set IOMMU behavior of additional writes to the register
2300 * is UNSPECIFIED.
2301 */
2302 riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2303
2304 /* Busy flag update, MSB 4-byte register. */
2305 if (busy) {
2306 uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2307 stl_le_p(&s->regs_rw[regb], rw | busy);
2308 }
2309
2310 /* Process HPM writes and update any internal state if needed. */
2311 if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
2312 regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
2313 riscv_iommu_process_hpm_writes(s, regb, cy_inh);
2314 }
2315
2316 if (process_fn) {
2317 process_fn(s);
2318 }
2319
2320 return MEMTX_OK;
2321 }
2322
riscv_iommu_mmio_read(void * opaque,hwaddr addr,uint64_t * data,unsigned size,MemTxAttrs attrs)2323 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2324 uint64_t *data, unsigned size, MemTxAttrs attrs)
2325 {
2326 RISCVIOMMUState *s = opaque;
2327 uint64_t val = -1;
2328 uint8_t *ptr;
2329
2330 if ((addr & (size - 1)) != 0) {
2331 /* Unsupported MMIO alignment. */
2332 return MEMTX_ERROR;
2333 }
2334
2335 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2336 return MEMTX_ACCESS_ERROR;
2337 }
2338
2339 /* Compute cycle register value. */
2340 if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
2341 val = riscv_iommu_hpmcycle_read(s);
2342 ptr = (uint8_t *)&val + (addr & 7);
2343 } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
2344 /*
2345 * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
2346 * callback completes. In which case CY_OF bit in
2347 * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
2348 * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
2349 * it's not dependent over the timer callback and is computed
2350 * from cycle overflow.
2351 */
2352 val = ldq_le_p(&s->regs_rw[addr]);
2353 val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
2354 ? RISCV_IOMMU_IOCOUNTOVF_CY
2355 : 0;
2356 ptr = (uint8_t *)&val + (addr & 3);
2357 } else {
2358 ptr = &s->regs_rw[addr];
2359 }
2360
2361 val = ldn_le_p(ptr, size);
2362
2363 *data = val;
2364
2365 return MEMTX_OK;
2366 }
2367
2368 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2369 .read_with_attrs = riscv_iommu_mmio_read,
2370 .write_with_attrs = riscv_iommu_mmio_write,
2371 .endianness = DEVICE_NATIVE_ENDIAN,
2372 .impl = {
2373 .min_access_size = 4,
2374 .max_access_size = 8,
2375 .unaligned = false,
2376 },
2377 .valid = {
2378 .min_access_size = 4,
2379 .max_access_size = 8,
2380 }
2381 };
2382
2383 /*
2384 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
2385 * memory region as untranslated address, for additional MSI/MRIF interception
2386 * by IOMMU interrupt remapping implementation.
2387 * Note: Device emulation code generating an MSI is expected to provide a valid
2388 * memory transaction attributes with requested_id set.
2389 */
riscv_iommu_trap_write(void * opaque,hwaddr addr,uint64_t data,unsigned size,MemTxAttrs attrs)2390 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2391 uint64_t data, unsigned size, MemTxAttrs attrs)
2392 {
2393 RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
2394 RISCVIOMMUContext *ctx;
2395 MemTxResult res;
2396 void *ref;
2397 uint32_t devid = attrs.requester_id;
2398
2399 if (attrs.unspecified) {
2400 return MEMTX_ACCESS_ERROR;
2401 }
2402
2403 /* FIXME: PCIe bus remapping for attached endpoints. */
2404 devid |= s->bus << 8;
2405
2406 ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2407 if (ctx == NULL) {
2408 res = MEMTX_ACCESS_ERROR;
2409 } else {
2410 res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2411 }
2412 riscv_iommu_ctx_put(s, ref);
2413 return res;
2414 }
2415
/* Reads from the trap region are never serviced; always report an error. */
static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}
2421
2422 static const MemoryRegionOps riscv_iommu_trap_ops = {
2423 .read_with_attrs = riscv_iommu_trap_read,
2424 .write_with_attrs = riscv_iommu_trap_write,
2425 .endianness = DEVICE_LITTLE_ENDIAN,
2426 .impl = {
2427 .min_access_size = 4,
2428 .max_access_size = 8,
2429 .unaligned = true,
2430 },
2431 .valid = {
2432 .min_access_size = 4,
2433 .max_access_size = 8,
2434 }
2435 };
2436
/* Store 'mode' in the IGS field of the cached capabilities value. */
void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
{
    const uint64_t updated = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);

    s->cap = updated;
}
2441
/*
 * QOM instance initializer.
 *
 * Seeds the capability value (debug interface, physical address width,
 * 8-bit process IDs), allocates the three register arrays with every
 * register initially read-only, creates the context and IOTLB caches and
 * initializes the list linkage.
 */
static void riscv_iommu_instance_init(Object *obj)
{
    RISCVIOMMUState *s = RISCV_IOMMU(obj);
    uint64_t cap;

    /* Enable translation debug interface */
    cap = RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    cap = set_field(cap, RISCV_IOMMU_CAP_PAS, TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    cap |= RISCV_IOMMU_CAP_PD8;
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap = cap;

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    /* I/O translation cache */
    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    /* Not linked to any other IOMMU yet; no address spaces attached. */
    s->iommus.le_prev = NULL;
    s->iommus.le_next = NULL;
    QLIST_INIT(&s->spaces);
}
2478
riscv_iommu_realize(DeviceState * dev,Error ** errp)2479 static void riscv_iommu_realize(DeviceState *dev, Error **errp)
2480 {
2481 RISCVIOMMUState *s = RISCV_IOMMU(dev);
2482
2483 s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
2484 if (s->enable_msi) {
2485 s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
2486 }
2487 if (s->enable_ats) {
2488 s->cap |= RISCV_IOMMU_CAP_ATS;
2489 }
2490 if (s->enable_s_stage) {
2491 s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
2492 RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
2493 }
2494 if (s->enable_g_stage) {
2495 s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
2496 RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
2497 RISCV_IOMMU_CAP_SVRSW60T59B;
2498 }
2499
2500 if (s->hpm_cntrs > 0) {
2501 /* Clip number of HPM counters to maximum supported (31). */
2502 if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
2503 s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
2504 }
2505 /* Enable hardware performance monitor interface */
2506 s->cap |= RISCV_IOMMU_CAP_HPM;
2507 }
2508
2509 /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
2510 s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
2511 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
2512
2513 /*
2514 * Register complete MMIO space, including MSI/PBA registers.
2515 * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
2516 * managed directly by the PCIDevice implementation.
2517 */
2518 memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
2519 "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
2520
2521 /* Set power-on register state */
2522 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
2523 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
2524 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
2525 ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
2526 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
2527 ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
2528 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
2529 ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
2530 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
2531 ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
2532 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
2533 ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
2534 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
2535 RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
2536 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
2537 RISCV_IOMMU_CQCSR_BUSY);
2538 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
2539 RISCV_IOMMU_FQCSR_FQOF);
2540 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
2541 RISCV_IOMMU_FQCSR_BUSY);
2542 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
2543 RISCV_IOMMU_PQCSR_PQOF);
2544 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
2545 RISCV_IOMMU_PQCSR_BUSY);
2546 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
2547 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
2548 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
2549 /* If debug registers enabled. */
2550 if (s->cap & RISCV_IOMMU_CAP_DBG) {
2551 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
2552 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
2553 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2554 }
2555
2556 /* If HPM registers are enabled. */
2557 if (s->cap & RISCV_IOMMU_CAP_HPM) {
2558 /* +1 for cycle counter bit. */
2559 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
2560 ~((2 << s->hpm_cntrs) - 1));
2561 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
2562 memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
2563 0x00, s->hpm_cntrs * 8);
2564 memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
2565 0x00, s->hpm_cntrs * 8);
2566 }
2567
2568 /* Memory region for downstream access, if specified. */
2569 if (s->target_mr) {
2570 s->target_as = g_new0(AddressSpace, 1);
2571 address_space_init(s->target_as, s->target_mr,
2572 "riscv-iommu-downstream");
2573 } else {
2574 /* Fallback to global system memory. */
2575 s->target_as = &address_space_memory;
2576 }
2577
2578 /* Memory region for untranslated MRIF/MSI writes */
2579 memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
2580 "riscv-iommu-trap", ~0ULL);
2581 address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
2582
2583 if (s->cap & RISCV_IOMMU_CAP_HPM) {
2584 s->hpm_timer =
2585 timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
2586 s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
2587 }
2588 }
2589
/*
 * Device unrealize callback: drop the translation caches and, when the
 * HPM capability is set, the HPM event map and timer.
 */
static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);
    const bool has_hpm = (s->cap & RISCV_IOMMU_CAP_HPM) != 0;

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);

    if (!has_hpm) {
        return;
    }

    g_hash_table_unref(s->hpm_event_ctr_map);
    timer_free(s->hpm_timer);
}
2602
/*
 * Reset the IOMMU.
 *
 * DDTP is cleared with its mode set back to the user's initial choice
 * (OFF or BARE). The enable, interrupt-enable, active and busy bits of
 * the three queue CSRs are cleared, as are the debug GO_BUSY bit and
 * IPSR. Both translation caches are emptied.
 */
void riscv_iommu_reset(RISCVIOMMUState *s)
{
    const int initial_mode = s->enable_off ? RISCV_IOMMU_DDTP_MODE_OFF
                                           : RISCV_IOMMU_DDTP_MODE_BARE;
    const uint32_t cq_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
                            RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
    const uint32_t fq_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
                            RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
    const uint32_t pq_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
                            RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;

    /* Device directory pointer: mode only, everything else zero. */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, initial_mode);
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);

    /* Queue control registers. */
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, cq_clr);
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, fq_clr);
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, pq_clr);

    /* Debug interface and pending interrupts. */
    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);

    /* Cached translations are no longer valid. */
    g_hash_table_remove_all(s->ctx_cache);
    g_hash_table_remove_all(s->iot_cache);
}
2637
/*
 * qdev properties of the IOMMU device. The last argument of each
 * UINT32/BOOL/UINT8 entry is the property's default value.
 */
static const Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
        RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
        LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    /* Optional memory region used for downstream accesses. */
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
        TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
        RISCV_IOMMU_IOCOUNT_NUM),
};
2654
/* QOM class initializer: install realize/unrealize hooks and properties. */
static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);

    device_class->realize = riscv_iommu_realize;
    device_class->unrealize = riscv_iommu_unrealize;

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    device_class->user_creatable = false;

    device_class_set_props(device_class, riscv_iommu_properties);
}
2665
2666 static const TypeInfo riscv_iommu_info = {
2667 .name = TYPE_RISCV_IOMMU,
2668 .parent = TYPE_DEVICE,
2669 .instance_size = sizeof(RISCVIOMMUState),
2670 .instance_init = riscv_iommu_instance_init,
2671 .class_init = riscv_iommu_class_init,
2672 };
2673
/* Printable names for the four access-flag values 0..3, used for tracing. */
static const char *IOMMU_FLAG_STR[] = {
    [0] = "NA",
    [1] = "RO",
    [2] = "WR",
    [3] = "RW",
};
2680
2681 /* RISC-V IOMMU Memory Region - Address Translation Space */
riscv_iommu_memory_region_translate(IOMMUMemoryRegion * iommu_mr,hwaddr addr,IOMMUAccessFlags flag,int iommu_idx)2682 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2683 IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2684 IOMMUAccessFlags flag, int iommu_idx)
2685 {
2686 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2687 RISCVIOMMUContext *ctx;
2688 void *ref;
2689 IOMMUTLBEntry iotlb = {
2690 .iova = addr,
2691 .target_as = as->iommu->target_as,
2692 .addr_mask = ~0ULL,
2693 .perm = flag,
2694 };
2695
2696 ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2697 if (ctx == NULL) {
2698 /* Translation disabled or invalid. */
2699 iotlb.addr_mask = 0;
2700 iotlb.perm = IOMMU_NONE;
2701 } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2702 /* Translation disabled or fault reported. */
2703 iotlb.addr_mask = 0;
2704 iotlb.perm = IOMMU_NONE;
2705 }
2706
2707 /* Trace all dma translations with original access flags. */
2708 trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2709 PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2710 IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2711 iotlb.translated_addr);
2712
2713 riscv_iommu_ctx_put(as->iommu, ref);
2714
2715 return iotlb;
2716 }
2717
riscv_iommu_memory_region_notify(IOMMUMemoryRegion * iommu_mr,IOMMUNotifierFlag old,IOMMUNotifierFlag new,Error ** errp)2718 static int riscv_iommu_memory_region_notify(
2719 IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2720 IOMMUNotifierFlag new, Error **errp)
2721 {
2722 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2723
2724 if (old == IOMMU_NOTIFIER_NONE) {
2725 as->notifier = true;
2726 trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2727 } else if (new == IOMMU_NOTIFIER_NONE) {
2728 as->notifier = false;
2729 trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2730 }
2731
2732 return 0;
2733 }
2734
pci_is_iommu(PCIDevice * pdev)2735 static inline bool pci_is_iommu(PCIDevice *pdev)
2736 {
2737 return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
2738 }
2739
riscv_iommu_find_as(PCIBus * bus,void * opaque,int devfn)2740 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2741 {
2742 RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2743 PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2744 AddressSpace *as = NULL;
2745
2746 if (pdev && pci_is_iommu(pdev)) {
2747 return s->target_as;
2748 }
2749
2750 /* Find first registered IOMMU device */
2751 while (s->iommus.le_prev) {
2752 s = *(s->iommus.le_prev);
2753 }
2754
2755 /* Find first matching IOMMU */
2756 while (s != NULL && as == NULL) {
2757 as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2758 s = s->iommus.le_next;
2759 }
2760
2761 return as ? as : &address_space_memory;
2762 }
2763
/* PCI bus IOMMU hooks: only the address space lookup is provided. */
static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};
2767
riscv_iommu_pci_setup_iommu(RISCVIOMMUState * iommu,PCIBus * bus,Error ** errp)2768 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2769 Error **errp)
2770 {
2771 if (bus->iommu_ops &&
2772 bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2773 /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2774 RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2775 QLIST_INSERT_AFTER(last, iommu, iommus);
2776 } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2777 pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2778 } else {
2779 error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2780 pci_bus_num(bus));
2781 }
2782 }
2783
/*
 * Map transaction attributes to an IOMMU index: the process ID from the
 * attributes, or RISCV_IOMMU_NOPROCID when the attributes are unspecified.
 */
static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    if (attrs.unspecified) {
        return RISCV_IOMMU_NOPROCID;
    }

    return (int)attrs.pid;
}
2789
/* Number of IOMMU indexes: two to the power of the IOMMU's pid_bits. */
static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *space = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUState *iommu = space->iommu;

    return 1 << iommu->pid_bits;
}
2795
/* QOM class initializer for the IOMMU memory region: install callbacks. */
static void riscv_iommu_memory_region_init(ObjectClass *klass, const void *data)
{
    IOMMUMemoryRegionClass *mr_class = IOMMU_MEMORY_REGION_CLASS(klass);

    /* Index handling. */
    mr_class->num_indexes = riscv_iommu_memory_region_index_len;
    mr_class->attrs_to_index = riscv_iommu_memory_region_index;

    /* Translation and notifier bookkeeping. */
    mr_class->translate = riscv_iommu_memory_region_translate;
    mr_class->notify_flag_changed = riscv_iommu_memory_region_notify;
}
2805
2806 static const TypeInfo riscv_iommu_memory_region_info = {
2807 .parent = TYPE_IOMMU_MEMORY_REGION,
2808 .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2809 .class_init = riscv_iommu_memory_region_init,
2810 };
2811
/* Register the IOMMU memory region type and the IOMMU device type. */
static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);
2819