xref: /openbmc/qemu/hw/riscv/riscv-iommu.c (revision 63e7af2035242dda6e2460f4eadbbe6f58c67614)
1 /*
2  * QEMU emulation of an RISC-V IOMMU
3  *
4  * Copyright (C) 2021-2023, Rivos Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "exec/target_page.h"
22 #include "hw/pci/pci_bus.h"
23 #include "hw/pci/pci_device.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/riscv/riscv_hart.h"
26 #include "migration/vmstate.h"
27 #include "qapi/error.h"
28 #include "qemu/timer.h"
29 
30 #include "cpu_bits.h"
31 #include "riscv-iommu.h"
32 #include "riscv-iommu-bits.h"
33 #include "riscv-iommu-hpm.h"
34 #include "trace.h"
35 
36 #define LIMIT_CACHE_CTX               (1U << 7)
37 #define LIMIT_CACHE_IOT               (1U << 20)
38 
/* Physical page number conversions */
40 #define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
41 #define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
42 
43 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
44 
/*
 * Device assigned I/O address space.
 *
 * Groups the IOMMU memory region and the address space used for an
 * attached device together with the requester identifier of that device
 * and a pointer back to the IOMMU state that manages it.
 */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list; /* Linkage for a QLIST of spaces */
};
54 
/*
 * Kind of translation a cached entry was produced by; stored in the
 * 'tag' member of struct RISCVIOMMUEntry.
 */
typedef enum RISCVIOMMUTransTag {
    RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
    RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
    RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
    RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
} RISCVIOMMUTransTag;
61 
/*
 * Address translation cache entry.
 *
 * Every member after 'tag' is a uint64_t bit-field. Addresses are kept
 * as page numbers (44 bits each) rather than as full byte addresses.
 */
struct RISCVIOMMUEntry {
    RISCVIOMMUTransTag tag;     /* Translation Tag */
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};
71 
72 /* IOMMU index for transactions without process_id specified. */
73 #define RISCV_IOMMU_NOPROCID 0
74 
75 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
76 {
77     switch (vec_type) {
78     case RISCV_IOMMU_INTR_CQ:
79         return icvec & RISCV_IOMMU_ICVEC_CIV;
80     case RISCV_IOMMU_INTR_FQ:
81         return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
82     case RISCV_IOMMU_INTR_PM:
83         return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
84     case RISCV_IOMMU_INTR_PQ:
85         return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
86     default:
87         g_assert_not_reached();
88     }
89 }
90 
91 void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
92 {
93     uint32_t ipsr, icvec, vector;
94 
95     if (!s->notify) {
96         return;
97     }
98 
99     icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
100     ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);
101 
102     if (!(ipsr & (1 << vec_type))) {
103         vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
104         s->notify(s, vector);
105         trace_riscv_iommu_notify_int_vector(vec_type, vector);
106     }
107 }
108 
109 static void riscv_iommu_fault(RISCVIOMMUState *s,
110                               struct riscv_iommu_fq_record *ev)
111 {
112     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
113     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
114     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
115     uint32_t next = (tail + 1) & s->fq_mask;
116     uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
117 
118     trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
119                           PCI_FUNC(devid), ev->hdr, ev->iotval);
120 
121     if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
122         !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
123         return;
124     }
125 
126     if (head == next) {
127         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
128                               RISCV_IOMMU_FQCSR_FQOF, 0);
129     } else {
130         dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
131         if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
132                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
133             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
134                                   RISCV_IOMMU_FQCSR_FQMF, 0);
135         } else {
136             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
137         }
138     }
139 
140     if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
141         riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
142     }
143 }
144 
145 static void riscv_iommu_pri(RISCVIOMMUState *s,
146     struct riscv_iommu_pq_record *pr)
147 {
148     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
149     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
150     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
151     uint32_t next = (tail + 1) & s->pq_mask;
152     uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
153 
154     trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
155                           PCI_FUNC(devid), pr->payload);
156 
157     if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
158         !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
159         return;
160     }
161 
162     if (head == next) {
163         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
164                               RISCV_IOMMU_PQCSR_PQOF, 0);
165     } else {
166         dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
167         if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
168                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
169             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
170                                   RISCV_IOMMU_PQCSR_PQMF, 0);
171         } else {
172             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
173         }
174     }
175 
176     if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
177         riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
178     }
179 }
180 
/*
 * Parallel bit extract: keep only those bits of 'val' whose position is
 * set in the mask 'ext', and pack them, in their original order, into
 * the least-significant bits of the result. All remaining high bits of
 * the result are zero.
 *
 * Example:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to bit manip
 * function PEXT (Parallel bits extract) from x86.
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t packed = 0;
    unsigned int out_pos = 0;

    /* Consume 'val' and 'ext' in lock-step, one bit per iteration. */
    for (; ext != 0; ext >>= 1, val >>= 1) {
        if ((ext & 1) == 0) {
            continue;           /* Bit not selected by the mask */
        }
        packed |= (val & 1) << out_pos;
        out_pos++;
    }

    return packed;
}
217 
218 /* Check if GPA matches MSI/MRIF pattern. */
219 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
220     dma_addr_t gpa)
221 {
222     if (!s->enable_msi) {
223         return false;
224     }
225 
226     if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
227         RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
228         return false; /* Invalid MSI/MRIF mode */
229     }
230 
231     if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
232         return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
233     }
234 
235     return true;
236 }
237 
/*
 * RISCV IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations could be merged into a single helper function in the
 * future. They are kept separate for now, as error reporting and flow
 * specifics are sufficiently different to justify separate implementations.
 *
 * The walk is driven by the MODE and PPN fields of ctx->satp (first stage)
 * and ctx->gatp (G-stage). When both stages are enabled, every first-stage
 * table address (the root and each inner PTE target) is itself translated
 * through the G-stage tables before it is dereferenced.
 *
 * On success the function fills in iotlb->translated_addr and iotlb->perm,
 * narrows iotlb->addr_mask to the smallest page size met during the walk,
 * and, for write accesses that hit the MSI address pattern, redirects
 * iotlb->target_as to the IOMMU trap address space.
 *
 * @s        : IOMMU Device State
 * @ctx      : Translation context for device id and process address space id.
 * @iotlb    : translation data: physical address and access mode.
 * @return   : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    /* Per-stage walk state, indexed by S_STAGE / G_STAGE. */
    struct {
        unsigned char step;      /* Number of table levels walked so far */
        unsigned char levels;    /* Total number of levels for the mode */
        unsigned char ptidxbits; /* Index bits consumed per level */
        unsigned char ptesize;   /* Size of one PTE in bytes */
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    /* Until the root pointers are needed, satp/gatp hold the MODE field. */
    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /*
     * S/G translation parameters.
     *
     * pass == 0 configures the first stage from satp and DC.tc.SXL,
     * pass == 1 configures the G-stage from gatp and fctl.GXL. A mode
     * that is unknown, or not advertised in s->cap, is reported as a
     * misconfigured device directory entry.
     */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
            (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize   = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    };

    /*
     * S/G stages translation tables root pointers.
     *
     * From here on satp/gatp hold root table addresses instead of modes.
     * With both stages enabled the walk starts in the G-stage, translating
     * the first-stage root pointer; otherwise it starts directly on the
     * IOVA in whichever single stage is enabled.
     */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;

    do {
        /* The G-stage root level indexes with two extra address bits. */
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        /* Number of address bits below the index field of this level. */
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        /* SADE/GADE for the current stage; relaxes the A/D checks below. */
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_len = va_skip + va_bits;
            const uint64_t va_mask = (1ULL << va_len) - 1;

            if (pass == S_STAGE && va_len > 32) {
                /*
                 * First stage with more than 32 address bits: all bits
                 * from (va_len - 1) upwards must be equal, i.e. either
                 * all zeros or all ones.
                 *
                 * NOTE(review): the shift count below is
                 * TARGET_LONG_BITS - (va_len - 1); confirm this cannot
                 * become negative on builds where TARGET_LONG_BITS is 32.
                 */
                target_ulong mask, masked_msbs;

                mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
                masked_msbs = (addr >> (va_len - 1)) & mask;

                if (masked_msbs != 0 && masked_msbs != mask) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
                }
            } else {
                /* Otherwise no bit above va_len may be set. */
                if ((addr & va_mask) != addr) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
                }
            }
        }


        /* Account one page table access for the performance monitor. */
        if (pass == S_STAGE) {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
        } else {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        /*
         * Classify the PTE. Every 'break' below leaves the loop and ends
         * in the page fault return at the bottom of the function.
         */
        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /*
             * Update address mask based on smallest translation granularity.
             * The mask is only narrowed here, so the value passed in by the
             * caller is the upper bound (presumably initialised by the
             * caller; verify there).
             */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /*
             * Continue with S-Stage translation?
             * A G-stage leaf reached while the first stage is unfinished
             * only translated a first-stage table address: resume the
             * first-stage walk on the IOVA with 'base' as the table.
             */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                                         : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /*
             * Continue with G-Stage translation?
             * The first stage produced a GPA; restart the G-stage walk
             * from its root to turn that GPA into the final address.
             */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /*
         * Continue with G-Stage translation?
         * An inner first-stage PTE points at a guest physical table
         * address, which has to go through the G-stage before it is read.
         */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    /* Page fault: cause depends on access type and on the failing stage. */
    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}
513 
514 static void riscv_iommu_report_fault(RISCVIOMMUState *s,
515                                      RISCVIOMMUContext *ctx,
516                                      uint32_t fault_type, uint32_t cause,
517                                      bool pv,
518                                      uint64_t iotval, uint64_t iotval2)
519 {
520     struct riscv_iommu_fq_record ev = { 0 };
521 
522     if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
523         switch (cause) {
524         case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
525         case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
526         case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
527         case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
528         case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
529         case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
530         case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
531             break;
532         default:
533             /* DTF prevents reporting a fault for this given cause */
534             return;
535         }
536     }
537 
538     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
539     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
540     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
541     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true);
542 
543     if (pv) {
544         ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
545     }
546 
547     ev.iotval = iotval;
548     ev.iotval2 = iotval2;
549 
550     riscv_iommu_fault(s, &ev);
551 }
552 
553 /* Redirect MSI write for given GPA. */
554 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
555     RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
556     unsigned size, MemTxAttrs attrs)
557 {
558     MemTxResult res;
559     dma_addr_t addr;
560     uint64_t intn;
561     uint32_t n190;
562     uint64_t pte[2];
563     int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
564     int cause;
565 
566     /* Interrupt File Number */
567     intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
568     if (intn >= 256) {
569         /* Interrupt file number out of range */
570         res = MEMTX_ACCESS_ERROR;
571         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
572         goto err;
573     }
574 
575     /* fetch MSI PTE */
576     addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
577     addr = addr | (intn * sizeof(pte));
578     res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
579             MEMTXATTRS_UNSPECIFIED);
580     if (res != MEMTX_OK) {
581         if (res == MEMTX_DECODE_ERROR) {
582             cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
583         } else {
584             cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
585         }
586         goto err;
587     }
588 
589     le64_to_cpus(&pte[0]);
590     le64_to_cpus(&pte[1]);
591 
592     if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
593         /*
594          * The spec mentions that: "If msipte.C == 1, then further
595          * processing to interpret the PTE is implementation
596          * defined.". We'll abort with cause = 262 for this
597          * case too.
598          */
599         res = MEMTX_ACCESS_ERROR;
600         cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
601         goto err;
602     }
603 
604     switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
605     case RISCV_IOMMU_MSI_PTE_M_BASIC:
606         /* MSI Pass-through mode */
607         addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
608 
609         trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
610                               PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
611                               gpa, addr);
612 
613         res = dma_memory_write(s->target_as, addr, &data, size, attrs);
614         if (res != MEMTX_OK) {
615             cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
616             goto err;
617         }
618 
619         return MEMTX_OK;
620     case RISCV_IOMMU_MSI_PTE_M_MRIF:
621         /* MRIF mode, continue. */
622         break;
623     default:
624         res = MEMTX_ACCESS_ERROR;
625         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
626         goto err;
627     }
628 
629     /*
630      * Report an error for interrupt identities exceeding the maximum allowed
631      * for an IMSIC interrupt file (2047) or destination address is not 32-bit
632      * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
633      */
634     if ((data > 2047) || (gpa & 3)) {
635         res = MEMTX_ACCESS_ERROR;
636         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
637         goto err;
638     }
639 
640     /* MSI MRIF mode, non atomic pending bit update */
641 
642     /* MRIF pending bit address */
643     addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
644     addr = addr | ((data & 0x7c0) >> 3);
645 
646     trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
647                           PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
648                           gpa, addr);
649 
650     /* MRIF pending bit mask */
651     data = 1ULL << (data & 0x03f);
652     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
653     if (res != MEMTX_OK) {
654         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
655         goto err;
656     }
657 
658     intn = intn | data;
659     res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
660     if (res != MEMTX_OK) {
661         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
662         goto err;
663     }
664 
665     /* Get MRIF enable bits */
666     addr = addr + sizeof(intn);
667     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
668     if (res != MEMTX_OK) {
669         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
670         goto err;
671     }
672 
673     if (!(intn & data)) {
674         /* notification disabled, MRIF update completed. */
675         return MEMTX_OK;
676     }
677 
678     /* Send notification message */
679     addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
680     n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
681           (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
682 
683     res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
684     if (res != MEMTX_OK) {
685         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
686         goto err;
687     }
688 
689     trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
690 
691     return MEMTX_OK;
692 
693 err:
694     riscv_iommu_report_fault(s, ctx, fault_type, cause,
695                              !!ctx->process_id, 0, 0);
696     return res;
697 }
698 
699 /*
700  * Check device context configuration as described by the
701  * riscv-iommu spec section "Device-context configuration
702  * checks".
703  */
704 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
705                                             RISCVIOMMUContext *ctx)
706 {
707     uint32_t fsc_mode, msi_mode;
708     uint64_t gatp;
709 
710     if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
711         (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
712          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
713          ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
714         return false;
715     }
716 
717     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
718         (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
719          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
720         return false;
721     }
722 
723     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
724         ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
725         return false;
726     }
727 
728     if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
729         ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
730         return false;
731     }
732 
733     if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
734         msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
735 
736         if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
737             msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
738             return false;
739         }
740     }
741 
742     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
743     if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
744         gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
745         return false;
746     }
747 
748     fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
749 
750     if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
751         switch (fsc_mode) {
752         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
753             if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
754                 return false;
755             }
756             break;
757         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
758             if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
759                 return false;
760             }
761             break;
762         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
763             if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
764                 return false;
765             }
766             break;
767         }
768     } else {
769         /* DC.tc.PDTV is 0 */
770         if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
771             return false;
772         }
773 
774         if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
775             if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
776                 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
777                 return false;
778             }
779         } else {
780             switch (fsc_mode) {
781             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
782                 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
783                     return false;
784                 }
785                 break;
786             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
787                 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
788                     return false;
789                 }
790             break;
791             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
792                 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
793                     return false;
794                 }
795                 break;
796             }
797         }
798     }
799 
800     /*
801      * CAP_END is always zero (only one endianess). FCTL_BE is
802      * always zero (little-endian accesses). Thus TC_SBE must
803      * always be LE, i.e. zero.
804      */
805     if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
806         return false;
807     }
808 
809     return true;
810 }
811 
812 /*
813  * Validate process context (PC) according to section
814  * "Process-context configuration checks".
815  */
816 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
817                                              RISCVIOMMUContext *ctx)
818 {
819     uint32_t mode;
820 
821     if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
822         return false;
823     }
824 
825     if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
826         return false;
827     }
828 
829     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
830     switch (mode) {
831     case RISCV_IOMMU_DC_FSC_MODE_BARE:
832     /* sv39 and sv32 modes have the same value (8) */
833     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
834     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
835     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
836         break;
837     default:
838         return false;
839     }
840 
841     if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
842         if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
843             !(s->cap & RISCV_IOMMU_CAP_SV32)) {
844                 return false;
845         }
846     } else {
847         switch (mode) {
848         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
849             if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
850                 return false;
851             }
852             break;
853         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
854             if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
855                 return false;
856             }
857             break;
858         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
859             if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
860                 return false;
861             }
862             break;
863         }
864     }
865 
866     return true;
867 }
868 
869 /*
870  * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
871  *
872  * @s         : IOMMU Device State
873  * @ctx       : Device Translation Context with devid and process_id set.
874  * @return    : success or fault code.
875  */
876 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
877 {
878     const uint64_t ddtp = s->ddtp;
879     unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
880     dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
881     struct riscv_iommu_dc dc;
882     /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
883     const int dc_fmt = !s->enable_msi;
884     const size_t dc_len = sizeof(dc) >> dc_fmt;
885     int depth;
886     uint64_t de;
887 
888     switch (mode) {
889     case RISCV_IOMMU_DDTP_MODE_OFF:
890         return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
891 
892     case RISCV_IOMMU_DDTP_MODE_BARE:
893         /* mock up pass-through translation context */
894         ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
895             RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
896         ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
897             RISCV_IOMMU_DC_FSC_MODE_BARE);
898 
899         ctx->tc = RISCV_IOMMU_DC_TC_V;
900         if (s->enable_ats) {
901             ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
902         }
903 
904         ctx->ta = 0;
905         ctx->msiptp = 0;
906         return 0;
907 
908     case RISCV_IOMMU_DDTP_MODE_1LVL:
909         depth = 0;
910         break;
911 
912     case RISCV_IOMMU_DDTP_MODE_2LVL:
913         depth = 1;
914         break;
915 
916     case RISCV_IOMMU_DDTP_MODE_3LVL:
917         depth = 2;
918         break;
919 
920     default:
921         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
922     }
923 
924     /*
925      * Check supported device id width (in bits).
926      * See IOMMU Specification, Chapter 6. Software guidelines.
927      * - if extended device-context format is used:
928      *   1LVL: 6, 2LVL: 15, 3LVL: 24
929      * - if base device-context format is used:
930      *   1LVL: 7, 2LVL: 16, 3LVL: 24
931      */
932     if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
933         return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
934     }
935 
936     /* Device directory tree walk */
937     for (; depth-- > 0; ) {
938         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
939         /*
940          * Select device id index bits based on device directory tree level
941          * and device context format.
942          * See IOMMU Specification, Chapter 2. Data Structures.
943          * - if extended device-context format is used:
944          *   device index: [23:15][14:6][5:0]
945          * - if base device-context format is used:
946          *   device index: [23:16][15:7][6:0]
947          */
948         const int split = depth * 9 + 6 + dc_fmt;
949         addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
950         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
951                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
952             return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
953         }
954         le64_to_cpus(&de);
955         if (!(de & RISCV_IOMMU_DDTE_VALID)) {
956             /* invalid directory entry */
957             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
958         }
959         if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
960             /* reserved bits set */
961             return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
962         }
963         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
964     }
965 
966     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
967 
968     /* index into device context entry page */
969     addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
970 
971     memset(&dc, 0, sizeof(dc));
972     if (dma_memory_read(s->target_as, addr, &dc, dc_len,
973                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
974         return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
975     }
976 
977     /* Set translation context. */
978     ctx->tc = le64_to_cpu(dc.tc);
979     ctx->gatp = le64_to_cpu(dc.iohgatp);
980     ctx->satp = le64_to_cpu(dc.fsc);
981     ctx->ta = le64_to_cpu(dc.ta);
982     ctx->msiptp = le64_to_cpu(dc.msiptp);
983     ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
984     ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
985 
986     if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
987         return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
988     }
989 
990     if (!riscv_iommu_validate_device_ctx(s, ctx)) {
991         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
992     }
993 
994     /* FSC field checks */
995     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
996     addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
997 
998     if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
999         if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
1000             /* PID is disabled */
1001             return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1002         }
1003         if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
1004             /* Invalid translation mode */
1005             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1006         }
1007         return 0;
1008     }
1009 
1010     if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
1011         if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
1012             /* No default process_id enabled, set BARE mode */
1013             ctx->satp = 0ULL;
1014             return 0;
1015         } else {
1016             /* Use default process_id #0 */
1017             ctx->process_id = 0;
1018         }
1019     }
1020 
1021     if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1022         /* No S-Stage translation, done. */
1023         return 0;
1024     }
1025 
1026     /* FSC.TC.PDTV enabled */
1027     if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1028         /* Invalid PDTP.MODE */
1029         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1030     }
1031 
1032     for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1033         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1034 
1035         /*
1036          * Select process id index bits based on process directory tree
1037          * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1038          */
1039         const int split = depth * 9 + 8;
1040         addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1041         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1042                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1043             return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1044         }
1045         le64_to_cpus(&de);
1046         if (!(de & RISCV_IOMMU_PDTE_VALID)) {
1047             return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1048         }
1049         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
1050     }
1051 
1052     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1053 
1054     /* Leaf entry in PDT */
1055     addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1056     if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
1057                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1058         return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1059     }
1060 
1061     /* Use FSC and TA from process directory entry. */
1062     ctx->ta = le64_to_cpu(dc.ta);
1063     ctx->satp = le64_to_cpu(dc.fsc);
1064 
1065     if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1066         return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1067     }
1068 
1069     if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1070         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1071     }
1072 
1073     return 0;
1074 }
1075 
1076 /* Translation Context cache support */
1077 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1078 {
1079     RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1080     RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1081     return c1->devid == c2->devid &&
1082            c1->process_id == c2->process_id;
1083 }
1084 
1085 static guint riscv_iommu_ctx_hash(gconstpointer v)
1086 {
1087     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1088     /*
1089      * Generate simple hash of (process_id, devid)
1090      * assuming 24-bit wide devid.
1091      */
1092     return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1093 }
1094 
1095 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
1096                                                gpointer data)
1097 {
1098     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1099     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1100     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1101         ctx->devid == arg->devid &&
1102         ctx->process_id == arg->process_id) {
1103         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1104     }
1105 }
1106 
1107 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
1108                                         gpointer data)
1109 {
1110     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1111     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1112     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1113         ctx->devid == arg->devid) {
1114         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1115     }
1116 }
1117 
1118 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
1119                                       gpointer data)
1120 {
1121     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1122     if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
1123         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1124     }
1125 }
1126 
1127 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
1128                                   uint32_t devid, uint32_t process_id)
1129 {
1130     GHashTable *ctx_cache;
1131     RISCVIOMMUContext key = {
1132         .devid = devid,
1133         .process_id = process_id,
1134     };
1135     ctx_cache = g_hash_table_ref(s->ctx_cache);
1136     g_hash_table_foreach(ctx_cache, func, &key);
1137     g_hash_table_unref(ctx_cache);
1138 }
1139 
1140 /* Find or allocate translation context for a given {device_id, process_id} */
1141 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1142                                           unsigned devid, unsigned process_id,
1143                                           void **ref)
1144 {
1145     GHashTable *ctx_cache;
1146     RISCVIOMMUContext *ctx;
1147     RISCVIOMMUContext key = {
1148         .devid = devid,
1149         .process_id = process_id,
1150     };
1151 
1152     ctx_cache = g_hash_table_ref(s->ctx_cache);
1153     ctx = g_hash_table_lookup(ctx_cache, &key);
1154 
1155     if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1156         *ref = ctx_cache;
1157         return ctx;
1158     }
1159 
1160     ctx = g_new0(RISCVIOMMUContext, 1);
1161     ctx->devid = devid;
1162     ctx->process_id = process_id;
1163 
1164     int fault = riscv_iommu_ctx_fetch(s, ctx);
1165     if (!fault) {
1166         if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1167             g_hash_table_unref(ctx_cache);
1168             ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1169                                               riscv_iommu_ctx_equal,
1170                                               g_free, NULL);
1171             g_hash_table_ref(ctx_cache);
1172             g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1173         }
1174         g_hash_table_add(ctx_cache, ctx);
1175         *ref = ctx_cache;
1176         return ctx;
1177     }
1178 
1179     g_hash_table_unref(ctx_cache);
1180     *ref = NULL;
1181 
1182     riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1183                              fault, !!process_id, 0, 0);
1184 
1185     g_free(ctx);
1186     return NULL;
1187 }
1188 
1189 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
1190 {
1191     if (ref) {
1192         g_hash_table_unref((GHashTable *)ref);
1193     }
1194 }
1195 
1196 /* Find or allocate address space for a given device */
1197 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1198 {
1199     RISCVIOMMUSpace *as;
1200 
1201     /* FIXME: PCIe bus remapping for attached endpoints. */
1202     devid |= s->bus << 8;
1203 
1204     QLIST_FOREACH(as, &s->spaces, list) {
1205         if (as->devid == devid) {
1206             break;
1207         }
1208     }
1209 
1210     if (as == NULL) {
1211         char name[64];
1212         as = g_new0(RISCVIOMMUSpace, 1);
1213 
1214         as->iommu = s;
1215         as->devid = devid;
1216 
1217         snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1218             PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1219 
1220         /* IOVA address space, untranslated addresses */
1221         memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1222             TYPE_RISCV_IOMMU_MEMORY_REGION,
1223             OBJECT(as), "riscv_iommu", UINT64_MAX);
1224         address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1225 
1226         QLIST_INSERT_HEAD(&s->spaces, as, list);
1227 
1228         trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1229                 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1230     }
1231     return &as->iova_as;
1232 }
1233 
1234 /* Translation Object cache support */
1235 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1236 {
1237     RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1238     RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1239     return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1240            t1->iova == t2->iova && t1->tag == t2->tag;
1241 }
1242 
1243 static guint riscv_iommu_iot_hash(gconstpointer v)
1244 {
1245     RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1246     return (guint)t->iova;
1247 }
1248 
1249 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
1250 /* GV: 0 AV: 0 GVMA: 1 */
1251 static
1252 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
1253 {
1254     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1255     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1256     if (iot->tag == arg->tag) {
1257         iot->perm = IOMMU_NONE;
1258     }
1259 }
1260 
1261 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
1262 static
1263 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
1264 {
1265     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1266     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1267     if (iot->tag == arg->tag &&
1268         iot->pscid == arg->pscid) {
1269         iot->perm = IOMMU_NONE;
1270     }
1271 }
1272 
1273 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
1274 static
1275 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
1276 {
1277     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1278     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1279     if (iot->tag == arg->tag &&
1280         iot->iova == arg->iova) {
1281         iot->perm = IOMMU_NONE;
1282     }
1283 }
1284 
1285 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
1286 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1287                                              gpointer data)
1288 {
1289     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1290     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1291     if (iot->tag == arg->tag &&
1292         iot->pscid == arg->pscid &&
1293         iot->iova == arg->iova) {
1294         iot->perm = IOMMU_NONE;
1295     }
1296 }
1297 
1298 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
1299 /* GV: 1 AV: 0 GVMA: 1 */
1300 static
1301 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
1302 {
1303     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1304     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1305     if (iot->tag == arg->tag &&
1306         iot->gscid == arg->gscid) {
1307         iot->perm = IOMMU_NONE;
1308     }
1309 }
1310 
1311 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
1312 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
1313                                               gpointer data)
1314 {
1315     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1316     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1317     if (iot->tag == arg->tag &&
1318         iot->gscid == arg->gscid &&
1319         iot->pscid == arg->pscid) {
1320         iot->perm = IOMMU_NONE;
1321     }
1322 }
1323 
1324 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
1325 /* GV: 1 AV: 1 GVMA: 1 */
1326 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
1327                                              gpointer data)
1328 {
1329     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1330     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1331     if (iot->tag == arg->tag &&
1332         iot->gscid == arg->gscid &&
1333         iot->iova == arg->iova) {
1334         iot->perm = IOMMU_NONE;
1335     }
1336 }
1337 
1338 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
1339 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
1340                                                    gpointer data)
1341 {
1342     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1343     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1344     if (iot->tag == arg->tag &&
1345         iot->gscid == arg->gscid &&
1346         iot->pscid == arg->pscid &&
1347         iot->iova == arg->iova) {
1348         iot->perm = IOMMU_NONE;
1349     }
1350 }
1351 
1352 /* caller should keep ref-count for iot_cache object */
1353 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1354     GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
1355 {
1356     RISCVIOMMUEntry key = {
1357         .tag   = transtag,
1358         .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1359         .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1360         .iova  = PPN_DOWN(iova),
1361     };
1362     return g_hash_table_lookup(iot_cache, &key);
1363 }
1364 
1365 /* caller should keep ref-count for iot_cache object */
1366 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1367     GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1368 {
1369     if (!s->iot_limit) {
1370         return;
1371     }
1372 
1373     if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1374         iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1375                                           riscv_iommu_iot_equal,
1376                                           g_free, NULL);
1377         g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1378     }
1379     g_hash_table_add(iot_cache, iot);
1380 }
1381 
1382 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
1383     uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
1384 {
1385     GHashTable *iot_cache;
1386     RISCVIOMMUEntry key = {
1387         .tag = transtag,
1388         .gscid = gscid,
1389         .pscid = pscid,
1390         .iova  = PPN_DOWN(iova),
1391     };
1392 
1393     iot_cache = g_hash_table_ref(s->iot_cache);
1394     g_hash_table_foreach(iot_cache, func, &key);
1395     g_hash_table_unref(iot_cache);
1396 }
1397 
1398 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
1399 {
1400     uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
1401     uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
1402 
1403     if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1404         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1405             RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
1406     } else {
1407         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1408             RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
1409     }
1410 }
1411 
1412 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
1413     IOMMUTLBEntry *iotlb, bool enable_cache)
1414 {
1415     RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
1416     RISCVIOMMUEntry *iot;
1417     IOMMUAccessFlags perm;
1418     bool enable_pid;
1419     bool enable_pri;
1420     GHashTable *iot_cache;
1421     int fault;
1422 
1423     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
1424 
1425     iot_cache = g_hash_table_ref(s->iot_cache);
1426     /*
1427      * TC[32] is reserved for custom extensions, used here to temporarily
1428      * enable automatic page-request generation for ATS queries.
1429      */
1430     enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
1431     enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
1432 
1433     /* Check for ATS request. */
1434     if (iotlb->perm == IOMMU_NONE) {
1435         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
1436         /* Check if ATS is disabled. */
1437         if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
1438             enable_pri = false;
1439             fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1440             goto done;
1441         }
1442     }
1443 
1444     iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
1445     perm = iot ? iot->perm : IOMMU_NONE;
1446     if (perm != IOMMU_NONE) {
1447         iotlb->translated_addr = PPN_PHYS(iot->phys);
1448         iotlb->addr_mask = ~TARGET_PAGE_MASK;
1449         iotlb->perm = perm;
1450         fault = 0;
1451         goto done;
1452     }
1453 
1454     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
1455 
1456     /* Translate using device directory / page table information. */
1457     fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
1458 
1459     if (!fault && iotlb->target_as == &s->trap_as) {
1460         /* Do not cache trapped MSI translations */
1461         goto done;
1462     }
1463 
1464     /*
1465      * We made an implementation choice to not cache identity-mapped
1466      * translations, as allowed by the specification, to avoid
1467      * translation cache evictions for other devices sharing the
1468      * IOMMU hardware model.
1469      */
1470     if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
1471         iot = g_new0(RISCVIOMMUEntry, 1);
1472         iot->iova = PPN_DOWN(iotlb->iova);
1473         iot->phys = PPN_DOWN(iotlb->translated_addr);
1474         iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
1475         iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
1476         iot->perm = iotlb->perm;
1477         iot->tag = transtag;
1478         riscv_iommu_iot_update(s, iot_cache, iot);
1479     }
1480 
1481 done:
1482     g_hash_table_unref(iot_cache);
1483 
1484     if (enable_pri && fault) {
1485         struct riscv_iommu_pq_record pr = {0};
1486         if (enable_pid) {
1487             pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
1488                                RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
1489         }
1490         pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
1491         pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
1492                      RISCV_IOMMU_PREQ_PAYLOAD_M;
1493         riscv_iommu_pri(s, &pr);
1494         return fault;
1495     }
1496 
1497     if (fault) {
1498         unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
1499 
1500         if (iotlb->perm & IOMMU_RW) {
1501             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
1502         } else if (iotlb->perm & IOMMU_RO) {
1503             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
1504         }
1505 
1506         riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
1507                                  iotlb->iova, iotlb->translated_addr);
1508         return fault;
1509     }
1510 
1511     return 0;
1512 }
1513 
1514 /* IOMMU Command Interface */
1515 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1516     uint64_t addr, uint32_t data)
1517 {
1518     /*
1519      * ATS processing in this implementation of the IOMMU is synchronous,
1520      * no need to wait for completions here.
1521      */
1522     if (!notify) {
1523         return MEMTX_OK;
1524     }
1525 
1526     return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1527         MEMTXATTRS_UNSPECIFIED);
1528 }
1529 
1530 static void riscv_iommu_ats(RISCVIOMMUState *s,
1531     struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
1532     IOMMUAccessFlags perm,
1533     void (*trace_fn)(const char *id))
1534 {
1535     RISCVIOMMUSpace *as = NULL;
1536     IOMMUNotifier *n;
1537     IOMMUTLBEvent event;
1538     uint32_t pid;
1539     uint32_t devid;
1540     const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
1541 
1542     if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
1543         /* Use device segment and requester id */
1544         devid = get_field(cmd->dword0,
1545             RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
1546     } else {
1547         devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
1548     }
1549 
1550     pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
1551 
1552     QLIST_FOREACH(as, &s->spaces, list) {
1553         if (as->devid == devid) {
1554             break;
1555         }
1556     }
1557 
1558     if (!as || !as->notifier) {
1559         return;
1560     }
1561 
1562     event.type = flag;
1563     event.entry.perm = perm;
1564     event.entry.target_as = s->target_as;
1565 
1566     IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
1567         if (!pv || n->iommu_idx == pid) {
1568             event.entry.iova = n->start;
1569             event.entry.addr_mask = n->end - n->start;
1570             trace_fn(as->iova_mr.parent_obj.name);
1571             memory_region_notify_iommu_one(n, &event);
1572         }
1573     }
1574 }
1575 
1576 static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
1577     struct riscv_iommu_command *cmd)
1578 {
1579     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
1580                            trace_riscv_iommu_ats_inval);
1581 }
1582 
1583 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
1584     struct riscv_iommu_command *cmd)
1585 {
1586     unsigned resp_code = get_field(cmd->dword1,
1587                                    RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
1588 
1589     /* Using the access flag to carry response code information */
1590     IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
1591     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
1592                            trace_riscv_iommu_ats_prgr);
1593 }
1594 
1595 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
1596 {
1597     uint64_t old_ddtp = s->ddtp;
1598     uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
1599     unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
1600     unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
1601     bool ok = false;
1602 
1603     /*
1604      * Check for allowed DDTP.MODE transitions:
1605      * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
1606      * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
1607      */
1608     if (new_mode == old_mode ||
1609         new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1610         new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
1611         ok = true;
1612     } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
1613                new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
1614                new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
1615         ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1616              old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
1617     }
1618 
1619     if (ok) {
1620         /* clear reserved and busy bits, report back sanitized version */
1621         new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
1622                              RISCV_IOMMU_DDTP_MODE, new_mode);
1623     } else {
1624         new_ddtp = old_ddtp;
1625     }
1626     s->ddtp = new_ddtp;
1627 
1628     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
1629 }
1630 
/*
 * Command function and opcode field: the opcode sits in the low bits and
 * the function number is placed above it, starting at bit 7.
 */
#define RISCV_IOMMU_CMD(func, op) ((op) | ((func) << 7))
1633 
1634 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
1635 {
1636     struct riscv_iommu_command cmd;
1637     MemTxResult res;
1638     dma_addr_t addr;
1639     uint32_t tail, head, ctrl;
1640     uint64_t cmd_opcode;
1641     GHFunc func;
1642 
1643     ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1644     tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
1645     head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
1646 
1647     /* Check for pending error or queue processing disabled */
1648     if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
1649         !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
1650         return;
1651     }
1652 
1653     while (tail != head) {
1654         addr = s->cq_addr  + head * sizeof(cmd);
1655         res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
1656                               MEMTXATTRS_UNSPECIFIED);
1657 
1658         if (res != MEMTX_OK) {
1659             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1660                                   RISCV_IOMMU_CQCSR_CQMF, 0);
1661             goto fault;
1662         }
1663 
1664         trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
1665 
1666         cmd_opcode = get_field(cmd.dword0,
1667                                RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
1668 
1669         switch (cmd_opcode) {
1670         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
1671                              RISCV_IOMMU_CMD_IOFENCE_OPCODE):
1672             res = riscv_iommu_iofence(s,
1673                 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
1674                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
1675 
1676             if (res != MEMTX_OK) {
1677                 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1678                                       RISCV_IOMMU_CQCSR_CQMF, 0);
1679                 goto fault;
1680             }
1681             break;
1682 
1683         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
1684                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1685         {
1686             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1687             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1688             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1689             uint32_t gscid = get_field(cmd.dword0,
1690                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1691             uint32_t pscid = get_field(cmd.dword0,
1692                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1693             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1694 
1695             if (pscv) {
1696                 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
1697                 goto cmd_ill;
1698             }
1699 
1700             func = riscv_iommu_iot_inval_all;
1701 
1702             if (gv) {
1703                 func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1704                               riscv_iommu_iot_inval_gscid;
1705             }
1706 
1707             riscv_iommu_iot_inval(
1708                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);
1709 
1710             riscv_iommu_iot_inval(
1711                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
1712             break;
1713         }
1714 
1715         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
1716                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1717         {
1718             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1719             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1720             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1721             uint32_t gscid = get_field(cmd.dword0,
1722                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1723             uint32_t pscid = get_field(cmd.dword0,
1724                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1725             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1726             RISCVIOMMUTransTag transtag;
1727 
1728             if (gv) {
1729                 transtag = RISCV_IOMMU_TRANS_TAG_VN;
1730                 if (pscv) {
1731                     func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
1732                                   riscv_iommu_iot_inval_gscid_pscid;
1733                 } else {
1734                     func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1735                                   riscv_iommu_iot_inval_gscid;
1736                 }
1737             } else {
1738                 transtag = RISCV_IOMMU_TRANS_TAG_SS;
1739                 if (pscv) {
1740                     func = (av) ? riscv_iommu_iot_inval_pscid_iova :
1741                                   riscv_iommu_iot_inval_pscid;
1742                 } else {
1743                     func = (av) ? riscv_iommu_iot_inval_iova :
1744                                   riscv_iommu_iot_inval_all;
1745                 }
1746             }
1747 
1748             riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
1749             break;
1750         }
1751 
1752         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
1753                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1754             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1755                 /* invalidate all device context cache mappings */
1756                 func = riscv_iommu_ctx_inval_all;
1757             } else {
1758                 /* invalidate all device context matching DID */
1759                 func = riscv_iommu_ctx_inval_devid;
1760             }
1761             riscv_iommu_ctx_inval(s, func,
1762                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
1763             break;
1764 
1765         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
1766                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1767             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1768                 /* illegal command arguments IODIR_PDT & DV == 0 */
1769                 goto cmd_ill;
1770             } else {
1771                 func = riscv_iommu_ctx_inval_devid_procid;
1772             }
1773             riscv_iommu_ctx_inval(s, func,
1774                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
1775                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
1776             break;
1777 
1778         /* ATS commands */
1779         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
1780                              RISCV_IOMMU_CMD_ATS_OPCODE):
1781             if (!s->enable_ats) {
1782                 goto cmd_ill;
1783             }
1784 
1785             riscv_iommu_ats_inval(s, &cmd);
1786             break;
1787 
1788         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
1789                              RISCV_IOMMU_CMD_ATS_OPCODE):
1790             if (!s->enable_ats) {
1791                 goto cmd_ill;
1792             }
1793 
1794             riscv_iommu_ats_prgr(s, &cmd);
1795             break;
1796 
1797         default:
1798         cmd_ill:
1799             /* Invalid instruction, do not advance instruction index. */
1800             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1801                 RISCV_IOMMU_CQCSR_CMD_ILL, 0);
1802             goto fault;
1803         }
1804 
1805         /* Advance and update head pointer after command completes. */
1806         head = (head + 1) & s->cq_mask;
1807         riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
1808     }
1809     return;
1810 
1811 fault:
1812     if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
1813         riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
1814     }
1815 }
1816 
1817 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
1818 {
1819     uint64_t base;
1820     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1821     uint32_t ctrl_clr;
1822     bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
1823     bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
1824 
1825     if (enable && !active) {
1826         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
1827         s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
1828         s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
1829         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
1830         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
1831         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
1832         ctrl_set = RISCV_IOMMU_CQCSR_CQON;
1833         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
1834                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
1835                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
1836     } else if (!enable && active) {
1837         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
1838         ctrl_set = 0;
1839         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
1840     } else {
1841         ctrl_set = 0;
1842         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
1843     }
1844 
1845     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
1846 }
1847 
1848 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
1849 {
1850     uint64_t base;
1851     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1852     uint32_t ctrl_clr;
1853     bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
1854     bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
1855 
1856     if (enable && !active) {
1857         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
1858         s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
1859         s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
1860         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
1861         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
1862         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
1863         ctrl_set = RISCV_IOMMU_FQCSR_FQON;
1864         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
1865             RISCV_IOMMU_FQCSR_FQOF;
1866     } else if (!enable && active) {
1867         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
1868         ctrl_set = 0;
1869         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
1870     } else {
1871         ctrl_set = 0;
1872         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
1873     }
1874 
1875     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
1876 }
1877 
1878 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
1879 {
1880     uint64_t base;
1881     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
1882     uint32_t ctrl_clr;
1883     bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
1884     bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
1885 
1886     if (enable && !active) {
1887         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
1888         s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
1889         s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
1890         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
1891         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
1892         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
1893         ctrl_set = RISCV_IOMMU_PQCSR_PQON;
1894         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
1895             RISCV_IOMMU_PQCSR_PQOF;
1896     } else if (!enable && active) {
1897         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
1898         ctrl_set = 0;
1899         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
1900     } else {
1901         ctrl_set = 0;
1902         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
1903     }
1904 
1905     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
1906 }
1907 
1908 static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
1909 {
1910     uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
1911     uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
1912     unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
1913     unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
1914     RISCVIOMMUContext *ctx;
1915     void *ref;
1916 
1917     if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
1918         return;
1919     }
1920 
1921     ctx = riscv_iommu_ctx(s, devid, pid, &ref);
1922     if (ctx == NULL) {
1923         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
1924                                  RISCV_IOMMU_TR_RESPONSE_FAULT |
1925                                  (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
1926     } else {
1927         IOMMUTLBEntry iotlb = {
1928             .iova = iova,
1929             .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
1930             .addr_mask = ~0,
1931             .target_as = NULL,
1932         };
1933         int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
1934         if (fault) {
1935             iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
1936         } else {
1937             iova = iotlb.translated_addr & ~iotlb.addr_mask;
1938             iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
1939         }
1940         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
1941     }
1942 
1943     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
1944         RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
1945     riscv_iommu_ctx_put(s, ref);
1946 }
1947 
1948 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
1949 
1950 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
1951 {
1952     uint64_t icvec = 0;
1953 
1954     icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
1955                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);
1956 
1957     icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
1958                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);
1959 
1960     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
1961                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);
1962 
1963     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
1964                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);
1965 
1966     trace_riscv_iommu_icvec_write(data, icvec);
1967 
1968     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
1969 }
1970 
1971 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
1972 {
1973     uint32_t cqcsr, fqcsr, pqcsr;
1974     uint32_t ipsr_set = 0;
1975     uint32_t ipsr_clr = 0;
1976 
1977     if (data & RISCV_IOMMU_IPSR_CIP) {
1978         cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1979 
1980         if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
1981             (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
1982              cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
1983              cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
1984              cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
1985             ipsr_set |= RISCV_IOMMU_IPSR_CIP;
1986         } else {
1987             ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1988         }
1989     } else {
1990         ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1991     }
1992 
1993     if (data & RISCV_IOMMU_IPSR_FIP) {
1994         fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1995 
1996         if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
1997             (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
1998              fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
1999             ipsr_set |= RISCV_IOMMU_IPSR_FIP;
2000         } else {
2001             ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2002         }
2003     } else {
2004         ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2005     }
2006 
2007     if (data & RISCV_IOMMU_IPSR_PIP) {
2008         pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
2009 
2010         if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
2011             (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
2012              pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
2013             ipsr_set |= RISCV_IOMMU_IPSR_PIP;
2014         } else {
2015             ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2016         }
2017     } else {
2018         ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2019     }
2020 
2021     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
2022 }
2023 
2024 static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
2025                                            uint32_t regb,
2026                                            bool prev_cy_inh)
2027 {
2028     switch (regb) {
2029     case RISCV_IOMMU_REG_IOCOUNTINH:
2030         riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
2031         break;
2032 
2033     case RISCV_IOMMU_REG_IOHPMCYCLES:
2034     case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
2035         riscv_iommu_process_hpmcycle_write(s);
2036         break;
2037 
2038     case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
2039         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
2040         riscv_iommu_process_hpmevt_write(s, regb & ~7);
2041         break;
2042     }
2043 }
2044 
2045 /*
2046  * Write the resulting value of 'data' for the reg specified
2047  * by 'reg_addr', after considering read-only/read-write/write-clear
2048  * bits, in the pointer 'dest'.
2049  *
2050  * The result is written in little-endian.
2051  */
2052 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
2053                                       void *dest, hwaddr reg_addr,
2054                                       int size, uint64_t data)
2055 {
2056     uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
2057     uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
2058     uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
2059 
2060     stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
2061 }
2062 
2063 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
2064                                           uint64_t data, unsigned size,
2065                                           MemTxAttrs attrs)
2066 {
2067     riscv_iommu_process_fn *process_fn = NULL;
2068     RISCVIOMMUState *s = opaque;
2069     uint32_t regb = addr & ~3;
2070     uint32_t busy = 0;
2071     uint64_t val = 0;
2072     bool cy_inh = false;
2073 
2074     if ((addr & (size - 1)) != 0) {
2075         /* Unsupported MMIO alignment or access size */
2076         return MEMTX_ERROR;
2077     }
2078 
2079     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2080         /* Unsupported MMIO access location. */
2081         return MEMTX_ACCESS_ERROR;
2082     }
2083 
2084     /* Track actionable MMIO write. */
2085     switch (regb) {
2086     case RISCV_IOMMU_REG_DDTP:
2087     case RISCV_IOMMU_REG_DDTP + 4:
2088         process_fn = riscv_iommu_process_ddtp;
2089         regb = RISCV_IOMMU_REG_DDTP;
2090         busy = RISCV_IOMMU_DDTP_BUSY;
2091         break;
2092 
2093     case RISCV_IOMMU_REG_CQT:
2094         process_fn = riscv_iommu_process_cq_tail;
2095         break;
2096 
2097     case RISCV_IOMMU_REG_CQCSR:
2098         process_fn = riscv_iommu_process_cq_control;
2099         busy = RISCV_IOMMU_CQCSR_BUSY;
2100         break;
2101 
2102     case RISCV_IOMMU_REG_FQCSR:
2103         process_fn = riscv_iommu_process_fq_control;
2104         busy = RISCV_IOMMU_FQCSR_BUSY;
2105         break;
2106 
2107     case RISCV_IOMMU_REG_PQCSR:
2108         process_fn = riscv_iommu_process_pq_control;
2109         busy = RISCV_IOMMU_PQCSR_BUSY;
2110         break;
2111 
2112     case RISCV_IOMMU_REG_ICVEC:
2113     case RISCV_IOMMU_REG_IPSR:
2114         /*
2115          * ICVEC and IPSR have special read/write procedures. We'll
2116          * call their respective helpers and exit.
2117          */
2118         riscv_iommu_write_reg_val(s, &val, addr, size, data);
2119 
2120         /*
2121          * 'val' is stored as LE. Switch to host endianess
2122          * before using it.
2123          */
2124         val = le64_to_cpu(val);
2125 
2126         if (regb == RISCV_IOMMU_REG_ICVEC) {
2127             riscv_iommu_update_icvec(s, val);
2128         } else {
2129             riscv_iommu_update_ipsr(s, val);
2130         }
2131 
2132         return MEMTX_OK;
2133 
2134     case RISCV_IOMMU_REG_TR_REQ_CTL:
2135         process_fn = riscv_iommu_process_dbg;
2136         regb = RISCV_IOMMU_REG_TR_REQ_CTL;
2137         busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
2138         break;
2139 
2140     case RISCV_IOMMU_REG_IOCOUNTINH:
2141         if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
2142             break;
2143         }
2144         /* Store previous value of CY bit. */
2145         cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
2146             RISCV_IOMMU_IOCOUNTINH_CY);
2147         break;
2148 
2149 
2150     default:
2151         break;
2152     }
2153 
2154     /*
2155      * Registers update might be not synchronized with core logic.
2156      * If system software updates register when relevant BUSY bit
2157      * is set IOMMU behavior of additional writes to the register
2158      * is UNSPECIFIED.
2159      */
2160     riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2161 
2162     /* Busy flag update, MSB 4-byte register. */
2163     if (busy) {
2164         uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2165         stl_le_p(&s->regs_rw[regb], rw | busy);
2166     }
2167 
2168     /* Process HPM writes and update any internal state if needed. */
2169     if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
2170         regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
2171         riscv_iommu_process_hpm_writes(s, regb, cy_inh);
2172     }
2173 
2174     if (process_fn) {
2175         process_fn(s);
2176     }
2177 
2178     return MEMTX_OK;
2179 }
2180 
2181 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2182     uint64_t *data, unsigned size, MemTxAttrs attrs)
2183 {
2184     RISCVIOMMUState *s = opaque;
2185     uint64_t val = -1;
2186     uint8_t *ptr;
2187 
2188     if ((addr & (size - 1)) != 0) {
2189         /* Unsupported MMIO alignment. */
2190         return MEMTX_ERROR;
2191     }
2192 
2193     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2194         return MEMTX_ACCESS_ERROR;
2195     }
2196 
2197     /* Compute cycle register value. */
2198     if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
2199         val = riscv_iommu_hpmcycle_read(s);
2200         ptr = (uint8_t *)&val + (addr & 7);
2201     } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
2202         /*
2203          * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
2204          * callback completes. In which case CY_OF bit in
2205          * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
2206          * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
2207          * it's not dependent over the timer callback and is computed
2208          * from cycle overflow.
2209          */
2210         val = ldq_le_p(&s->regs_rw[addr]);
2211         val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
2212                    ? RISCV_IOMMU_IOCOUNTOVF_CY
2213                    : 0;
2214         ptr = (uint8_t *)&val + (addr & 3);
2215     } else {
2216         ptr = &s->regs_rw[addr];
2217     }
2218 
2219     val = ldn_le_p(ptr, size);
2220 
2221     *data = val;
2222 
2223     return MEMTX_OK;
2224 }
2225 
2226 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2227     .read_with_attrs = riscv_iommu_mmio_read,
2228     .write_with_attrs = riscv_iommu_mmio_write,
2229     .endianness = DEVICE_NATIVE_ENDIAN,
2230     .impl = {
2231         .min_access_size = 4,
2232         .max_access_size = 8,
2233         .unaligned = false,
2234     },
2235     .valid = {
2236         .min_access_size = 4,
2237         .max_access_size = 8,
2238     }
2239 };
2240 
2241 /*
2242  * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
2243  * memory region as untranslated address, for additional MSI/MRIF interception
2244  * by IOMMU interrupt remapping implementation.
2245  * Note: Device emulation code generating an MSI is expected to provide a valid
2246  * memory transaction attributes with requested_id set.
2247  */
2248 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2249     uint64_t data, unsigned size, MemTxAttrs attrs)
2250 {
2251     RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
2252     RISCVIOMMUContext *ctx;
2253     MemTxResult res;
2254     void *ref;
2255     uint32_t devid = attrs.requester_id;
2256 
2257     if (attrs.unspecified) {
2258         return MEMTX_ACCESS_ERROR;
2259     }
2260 
2261     /* FIXME: PCIe bus remapping for attached endpoints. */
2262     devid |= s->bus << 8;
2263 
2264     ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2265     if (ctx == NULL) {
2266         res = MEMTX_ACCESS_ERROR;
2267     } else {
2268         res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2269     }
2270     riscv_iommu_ctx_put(s, ref);
2271     return res;
2272 }
2273 
2274 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
2275     uint64_t *data, unsigned size, MemTxAttrs attrs)
2276 {
2277     return MEMTX_ACCESS_ERROR;
2278 }
2279 
2280 static const MemoryRegionOps riscv_iommu_trap_ops = {
2281     .read_with_attrs = riscv_iommu_trap_read,
2282     .write_with_attrs = riscv_iommu_trap_write,
2283     .endianness = DEVICE_LITTLE_ENDIAN,
2284     .impl = {
2285         .min_access_size = 4,
2286         .max_access_size = 8,
2287         .unaligned = true,
2288     },
2289     .valid = {
2290         .min_access_size = 4,
2291         .max_access_size = 8,
2292     }
2293 };
2294 
2295 void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
2296 {
2297     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
2298 }
2299 
2300 static void riscv_iommu_instance_init(Object *obj)
2301 {
2302     RISCVIOMMUState *s = RISCV_IOMMU(obj);
2303 
2304     /* Enable translation debug interface */
2305     s->cap = RISCV_IOMMU_CAP_DBG;
2306 
2307     /* Report QEMU target physical address space limits */
2308     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
2309                        TARGET_PHYS_ADDR_SPACE_BITS);
2310 
2311     /* TODO: method to report supported PID bits */
2312     s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
2313     s->cap |= RISCV_IOMMU_CAP_PD8;
2314 
2315     /* register storage */
2316     s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2317     s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2318     s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2319 
2320      /* Mark all registers read-only */
2321     memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
2322 
2323     /* Device translation context cache */
2324     s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
2325                                          riscv_iommu_ctx_equal,
2326                                          g_free, NULL);
2327 
2328     s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
2329                                          riscv_iommu_iot_equal,
2330                                          g_free, NULL);
2331 
2332     s->iommus.le_next = NULL;
2333     s->iommus.le_prev = NULL;
2334     QLIST_INIT(&s->spaces);
2335 }
2336 
2337 static void riscv_iommu_realize(DeviceState *dev, Error **errp)
2338 {
2339     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2340 
2341     s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
2342     if (s->enable_msi) {
2343         s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
2344     }
2345     if (s->enable_ats) {
2346         s->cap |= RISCV_IOMMU_CAP_ATS;
2347     }
2348     if (s->enable_s_stage) {
2349         s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
2350                   RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
2351     }
2352     if (s->enable_g_stage) {
2353         s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
2354                   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
2355                   RISCV_IOMMU_CAP_SVRSW60T59B;
2356     }
2357 
2358     if (s->hpm_cntrs > 0) {
2359         /* Clip number of HPM counters to maximum supported (31). */
2360         if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
2361             s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
2362         }
2363         /* Enable hardware performance monitor interface */
2364         s->cap |= RISCV_IOMMU_CAP_HPM;
2365     }
2366 
2367     /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
2368     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
2369                         RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
2370 
2371     /*
2372      * Register complete MMIO space, including MSI/PBA registers.
2373      * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
2374      * managed directly by the PCIDevice implementation.
2375      */
2376     memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
2377         "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
2378 
2379     /* Set power-on register state */
2380     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
2381     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
2382     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
2383              ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
2384     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
2385         ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
2386     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
2387         ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
2388     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
2389         ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
2390     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
2391         ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
2392     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
2393         RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
2394     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
2395         RISCV_IOMMU_CQCSR_BUSY);
2396     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
2397         RISCV_IOMMU_FQCSR_FQOF);
2398     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
2399         RISCV_IOMMU_FQCSR_BUSY);
2400     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
2401         RISCV_IOMMU_PQCSR_PQOF);
2402     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
2403         RISCV_IOMMU_PQCSR_BUSY);
2404     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
2405     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
2406     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
2407     /* If debug registers enabled. */
2408     if (s->cap & RISCV_IOMMU_CAP_DBG) {
2409         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
2410         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
2411             RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2412     }
2413 
2414     /* If HPM registers are enabled. */
2415     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2416         /* +1 for cycle counter bit. */
2417         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
2418                  ~((2 << s->hpm_cntrs) - 1));
2419         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
2420         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
2421                0x00, s->hpm_cntrs * 8);
2422         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
2423                0x00, s->hpm_cntrs * 8);
2424     }
2425 
2426     /* Memory region for downstream access, if specified. */
2427     if (s->target_mr) {
2428         s->target_as = g_new0(AddressSpace, 1);
2429         address_space_init(s->target_as, s->target_mr,
2430             "riscv-iommu-downstream");
2431     } else {
2432         /* Fallback to global system memory. */
2433         s->target_as = &address_space_memory;
2434     }
2435 
2436     /* Memory region for untranslated MRIF/MSI writes */
2437     memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
2438             "riscv-iommu-trap", ~0ULL);
2439     address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
2440 
2441     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2442         s->hpm_timer =
2443             timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
2444         s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
2445     }
2446 }
2447 
2448 static void riscv_iommu_unrealize(DeviceState *dev)
2449 {
2450     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2451 
2452     g_hash_table_unref(s->iot_cache);
2453     g_hash_table_unref(s->ctx_cache);
2454 
2455     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2456         g_hash_table_unref(s->hpm_event_ctr_map);
2457         timer_free(s->hpm_timer);
2458     }
2459 }
2460 
2461 void riscv_iommu_reset(RISCVIOMMUState *s)
2462 {
2463     uint32_t reg_clr;
2464     int ddtp_mode;
2465 
2466     /*
2467      * Clear DDTP while setting DDTP_mode back to user
2468      * initial setting.
2469      */
2470     ddtp_mode = s->enable_off ?
2471                 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
2472     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
2473     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);
2474 
2475     reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
2476               RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
2477     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);
2478 
2479     reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
2480               RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
2481     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);
2482 
2483     reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
2484               RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
2485     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);
2486 
2487     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
2488                           RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2489 
2490     riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);
2491 
2492     g_hash_table_remove_all(s->ctx_cache);
2493     g_hash_table_remove_all(s->iot_cache);
2494 }
2495 
2496 static const Property riscv_iommu_properties[] = {
2497     DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
2498         RISCV_IOMMU_SPEC_DOT_VER),
2499     DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
2500     DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
2501         LIMIT_CACHE_IOT),
2502     DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
2503     DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
2504     DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
2505     DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
2506     DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
2507     DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
2508         TYPE_MEMORY_REGION, MemoryRegion *),
2509     DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
2510                       RISCV_IOMMU_IOCOUNT_NUM),
2511 };
2512 
2513 static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
2514 {
2515     DeviceClass *dc = DEVICE_CLASS(klass);
2516 
2517     /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
2518     dc->user_creatable = false;
2519     dc->realize = riscv_iommu_realize;
2520     dc->unrealize = riscv_iommu_unrealize;
2521     device_class_set_props(dc, riscv_iommu_properties);
2522 }
2523 
2524 static const TypeInfo riscv_iommu_info = {
2525     .name = TYPE_RISCV_IOMMU,
2526     .parent = TYPE_DEVICE,
2527     .instance_size = sizeof(RISCVIOMMUState),
2528     .instance_init = riscv_iommu_instance_init,
2529     .class_init = riscv_iommu_class_init,
2530 };
2531 
/*
 * Printable names for access permissions, used when tracing DMA
 * translations. The table is indexed by the access flags masked with
 * IOMMU_RW, hence exactly four entries. Both the strings and the table
 * itself are read-only.
 */
static const char *const IOMMU_FLAG_STR[] = {
    "NA",   /* index 0 */
    "RO",   /* index 1 */
    "WR",   /* index 2 */
    "RW",   /* index 3 */
};
2538 
2539 /* RISC-V IOMMU Memory Region - Address Translation Space */
2540 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2541     IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2542     IOMMUAccessFlags flag, int iommu_idx)
2543 {
2544     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2545     RISCVIOMMUContext *ctx;
2546     void *ref;
2547     IOMMUTLBEntry iotlb = {
2548         .iova = addr,
2549         .target_as = as->iommu->target_as,
2550         .addr_mask = ~0ULL,
2551         .perm = flag,
2552     };
2553 
2554     ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2555     if (ctx == NULL) {
2556         /* Translation disabled or invalid. */
2557         iotlb.addr_mask = 0;
2558         iotlb.perm = IOMMU_NONE;
2559     } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2560         /* Translation disabled or fault reported. */
2561         iotlb.addr_mask = 0;
2562         iotlb.perm = IOMMU_NONE;
2563     }
2564 
2565     /* Trace all dma translations with original access flags. */
2566     trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2567                           PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2568                           IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2569                           iotlb.translated_addr);
2570 
2571     riscv_iommu_ctx_put(as->iommu, ref);
2572 
2573     return iotlb;
2574 }
2575 
2576 static int riscv_iommu_memory_region_notify(
2577     IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2578     IOMMUNotifierFlag new, Error **errp)
2579 {
2580     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2581 
2582     if (old == IOMMU_NOTIFIER_NONE) {
2583         as->notifier = true;
2584         trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2585     } else if (new == IOMMU_NOTIFIER_NONE) {
2586         as->notifier = false;
2587         trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2588     }
2589 
2590     return 0;
2591 }
2592 
2593 static inline bool pci_is_iommu(PCIDevice *pdev)
2594 {
2595     return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
2596 }
2597 
2598 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2599 {
2600     RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2601     PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2602     AddressSpace *as = NULL;
2603 
2604     if (pdev && pci_is_iommu(pdev)) {
2605         return s->target_as;
2606     }
2607 
2608     /* Find first registered IOMMU device */
2609     while (s->iommus.le_prev) {
2610         s = *(s->iommus.le_prev);
2611     }
2612 
2613     /* Find first matching IOMMU */
2614     while (s != NULL && as == NULL) {
2615         as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2616         s = s->iommus.le_next;
2617     }
2618 
2619     return as ? as : &address_space_memory;
2620 }
2621 
2622 static const PCIIOMMUOps riscv_iommu_ops = {
2623     .get_address_space = riscv_iommu_find_as,
2624 };
2625 
2626 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2627         Error **errp)
2628 {
2629     if (bus->iommu_ops &&
2630         bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2631         /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2632         RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2633         QLIST_INSERT_AFTER(last, iommu, iommus);
2634     } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2635         pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2636     } else {
2637         error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2638             pci_bus_num(bus));
2639     }
2640 }
2641 
2642 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
2643     MemTxAttrs attrs)
2644 {
2645     return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
2646 }
2647 
2648 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
2649 {
2650     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2651     return 1 << as->iommu->pid_bits;
2652 }
2653 
2654 static void riscv_iommu_memory_region_init(ObjectClass *klass, const void *data)
2655 {
2656     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
2657 
2658     imrc->translate = riscv_iommu_memory_region_translate;
2659     imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
2660     imrc->attrs_to_index = riscv_iommu_memory_region_index;
2661     imrc->num_indexes = riscv_iommu_memory_region_index_len;
2662 }
2663 
2664 static const TypeInfo riscv_iommu_memory_region_info = {
2665     .parent = TYPE_IOMMU_MEMORY_REGION,
2666     .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2667     .class_init = riscv_iommu_memory_region_init,
2668 };
2669 
2670 static void riscv_iommu_register_mr_types(void)
2671 {
2672     type_register_static(&riscv_iommu_memory_region_info);
2673     type_register_static(&riscv_iommu_info);
2674 }
2675 
2676 type_init(riscv_iommu_register_mr_types);
2677