xref: /openbmc/qemu/hw/riscv/riscv-iommu.c (revision 0d0141fadc9063e527865ee420b2baf34e306093)
1 /*
2  * QEMU emulation of a RISC-V IOMMU
3  *
4  * Copyright (C) 2021-2023, Rivos Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "hw/pci/pci_bus.h"
22 #include "hw/pci/pci_device.h"
23 #include "hw/qdev-properties.h"
24 #include "hw/riscv/riscv_hart.h"
25 #include "migration/vmstate.h"
26 #include "qapi/error.h"
27 #include "qemu/timer.h"
28 
29 #include "cpu_bits.h"
30 #include "riscv-iommu.h"
31 #include "riscv-iommu-bits.h"
32 #include "trace.h"
33 
34 #define LIMIT_CACHE_CTX               (1U << 7)
35 #define LIMIT_CACHE_IOT               (1U << 20)
36 
37 /* Physical page number conversions */
38 #define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
39 #define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
40 
41 typedef struct RISCVIOMMUContext RISCVIOMMUContext;
42 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
43 
44 /* Device assigned I/O address space */
45 struct RISCVIOMMUSpace {
46     IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
47     AddressSpace iova_as;       /* IOVA address space for attached device */
48     RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
49     uint32_t devid;             /* Requester identifier, AKA device_id */
50     bool notifier;              /* IOMMU unmap notifier enabled */
51     QLIST_ENTRY(RISCVIOMMUSpace) list;
52 };
53 
54 /* Device translation context state. */
55 struct RISCVIOMMUContext {
56     uint64_t devid:24;          /* Requester Id, AKA device_id */
57     uint64_t process_id:20;     /* Process ID. PASID for PCIe */
58     uint64_t tc;                /* Translation Control */
59     uint64_t ta;                /* Translation Attributes */
60     uint64_t satp;              /* S-Stage address translation and protection */
61     uint64_t gatp;              /* G-Stage address translation and protection */
62     uint64_t msi_addr_mask;     /* MSI filtering - address mask */
63     uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
64     uint64_t msiptp;            /* MSI redirection page table pointer */
65 };
66 
67 /* Address translation cache entry */
68 struct RISCVIOMMUEntry {
69     uint64_t iova:44;           /* IOVA Page Number */
70     uint64_t pscid:20;          /* Process Soft-Context identifier */
71     uint64_t phys:44;           /* Physical Page Number */
72     uint64_t gscid:16;          /* Guest Soft-Context identifier */
73     uint64_t perm:2;            /* IOMMU_RW flags */
74 };
75 
76 /* IOMMU index for transactions without process_id specified. */
77 #define RISCV_IOMMU_NOPROCID 0
78 
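/*
 * Each interrupt source gets a 4-bit vector index in ICVEC:
 * CIV in bits [3:0], FIV in [7:4], PMIV in [11:8], PIV in [15:12].
 */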
79 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
80 {
81     switch (vec_type) {
82     case RISCV_IOMMU_INTR_CQ:
83         return icvec & RISCV_IOMMU_ICVEC_CIV;
84     case RISCV_IOMMU_INTR_FQ:
85         return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
86     case RISCV_IOMMU_INTR_PM:
87         return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
88     case RISCV_IOMMU_INTR_PQ:
89         return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
90     default:
91         g_assert_not_reached();
92     }
93 }
94 
95 static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
96 {
97     const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
98     uint32_t ipsr, icvec, vector;
99 
100     if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) {
101         return;
102     }
103 
104     icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
105     ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);
106 
107     if (!(ipsr & (1 << vec_type))) {
108         vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
109         s->notify(s, vector);
110         trace_riscv_iommu_notify_int_vector(vec_type, vector);
111     }
112 }
113 
114 static void riscv_iommu_fault(RISCVIOMMUState *s,
115                               struct riscv_iommu_fq_record *ev)
116 {
117     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
118     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
119     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
120     uint32_t next = (tail + 1) & s->fq_mask;
121     uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
122 
123     trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
124                           PCI_FUNC(devid), ev->hdr, ev->iotval);
125 
126     if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
127         !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
128         return;
129     }
130 
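    /* The queue is full when advancing the tail would reach the head. */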
131     if (head == next) {
132         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
133                               RISCV_IOMMU_FQCSR_FQOF, 0);
134     } else {
135         dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
136         if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
137                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
138             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
139                                   RISCV_IOMMU_FQCSR_FQMF, 0);
140         } else {
141             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
142         }
143     }
144 
145     if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
146         riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
147     }
148 }
149 
150 static void riscv_iommu_pri(RISCVIOMMUState *s,
151     struct riscv_iommu_pq_record *pr)
152 {
153     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
154     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
155     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
156     uint32_t next = (tail + 1) & s->pq_mask;
157     uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
158 
159     trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
160                           PCI_FUNC(devid), pr->payload);
161 
162     if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
163         !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
164         return;
165     }
166 
167     if (head == next) {
168         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
169                               RISCV_IOMMU_PQCSR_PQOF, 0);
170     } else {
171         dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
172         if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
173                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
174             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
175                                   RISCV_IOMMU_PQCSR_PQMF, 0);
176         } else {
177             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
178         }
179     }
180 
181     if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
182         riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
183     }
184 }
185 
186 /*
187  * Discards all bits from 'val' whose matching bits in the same
188  * positions in the mask 'ext' are zeros, and packs the remaining
189  * bits from 'val' contiguously at the least-significant end of the
190  * result, keeping the same bit order as 'val' and filling any
191  * other bits at the most-significant end of the result with zeros.
192  *
193  * For example, for the following 'val' and 'ext', the return 'ret'
194  * will be:
195  *
196  * val = a b c d e f g h
197  * ext = 1 0 1 0 0 1 1 0
198  * ret = 0 0 0 0 a c f g
199  *
200  * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
201  * "Process to translate addresses of MSIs", is similar to bit manip
202  * function PEXT (Parallel bits extract) from x86.
203  */
204 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
205 {
206     uint64_t ret = 0;
207     uint64_t rot = 1;
208 
209     while (ext) {
210         if (ext & 1) {
211             if (val & 1) {
212                 ret |= rot;
213             }
214             rot <<= 1;
215         }
216         val >>= 1;
217         ext >>= 1;
218     }
219 
220     return ret;
221 }
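/*
 * For example, riscv_iommu_pext_u64(0xb5, 0x6c) == 0x5: bits 2, 3, 5 and 6
 * of 'val' (1, 0, 1, 0) are packed into the low bits of the result. It is
 * used below to derive the IMSIC interrupt file number from the GPA bits
 * selected by the MSI address mask (see riscv_iommu_msi_write()).
 */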
222 
223 /* Check if GPA matches MSI/MRIF pattern. */
224 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
225     dma_addr_t gpa)
226 {
227     if (!s->enable_msi) {
228         return false;
229     }
230 
231     if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
232         RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
233         return false; /* Invalid MSI/MRIF mode */
234     }
235 
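    /*
     * A GPA is an MSI target when every page-number bit not covered by
     * msi_addr_mask matches msi_addr_pattern; the masked (don't-care)
     * bits select the interrupt file and are extracted later with
     * riscv_iommu_pext_u64().
     */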
236     if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
237         return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
238     }
239 
240     return true;
241 }
242 
243 /*
244  * RISC-V IOMMU Address Translation Lookup - Page Table Walk
245  *
246  * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
247  * Both implementations could be merged into a single helper function in the
248  * future. Keeping them separate for now, as the error reporting and flow
249  * specifics are sufficiently different to warrant separate implementations.
250  *
251  * @s        : IOMMU Device State
252  * @ctx      : Translation context for device id and process address space id.
253  * @iotlb    : translation data: physical address and access mode.
254  * @return   : success or fault cause code.
255  */
256 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
257     IOMMUTLBEntry *iotlb)
258 {
259     dma_addr_t addr, base;
260     uint64_t satp, gatp, pte;
261     bool en_s, en_g;
262     struct {
263         unsigned char step;
264         unsigned char levels;
265         unsigned char ptidxbits;
266         unsigned char ptesize;
267     } sc[2];
268     /* Translation stage phase */
269     enum {
270         S_STAGE = 0,
271         G_STAGE = 1,
272     } pass;
273     MemTxResult ret;
274 
275     satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
276     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
277 
278     en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
279     en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
280 
281     /*
282      * Early check for MSI address match when IOVA == GPA.
283      * Note that the (!en_s) condition means that the MSI
284      * page table may only be used when guest pages are
285      * mapped using the g-stage page table, whether single-
286      * or two-stage paging is enabled. It's unavoidable though,
287      * because the spec mandates that we do a first-stage
288      * translation before we check the MSI page table, which
289      * means we can't do an early MSI check unless we have
290      * strictly !en_s.
291      */
292     if (!en_s && (iotlb->perm & IOMMU_WO) &&
293         riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
294         iotlb->target_as = &s->trap_as;
295         iotlb->translated_addr = iotlb->iova;
296         iotlb->addr_mask = ~TARGET_PAGE_MASK;
297         return 0;
298     }
299 
300     /* Exit early for pass-through mode. */
301     if (!(en_s || en_g)) {
302         iotlb->translated_addr = iotlb->iova;
303         iotlb->addr_mask = ~TARGET_PAGE_MASK;
304         /* Allow R/W in pass-through mode */
305         iotlb->perm = IOMMU_RW;
306         return 0;
307     }
308 
309     /* S/G translation parameters. */
310     for (pass = 0; pass < 2; pass++) {
311         uint32_t sv_mode;
312 
313         sc[pass].step = 0;
314         if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
315             (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
316             /* 32bit mode for GXL/SXL == 1 */
317             switch (pass ? gatp : satp) {
318             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
319                 sc[pass].levels    = 0;
320                 sc[pass].ptidxbits = 0;
321                 sc[pass].ptesize   = 0;
322                 break;
323             case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
324                 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
325                 if (!(s->cap & sv_mode)) {
326                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
327                 }
328                 sc[pass].levels    = 2;
329                 sc[pass].ptidxbits = 10;
330                 sc[pass].ptesize   = 4;
331                 break;
332             default:
333                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
334             }
335         } else {
336             /* 64bit mode for GXL/SXL == 0 */
337             switch (pass ? gatp : satp) {
338             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
339                 sc[pass].levels    = 0;
340                 sc[pass].ptidxbits = 0;
341                 sc[pass].ptesize   = 0;
342                 break;
343             case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
344                 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
345                 if (!(s->cap & sv_mode)) {
346                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
347                 }
348                 sc[pass].levels    = 3;
349                 sc[pass].ptidxbits = 9;
350                 sc[pass].ptesize   = 8;
351                 break;
352             case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
353                 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
354                 if (!(s->cap & sv_mode)) {
355                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
356                 }
357                 sc[pass].levels    = 4;
358                 sc[pass].ptidxbits = 9;
359                 sc[pass].ptesize   = 8;
360                 break;
361             case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
362                 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
363                 if (!(s->cap & sv_mode)) {
364                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
365                 }
366                 sc[pass].levels    = 5;
367                 sc[pass].ptidxbits = 9;
368                 sc[pass].ptesize   = 8;
369                 break;
370             default:
371                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
372             }
373         }
374     }
375 
376     /* S/G stages translation tables root pointers */
377     gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
378     satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
379     addr = (en_s && en_g) ? satp : iotlb->iova;
380     base = en_g ? gatp : satp;
381     pass = en_g ? G_STAGE : S_STAGE;
382 
383     do {
384         const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
385         const unsigned va_bits = widened + sc[pass].ptidxbits;
386         const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
387                                  (sc[pass].levels - 1 - sc[pass].step);
388         const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
389         const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
390         const bool ade =
391             ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
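        /*
         * Example (Sv39 S-stage): levels = 3 and ptidxbits = 9, so the
         * root lookup (step 0) uses va_skip = 12 + 9 * 2 = 30 and
         * idx = VA[38:30], while the leaf lookup (step 2) uses
         * va_skip = 12 and idx = VA[20:12]. G-stage root indices are
         * widened by 2 bits for the SvXXx4 formats.
         */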
392 
393         /* Address range check before first level lookup */
394         if (!sc[pass].step) {
395             const uint64_t va_len = va_skip + va_bits;
396             const uint64_t va_mask = (1ULL << va_len) - 1;
397 
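            /*
             * Sv39/48/57 S-stage addresses must be sign-extended from
             * bit (va_len - 1); otherwise the address must simply fit
             * within va_len bits (Sv32 and the G-stage SvXXx4 modes).
             */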
398             if (pass == S_STAGE && va_len > 32) {
399                 target_ulong mask, masked_msbs;
400 
401                 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
402                 masked_msbs = (addr >> (va_len - 1)) & mask;
403 
404                 if (masked_msbs != 0 && masked_msbs != mask) {
405                     return (iotlb->perm & IOMMU_WO) ?
406                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
407                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
408                 }
409             } else {
410                 if ((addr & va_mask) != addr) {
411                     return (iotlb->perm & IOMMU_WO) ?
412                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
413                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
414                 }
415             }
416         }
417 
418         /* Read page table entry */
419         if (sc[pass].ptesize == 4) {
420             uint32_t pte32 = 0;
421             ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
422                              MEMTXATTRS_UNSPECIFIED);
423             pte = pte32;
424         } else {
425             ret = ldq_le_dma(s->target_as, pte_addr, &pte,
426                              MEMTXATTRS_UNSPECIFIED);
427         }
428         if (ret != MEMTX_OK) {
429             return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
430                                             : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
431         }
432 
433         sc[pass].step++;
434         hwaddr ppn = pte >> PTE_PPN_SHIFT;
435 
436         if (!(pte & PTE_V)) {
437             break;                /* Invalid PTE */
438         } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
439             base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
440         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
441             break;                /* Reserved leaf PTE flags: PTE_W */
442         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
443             break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
444         } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
445             break;                /* Misaligned PPN */
446         } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
447             break;                /* Read access check failed */
448         } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
449             break;                /* Write access check failed */
450         } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
451             break;                /* Access bit not set */
452         } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
453             break;                /* Dirty bit not set */
454         } else {
455             /* Leaf PTE, translation completed. */
456             sc[pass].step = sc[pass].levels;
457             base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
458             /* Update address mask based on smallest translation granularity */
459             iotlb->addr_mask &= (1ULL << va_skip) - 1;
460             /* Continue with S-Stage translation? */
461             if (pass && sc[0].step != sc[0].levels) {
462                 pass = S_STAGE;
463                 addr = iotlb->iova;
464                 continue;
465             }
466             /* Translation phase completed (GPA or SPA) */
467             iotlb->translated_addr = base;
468             iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
469                                                          : IOMMU_RO;
470 
471             /* Check MSI GPA address match */
472             if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
473                 riscv_iommu_msi_check(s, ctx, base)) {
474                 /* Trap MSI writes and return GPA address. */
475                 iotlb->target_as = &s->trap_as;
476                 iotlb->addr_mask = ~TARGET_PAGE_MASK;
477                 return 0;
478             }
479 
480             /* Continue with G-Stage translation? */
481             if (!pass && en_g) {
482                 pass = G_STAGE;
483                 addr = base;
484                 base = gatp;
485                 sc[pass].step = 0;
486                 continue;
487             }
488 
489             return 0;
490         }
491 
492         if (sc[pass].step == sc[pass].levels) {
493             break; /* Can't find leaf PTE */
494         }
495 
496         /* Continue with G-Stage translation? */
497         if (!pass && en_g) {
498             pass = G_STAGE;
499             addr = base;
500             base = gatp;
501             sc[pass].step = 0;
502         }
503     } while (1);
504 
505     return (iotlb->perm & IOMMU_WO) ?
506                 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
507                         RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
508                 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
509                         RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
510 }
511 
512 static void riscv_iommu_report_fault(RISCVIOMMUState *s,
513                                      RISCVIOMMUContext *ctx,
514                                      uint32_t fault_type, uint32_t cause,
515                                      bool pv,
516                                      uint64_t iotval, uint64_t iotval2)
517 {
518     struct riscv_iommu_fq_record ev = { 0 };
519 
520     if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
521         switch (cause) {
522         case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
523         case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
524         case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
525         case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
526         case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
527         case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
528         case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
529             break;
530         default:
531             /* DTF prevents reporting a fault for this given cause */
532             return;
533         }
534     }
535 
536     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
537     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
538     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
539     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);
540 
541     if (pv) {
542         ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
543     }
544 
545     ev.iotval = iotval;
546     ev.iotval2 = iotval2;
547 
548     riscv_iommu_fault(s, &ev);
549 }
550 
551 /* Redirect MSI write for given GPA. */
552 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
553     RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
554     unsigned size, MemTxAttrs attrs)
555 {
556     MemTxResult res;
557     dma_addr_t addr;
558     uint64_t intn;
559     uint32_t n190;
560     uint64_t pte[2];
561     int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
562     int cause;
563 
564     /* Interrupt File Number */
565     intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
566     if (intn >= 256) {
567         /* Interrupt file number out of range */
568         res = MEMTX_ACCESS_ERROR;
569         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
570         goto err;
571     }
572 
573     /* fetch MSI PTE */
574     addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
575     addr = addr | (intn * sizeof(pte));
576     res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
577             MEMTXATTRS_UNSPECIFIED);
578     if (res != MEMTX_OK) {
579         if (res == MEMTX_DECODE_ERROR) {
580             cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
581         } else {
582             cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
583         }
584         goto err;
585     }
586 
587     le64_to_cpus(&pte[0]);
588     le64_to_cpus(&pte[1]);
589 
590     if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
591         /*
592          * The spec mentions that "If msipte.C == 1, then further
593          * processing to interpret the PTE is implementation
594          * defined." We'll abort with cause = 262 in this
595          * case too.
596          */
597         res = MEMTX_ACCESS_ERROR;
598         cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
599         goto err;
600     }
601 
602     switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
603     case RISCV_IOMMU_MSI_PTE_M_BASIC:
604         /* MSI Pass-through mode */
605         addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
606 
607         trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
608                               PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
609                               gpa, addr);
610 
611         res = dma_memory_write(s->target_as, addr, &data, size, attrs);
612         if (res != MEMTX_OK) {
613             cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
614             goto err;
615         }
616 
617         return MEMTX_OK;
618     case RISCV_IOMMU_MSI_PTE_M_MRIF:
619         /* MRIF mode, continue. */
620         break;
621     default:
622         res = MEMTX_ACCESS_ERROR;
623         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
624         goto err;
625     }
626 
627     /*
628      * Report an error for interrupt identities exceeding the maximum allowed
629      * for an IMSIC interrupt file (2047) or when the destination address
630      * is not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
631      */
632     if ((data > 2047) || (gpa & 3)) {
633         res = MEMTX_ACCESS_ERROR;
634         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
635         goto err;
636     }
637 
638     /* MSI MRIF mode, non-atomic pending bit update */
639 
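    /*
     * The interrupt identity in 'data' selects the pending bit: bits
     * [10:6] pick a doubleword within the MRIF and bits [5:0] pick the
     * bit within that doubleword; the matching enable doubleword is
     * read 8 bytes further down.
     */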
640     /* MRIF pending bit address */
641     addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
642     addr = addr | ((data & 0x7c0) >> 3);
643 
644     trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
645                           PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
646                           gpa, addr);
647 
648     /* MRIF pending bit mask */
649     data = 1ULL << (data & 0x03f);
650     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
651     if (res != MEMTX_OK) {
652         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
653         goto err;
654     }
655 
656     intn = intn | data;
657     res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
658     if (res != MEMTX_OK) {
659         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
660         goto err;
661     }
662 
663     /* Get MRIF enable bits */
664     addr = addr + sizeof(intn);
665     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
666     if (res != MEMTX_OK) {
667         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
668         goto err;
669     }
670 
671     if (!(intn & data)) {
672         /* notification disabled, MRIF update completed. */
673         return MEMTX_OK;
674     }
675 
676     /* Send notification message */
677     addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
678     n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
679           (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
680 
681     res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
682     if (res != MEMTX_OK) {
683         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
684         goto err;
685     }
686 
687     trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
688 
689     return MEMTX_OK;
690 
691 err:
692     riscv_iommu_report_fault(s, ctx, fault_type, cause,
693                              !!ctx->process_id, 0, 0);
694     return res;
695 }
696 
697 /*
698  * Check device context configuration as described by the
699  * riscv-iommu spec section "Device-context configuration
700  * checks".
701  */
702 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
703                                             RISCVIOMMUContext *ctx)
704 {
705     uint32_t fsc_mode, msi_mode;
706     uint64_t gatp;
707 
708     if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
709         (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
710          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
711          ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
712         return false;
713     }
714 
715     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
716         (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
717          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
718         return false;
719     }
720 
721     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
722         ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
723         return false;
724     }
725 
726     if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
727         ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
728         return false;
729     }
730 
731     if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
732         msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
733 
734         if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
735             msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
736             return false;
737         }
738     }
739 
740     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
741     if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
742         gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
743         return false;
744     }
745 
746     fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
747 
748     if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
749         switch (fsc_mode) {
750         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
751             if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
752                 return false;
753             }
754             break;
755         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
756             if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
757                 return false;
758             }
759             break;
760         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
761             if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
762                 return false;
763             }
764             break;
765         }
766     } else {
767         /* DC.tc.PDTV is 0 */
768         if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
769             return false;
770         }
771 
772         if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
773             if (fsc_mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
774                 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
775                 return false;
776             }
777         } else {
778             switch (fsc_mode) {
779             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
780                 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
781                     return false;
782                 }
783                 break;
784             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
785                 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
786                     return false;
787                 }
788                 break;
789             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
790                 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
791                     return false;
792                 }
793                 break;
794             }
795         }
796     }
797 
798     /*
799      * CAP_END is always zero (only one endianness). FCTL_BE is
800      * always zero (little-endian accesses). Thus TC_SBE must
801      * always be LE, i.e. zero.
802      */
803     if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
804         return false;
805     }
806 
807     return true;
808 }
809 
810 /*
811  * Validate process context (PC) according to section
812  * "Process-context configuration checks".
813  */
814 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
815                                              RISCVIOMMUContext *ctx)
816 {
817     uint32_t mode;
818 
819     if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
820         return false;
821     }
822 
823     if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
824         return false;
825     }
826 
827     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
828     switch (mode) {
829     case RISCV_IOMMU_DC_FSC_MODE_BARE:
830     /* sv39 and sv32 modes have the same value (8) */
831     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
832     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
833     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
834         break;
835     default:
836         return false;
837     }
838 
839     if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
840         if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
841             !(s->cap & RISCV_IOMMU_CAP_SV32)) {
842                 return false;
843         }
844     } else {
845         switch (mode) {
846         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
847             if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
848                 return false;
849             }
850             break;
851         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
852             if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
853                 return false;
854             }
855             break;
856         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
857             if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
858                 return false;
859             }
860             break;
861         }
862     }
863 
864     return true;
865 }
866 
867 /*
868  * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
869  *
870  * @s         : IOMMU Device State
871  * @ctx       : Device Translation Context with devid and process_id set.
872  * @return    : success or fault code.
873  */
874 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
875 {
876     const uint64_t ddtp = s->ddtp;
877     unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
878     dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
879     struct riscv_iommu_dc dc;
880     /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
881     const int dc_fmt = !s->enable_msi;
882     const size_t dc_len = sizeof(dc) >> dc_fmt;
883     int depth;
884     uint64_t de;
885 
886     switch (mode) {
887     case RISCV_IOMMU_DDTP_MODE_OFF:
888         return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
889 
890     case RISCV_IOMMU_DDTP_MODE_BARE:
891         /* mock up pass-through translation context */
892         ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
893             RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
894         ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
895             RISCV_IOMMU_DC_FSC_MODE_BARE);
896 
897         ctx->tc = RISCV_IOMMU_DC_TC_V;
898         if (s->enable_ats) {
899             ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
900         }
901 
902         ctx->ta = 0;
903         ctx->msiptp = 0;
904         return 0;
905 
906     case RISCV_IOMMU_DDTP_MODE_1LVL:
907         depth = 0;
908         break;
909 
910     case RISCV_IOMMU_DDTP_MODE_2LVL:
911         depth = 1;
912         break;
913 
914     case RISCV_IOMMU_DDTP_MODE_3LVL:
915         depth = 2;
916         break;
917 
918     default:
919         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
920     }
921 
922     /*
923      * Check supported device id width (in bits).
924      * See IOMMU Specification, Chapter 6. Software guidelines.
925      * - if extended device-context format is used:
926      *   1LVL: 6, 2LVL: 15, 3LVL: 24
927      * - if base device-context format is used:
928      *   1LVL: 7, 2LVL: 16, 3LVL: 24
929      */
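    /*
     * The expression below yields exactly those widths: depth 0/1/2 gives
     * 6/15/24 bits for the extended format (dc_fmt == 0) and 7/16/24 bits
     * for the base format (dc_fmt == 1), as no extra bit is added at
     * depth == 2.
     */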
930     if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
931         return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
932     }
933 
934     /* Device directory tree walk */
935     for (; depth-- > 0; ) {
936         /*
937          * Select device id index bits based on device directory tree level
938          * and device context format.
939          * See IOMMU Specification, Chapter 2. Data Structures.
940          * - if extended device-context format is used:
941          *   device index: [23:15][14:6][5:0]
942          * - if base device-context format is used:
943          *   device index: [23:16][15:7][6:0]
944          */
945         const int split = depth * 9 + 6 + dc_fmt;
946         addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
947         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
948                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
949             return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
950         }
951         le64_to_cpus(&de);
952         if (!(de & RISCV_IOMMU_DDTE_VALID)) {
953             /* invalid directory entry */
954             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
955         }
956         if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
957             /* reserved bits set */
958             return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
959         }
960         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
961     }
962 
963     /* index into device context entry page */
964     addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
965 
966     memset(&dc, 0, sizeof(dc));
967     if (dma_memory_read(s->target_as, addr, &dc, dc_len,
968                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
969         return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
970     }
971 
972     /* Set translation context. */
973     ctx->tc = le64_to_cpu(dc.tc);
974     ctx->gatp = le64_to_cpu(dc.iohgatp);
975     ctx->satp = le64_to_cpu(dc.fsc);
976     ctx->ta = le64_to_cpu(dc.ta);
977     ctx->msiptp = le64_to_cpu(dc.msiptp);
978     ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
979     ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
980 
981     if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
982         return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
983     }
984 
985     if (!riscv_iommu_validate_device_ctx(s, ctx)) {
986         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
987     }
988 
989     /* FSC field checks */
990     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
991     addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
992 
993     if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
994         if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
995             /* PID is disabled */
996             return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
997         }
998         if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
999             /* Invalid translation mode */
1000             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1001         }
1002         return 0;
1003     }
1004 
1005     if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
1006         if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
1007             /* No default process_id enabled, set BARE mode */
1008             ctx->satp = 0ULL;
1009             return 0;
1010         } else {
1011             /* Use default process_id #0 */
1012             ctx->process_id = 0;
1013         }
1014     }
1015 
1016     if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1017         /* No S-Stage translation, done. */
1018         return 0;
1019     }
1020 
1021     /* DC.tc.PDTV enabled: FSC holds the process directory table pointer */
1022     if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1023         /* Invalid PDTP.MODE */
1024         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1025     }
1026 
1027     for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1028         /*
1029          * Select process id index bits based on process directory tree
1030          * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1031          */
1032         const int split = depth * 9 + 8;
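        /*
         * e.g. PD20 walks non-leaf indices [19:17] then [16:8], PD17 walks
         * [16:8]; the leaf lookup below uses bits [7:0].
         */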
1033         addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1034         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1035                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1036             return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1037         }
1038         le64_to_cpus(&de);
1039         if (!(de & RISCV_IOMMU_PC_TA_V)) {
1040             return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1041         }
1042         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
1043     }
1044 
1045     /* Leaf entry in PDT */
1046     addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1047     if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
1048                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1049         return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1050     }
1051 
1052     /* Use FSC and TA from process directory entry. */
1053     ctx->ta = le64_to_cpu(dc.ta);
1054     ctx->satp = le64_to_cpu(dc.fsc);
1055 
1056     if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1057         return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1058     }
1059 
1060     if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1061         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1062     }
1063 
1064     return 0;
1065 }
1066 
1067 /* Translation Context cache support */
1068 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1069 {
1070     RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1071     RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1072     return c1->devid == c2->devid &&
1073            c1->process_id == c2->process_id;
1074 }
1075 
1076 static guint riscv_iommu_ctx_hash(gconstpointer v)
1077 {
1078     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1079     /*
1080      * Generate simple hash of (process_id, devid)
1081      * assuming 24-bit wide devid.
1082      */
1083     return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1084 }
1085 
1086 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
1087                                                gpointer data)
1088 {
1089     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1090     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1091     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1092         ctx->devid == arg->devid &&
1093         ctx->process_id == arg->process_id) {
1094         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1095     }
1096 }
1097 
1098 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
1099                                         gpointer data)
1100 {
1101     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1102     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1103     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1104         ctx->devid == arg->devid) {
1105         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1106     }
1107 }
1108 
1109 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
1110                                       gpointer data)
1111 {
1112     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1113     if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
1114         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1115     }
1116 }
1117 
1118 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
1119                                   uint32_t devid, uint32_t process_id)
1120 {
1121     GHashTable *ctx_cache;
1122     RISCVIOMMUContext key = {
1123         .devid = devid,
1124         .process_id = process_id,
1125     };
1126     ctx_cache = g_hash_table_ref(s->ctx_cache);
1127     g_hash_table_foreach(ctx_cache, func, &key);
1128     g_hash_table_unref(ctx_cache);
1129 }
1130 
1131 /* Find or allocate translation context for a given {device_id, process_id} */
1132 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1133                                           unsigned devid, unsigned process_id,
1134                                           void **ref)
1135 {
1136     GHashTable *ctx_cache;
1137     RISCVIOMMUContext *ctx;
1138     RISCVIOMMUContext key = {
1139         .devid = devid,
1140         .process_id = process_id,
1141     };
1142 
1143     ctx_cache = g_hash_table_ref(s->ctx_cache);
1144     ctx = g_hash_table_lookup(ctx_cache, &key);
1145 
1146     if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1147         *ref = ctx_cache;
1148         return ctx;
1149     }
1150 
1151     ctx = g_new0(RISCVIOMMUContext, 1);
1152     ctx->devid = devid;
1153     ctx->process_id = process_id;
1154 
1155     int fault = riscv_iommu_ctx_fetch(s, ctx);
1156     if (!fault) {
1157         if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1158             g_hash_table_unref(ctx_cache);
1159             ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1160                                               riscv_iommu_ctx_equal,
1161                                               g_free, NULL);
1162             g_hash_table_ref(ctx_cache);
1163             g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1164         }
1165         g_hash_table_add(ctx_cache, ctx);
1166         *ref = ctx_cache;
1167         return ctx;
1168     }
1169 
1170     g_hash_table_unref(ctx_cache);
1171     *ref = NULL;
1172 
1173     riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1174                              fault, !!process_id, 0, 0);
1175 
1176     g_free(ctx);
1177     return NULL;
1178 }
1179 
1180 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
1181 {
1182     if (ref) {
1183         g_hash_table_unref((GHashTable *)ref);
1184     }
1185 }
1186 
1187 /* Find or allocate address space for a given device */
1188 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1189 {
1190     RISCVIOMMUSpace *as;
1191 
1192     /* FIXME: PCIe bus remapping for attached endpoints. */
1193     devid |= s->bus << 8;
1194 
1195     QLIST_FOREACH(as, &s->spaces, list) {
1196         if (as->devid == devid) {
1197             break;
1198         }
1199     }
1200 
1201     if (as == NULL) {
1202         char name[64];
1203         as = g_new0(RISCVIOMMUSpace, 1);
1204 
1205         as->iommu = s;
1206         as->devid = devid;
1207 
1208         snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1209             PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1210 
1211         /* IOVA address space, untranslated addresses */
1212         memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1213             TYPE_RISCV_IOMMU_MEMORY_REGION,
1214             OBJECT(as), "riscv_iommu", UINT64_MAX);
1215         address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1216 
1217         QLIST_INSERT_HEAD(&s->spaces, as, list);
1218 
1219         trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1220                 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1221     }
1222     return &as->iova_as;
1223 }
1224 
1225 /* Translation Object cache support */
1226 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1227 {
1228     RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1229     RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1230     return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1231            t1->iova == t2->iova;
1232 }
1233 
1234 static guint riscv_iommu_iot_hash(gconstpointer v)
1235 {
1236     RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1237     return (guint)t->iova;
1238 }
1239 
1240 /* GV: 1 PSCV: 1 AV: 1 */
1241 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1242                                              gpointer data)
1243 {
1244     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1245     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1246     if (iot->gscid == arg->gscid &&
1247         iot->pscid == arg->pscid &&
1248         iot->iova == arg->iova) {
1249         iot->perm = IOMMU_NONE;
1250     }
1251 }
1252 
1253 /* GV: 1 PSCV: 1 AV: 0 */
1254 static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
1255                                         gpointer data)
1256 {
1257     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1258     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1259     if (iot->gscid == arg->gscid &&
1260         iot->pscid == arg->pscid) {
1261         iot->perm = IOMMU_NONE;
1262     }
1263 }
1264 
1265 /* GV: 1 GVMA: 1 */
1266 static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
1267                                             gpointer data)
1268 {
1269     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1270     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1271     if (iot->gscid == arg->gscid) {
1272         /* simplified cache, no GPA matching */
1273         iot->perm = IOMMU_NONE;
1274     }
1275 }
1276 
1277 /* GV: 1 GVMA: 0 */
1278 static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
1279                                         gpointer data)
1280 {
1281     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1282     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1283     if (iot->gscid == arg->gscid) {
1284         iot->perm = IOMMU_NONE;
1285     }
1286 }
1287 
1288 /* GV: 0 */
1289 static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
1290                                       gpointer data)
1291 {
1292     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1293     iot->perm = IOMMU_NONE;
1294 }
1295 
1296 /* caller should keep ref-count for iot_cache object */
1297 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1298     GHashTable *iot_cache, hwaddr iova)
1299 {
1300     RISCVIOMMUEntry key = {
1301         .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1302         .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1303         .iova  = PPN_DOWN(iova),
1304     };
1305     return g_hash_table_lookup(iot_cache, &key);
1306 }
1307 
1308 /* caller should keep ref-count for iot_cache object */
1309 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1310     GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1311 {
1312     if (!s->iot_limit) {
1313         return;
1314     }
1315 
1316     if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1317         iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1318                                           riscv_iommu_iot_equal,
1319                                           g_free, NULL);
1320         g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1321     }
1322     g_hash_table_add(iot_cache, iot);
1323 }
1324 
1325 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
1326     uint32_t gscid, uint32_t pscid, hwaddr iova)
1327 {
1328     GHashTable *iot_cache;
1329     RISCVIOMMUEntry key = {
1330         .gscid = gscid,
1331         .pscid = pscid,
1332         .iova  = PPN_DOWN(iova),
1333     };
1334 
1335     iot_cache = g_hash_table_ref(s->iot_cache);
1336     g_hash_table_foreach(iot_cache, func, &key);
1337     g_hash_table_unref(iot_cache);
1338 }
1339 
1340 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
1341     IOMMUTLBEntry *iotlb, bool enable_cache)
1342 {
1343     RISCVIOMMUEntry *iot;
1344     IOMMUAccessFlags perm;
1345     bool enable_pid;
1346     bool enable_pri;
1347     GHashTable *iot_cache;
1348     int fault;
1349 
1350     iot_cache = g_hash_table_ref(s->iot_cache);
1351     /*
1352      * TC[32] is reserved for custom extensions, used here to temporarily
1353      * enable automatic page-request generation for ATS queries.
1354      */
1355     enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
1356     enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
1357 
1358     /* Check for ATS request. */
1359     if (iotlb->perm == IOMMU_NONE) {
1360         /* Check if ATS is disabled. */
1361         if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
1362             enable_pri = false;
1363             fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1364             goto done;
1365         }
1366     }
1367 
1368     iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
1369     perm = iot ? iot->perm : IOMMU_NONE;
1370     if (perm != IOMMU_NONE) {
1371         iotlb->translated_addr = PPN_PHYS(iot->phys);
1372         iotlb->addr_mask = ~TARGET_PAGE_MASK;
1373         iotlb->perm = perm;
1374         fault = 0;
1375         goto done;
1376     }
1377 
1378     /* Translate using device directory / page table information. */
1379     fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
1380 
1381     if (!fault && iotlb->target_as == &s->trap_as) {
1382         /* Do not cache trapped MSI translations */
1383         goto done;
1384     }
1385 
1386     /*
1387      * We made an implementation choice to not cache identity-mapped
1388      * translations, as allowed by the specification, to avoid
1389      * translation cache evictions for other devices sharing the
1390      * IOMMU hardware model.
1391      */
1392     if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
1393         iot = g_new0(RISCVIOMMUEntry, 1);
1394         iot->iova = PPN_DOWN(iotlb->iova);
1395         iot->phys = PPN_DOWN(iotlb->translated_addr);
1396         iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
1397         iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
1398         iot->perm = iotlb->perm;
1399         riscv_iommu_iot_update(s, iot_cache, iot);
1400     }
1401 
1402 done:
1403     g_hash_table_unref(iot_cache);
1404 
1405     if (enable_pri && fault) {
1406         struct riscv_iommu_pq_record pr = {0};
1407         if (enable_pid) {
1408             pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
1409                                RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
1410         }
1411         pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
1412         pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
1413                      RISCV_IOMMU_PREQ_PAYLOAD_M;
1414         riscv_iommu_pri(s, &pr);
1415         return fault;
1416     }
1417 
1418     if (fault) {
1419         unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
1420 
1421         if (iotlb->perm & IOMMU_RW) {
1422             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
1423         } else if (iotlb->perm & IOMMU_RO) {
1424             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
1425         }
1426 
1427         riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
1428                                  iotlb->iova, iotlb->translated_addr);
1429         return fault;
1430     }
1431 
1432     return 0;
1433 }
1434 
1435 /* IOMMU Command Interface */
1436 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1437     uint64_t addr, uint32_t data)
1438 {
1439     /*
1440      * ATS processing in this implementation of the IOMMU is synchronous,
1441      * no need to wait for completions here.
1442      */
1443     if (!notify) {
1444         return MEMTX_OK;
1445     }
1446 
1447     return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1448         MEMTXATTRS_UNSPECIFIED);
1449 }
1450 
1451 static void riscv_iommu_ats(RISCVIOMMUState *s,
1452     struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
1453     IOMMUAccessFlags perm,
1454     void (*trace_fn)(const char *id))
1455 {
1456     RISCVIOMMUSpace *as = NULL;
1457     IOMMUNotifier *n;
1458     IOMMUTLBEvent event;
1459     uint32_t pid;
1460     uint32_t devid;
1461     const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
1462 
1463     if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
1464         /* Use device segment and requester id */
1465         devid = get_field(cmd->dword0,
1466             RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
1467     } else {
1468         devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
1469     }
1470 
1471     pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
1472 
1473     QLIST_FOREACH(as, &s->spaces, list) {
1474         if (as->devid == devid) {
1475             break;
1476         }
1477     }
1478 
1479     if (!as || !as->notifier) {
1480         return;
1481     }
1482 
1483     event.type = flag;
1484     event.entry.perm = perm;
1485     event.entry.target_as = s->target_as;
1486 
1487     IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
1488         if (!pv || n->iommu_idx == pid) {
1489             event.entry.iova = n->start;
1490             event.entry.addr_mask = n->end - n->start;
1491             trace_fn(as->iova_mr.parent_obj.name);
1492             memory_region_notify_iommu_one(n, &event);
1493         }
1494     }
1495 }
1496 
1497 static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
1498     struct riscv_iommu_command *cmd)
1499 {
1500     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
1501                            trace_riscv_iommu_ats_inval);
1502 }
1503 
1504 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
1505     struct riscv_iommu_command *cmd)
1506 {
1507     unsigned resp_code = get_field(cmd->dword1,
1508                                    RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
1509 
1510     /* Using the access flag to carry response code information */
1511     IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
1512     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
1513                            trace_riscv_iommu_ats_prgr);
1514 }
1515 
1516 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
1517 {
1518     uint64_t old_ddtp = s->ddtp;
1519     uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
1520     unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
1521     unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
1522     bool ok = false;
1523 
1524     /*
1525      * Check for allowed DDTP.MODE transitions:
1526      * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
1527      * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
1528      */
1529     if (new_mode == old_mode ||
1530         new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1531         new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
1532         ok = true;
1533     } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
1534                new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
1535                new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
1536         ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1537              old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
1538     }
1539 
1540     if (ok) {
1541         /* clear reserved and busy bits, report back sanitized version */
1542         new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
1543                              RISCV_IOMMU_DDTP_MODE, new_mode);
1544     } else {
1545         new_ddtp = old_ddtp;
1546     }
1547     s->ddtp = new_ddtp;
1548 
1549     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
1550 }
1551 
1552 /* Command function and opcode field. */
1553 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
1554 
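/*
 * Process pending command queue entries, from CQH up to CQT. Each command is
 * matched on the combined opcode/function value built with RISCV_IOMMU_CMD().
 * On a queue memory fault or an illegal command the corresponding CQCSR error
 * bit is set, processing stops without advancing CQH past the offending
 * entry, and a command queue interrupt is raised if enabled.
 */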
1555 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
1556 {
1557     struct riscv_iommu_command cmd;
1558     MemTxResult res;
1559     dma_addr_t addr;
1560     uint32_t tail, head, ctrl;
1561     uint64_t cmd_opcode;
1562     GHFunc func;
1563 
1564     ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1565     tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
1566     head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
1567 
1568     /* Check for pending error or queue processing disabled */
1569     if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
1570         !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
1571         return;
1572     }
1573 
1574     while (tail != head) {
1575         addr = s->cq_addr + head * sizeof(cmd);
1576         res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
1577                               MEMTXATTRS_UNSPECIFIED);
1578 
1579         if (res != MEMTX_OK) {
1580             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1581                                   RISCV_IOMMU_CQCSR_CQMF, 0);
1582             goto fault;
1583         }
1584 
1585         trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
1586 
1587         cmd_opcode = get_field(cmd.dword0,
1588                                RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
1589 
1590         switch (cmd_opcode) {
1591         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
1592                              RISCV_IOMMU_CMD_IOFENCE_OPCODE):
1593             res = riscv_iommu_iofence(s,
1594                 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
1595                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
1596 
1597             if (res != MEMTX_OK) {
1598                 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1599                                       RISCV_IOMMU_CQCSR_CQMF, 0);
1600                 goto fault;
1601             }
1602             break;
1603 
1604         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
1605                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1606             if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
1607                 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
1608                 goto cmd_ill;
1609             } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
1610                 /* invalidate all cache mappings */
1611                 func = riscv_iommu_iot_inval_all;
1612             } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
1613                 /* invalidate cache matching GSCID */
1614                 func = riscv_iommu_iot_inval_gscid;
1615             } else {
1616                 /* invalidate cache matching GSCID and ADDR (GPA) */
1617                 func = riscv_iommu_iot_inval_gscid_gpa;
1618             }
1619             riscv_iommu_iot_inval(s, func,
1620                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
1621                 cmd.dword1 << 2 & TARGET_PAGE_MASK);
1622             break;
1623 
1624         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
1625                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1626             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
1627                 /* invalidate all cache mappings, simplified model */
1628                 func = riscv_iommu_iot_inval_all;
1629             } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
1630                 /* invalidate cache matching GSCID, simplified model */
1631                 func = riscv_iommu_iot_inval_gscid;
1632             } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
1633                 /* invalidate cache matching GSCID and PSCID */
1634                 func = riscv_iommu_iot_inval_pscid;
1635             } else {
1636                 /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
1637                 func = riscv_iommu_iot_inval_pscid_iova;
1638             }
1639             riscv_iommu_iot_inval(s, func,
1640                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
1641                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
1642                 cmd.dword1 << 2 & TARGET_PAGE_MASK);
1643             break;
1644 
1645         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
1646                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1647             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1648                 /* invalidate all device context cache mappings */
1649                 func = riscv_iommu_ctx_inval_all;
1650             } else {
1651                 /* invalidate all device context matching DID */
1652                 func = riscv_iommu_ctx_inval_devid;
1653             }
1654             riscv_iommu_ctx_inval(s, func,
1655                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
1656             break;
1657 
1658         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
1659                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1660             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1661                 /* illegal command arguments IODIR_PDT & DV == 0 */
1662                 goto cmd_ill;
1663             } else {
1664                 func = riscv_iommu_ctx_inval_devid_procid;
1665             }
1666             riscv_iommu_ctx_inval(s, func,
1667                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
1668                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
1669             break;
1670 
1671         /* ATS commands */
1672         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
1673                              RISCV_IOMMU_CMD_ATS_OPCODE):
1674             if (!s->enable_ats) {
1675                 goto cmd_ill;
1676             }
1677 
1678             riscv_iommu_ats_inval(s, &cmd);
1679             break;
1680 
1681         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
1682                              RISCV_IOMMU_CMD_ATS_OPCODE):
1683             if (!s->enable_ats) {
1684                 goto cmd_ill;
1685             }
1686 
1687             riscv_iommu_ats_prgr(s, &cmd);
1688             break;
1689 
1690         default:
1691         cmd_ill:
1692             /* Invalid command, do not advance the command queue head. */
1693             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1694                 RISCV_IOMMU_CQCSR_CMD_ILL, 0);
1695             goto fault;
1696         }
1697 
1698         /* Advance and update head pointer after command completes. */
1699         head = (head + 1) & s->cq_mask;
1700         riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
1701     }
1702     return;
1703 
1704 fault:
1705     if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
1706         riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
1707     }
1708 }
1709 
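/*
 * Handle software updates to CQCSR. Enabling an inactive queue latches the
 * base address and size from CQB, resets the head and tail pointers and
 * reports CQON; disabling an active queue makes CQT read-only and clears
 * CQON. The BUSY bit is cleared in all cases. The fault queue (FQCSR) and
 * page-request queue (PQCSR) handlers below follow the same pattern.
 */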
1710 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
1711 {
1712     uint64_t base;
1713     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1714     uint32_t ctrl_clr;
1715     bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
1716     bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
1717 
1718     if (enable && !active) {
1719         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
1720         s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
1721         s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
1722         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
1723         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
1724         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
1725         ctrl_set = RISCV_IOMMU_CQCSR_CQON;
1726         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
1727                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
1728                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
1729     } else if (!enable && active) {
1730         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
1731         ctrl_set = 0;
1732         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
1733     } else {
1734         ctrl_set = 0;
1735         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
1736     }
1737 
1738     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
1739 }
1740 
1741 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
1742 {
1743     uint64_t base;
1744     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1745     uint32_t ctrl_clr;
1746     bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
1747     bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
1748 
1749     if (enable && !active) {
1750         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
1751         s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
1752         s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
1753         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
1754         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
1755         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
1756         ctrl_set = RISCV_IOMMU_FQCSR_FQON;
1757         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
1758             RISCV_IOMMU_FQCSR_FQOF;
1759     } else if (!enable && active) {
1760         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
1761         ctrl_set = 0;
1762         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
1763     } else {
1764         ctrl_set = 0;
1765         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
1766     }
1767 
1768     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
1769 }
1770 
1771 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
1772 {
1773     uint64_t base;
1774     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
1775     uint32_t ctrl_clr;
1776     bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
1777     bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
1778 
1779     if (enable && !active) {
1780         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
1781         s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
1782         s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
1783         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
1784         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
1785         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
1786         ctrl_set = RISCV_IOMMU_PQCSR_PQON;
1787         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
1788             RISCV_IOMMU_PQCSR_PQOF;
1789     } else if (!enable && active) {
1790         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
1791         ctrl_set = 0;
1792         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
1793     } else {
1794         ctrl_set = 0;
1795         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
1796     }
1797 
1798     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
1799 }
1800 
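/*
 * Translation request debug interface: when the Go/Busy bit is set in
 * TR_REQ_CTL, translate the IOVA from TR_REQ_IOVA using the requested
 * device/process context, report the resulting PPN or the fault cause in
 * TR_RESPONSE, and clear the Go/Busy bit when done.
 */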
1801 static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
1802 {
1803     uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
1804     uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
1805     unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
1806     unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
1807     RISCVIOMMUContext *ctx;
1808     void *ref;
1809 
1810     if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
1811         return;
1812     }
1813 
1814     ctx = riscv_iommu_ctx(s, devid, pid, &ref);
1815     if (ctx == NULL) {
1816         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
1817                                  RISCV_IOMMU_TR_RESPONSE_FAULT |
1818                                  (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
1819     } else {
1820         IOMMUTLBEntry iotlb = {
1821             .iova = iova,
1822             .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
1823             .addr_mask = ~0,
1824             .target_as = NULL,
1825         };
1826         int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
1827         if (fault) {
1828             iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
1829         } else {
1830             iova = iotlb.translated_addr & ~iotlb.addr_mask;
1831             iova >>= TARGET_PAGE_BITS;
1832             iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
1833 
1834             /* We do not support superpages (> 4 KiB) for now */
1835             iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
1836         }
1837         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
1838     }
1839 
1840     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
1841         RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
1842     riscv_iommu_ctx_put(s, ref);
1843 }
1844 
1845 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
1846 
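/*
 * Clamp each requested interrupt cause-to-vector assignment (CIV, FIV, PMIV,
 * PIV) to the set of vectors actually available before updating ICVEC.
 */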
1847 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
1848 {
1849     uint64_t icvec = 0;
1850 
1851     icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
1852                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);
1853 
1854     icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
1855                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);
1856 
1857     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
1858                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);
1859 
1860     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
1861                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);
1862 
1863     trace_riscv_iommu_icvec_write(data, icvec);
1864 
1865     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
1866 }
1867 
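/*
 * Re-evaluate the interrupt pending bits written to IPSR: each of CIP, FIP
 * and PIP is kept set only if the corresponding queue has its interrupt
 * enable bit set and at least one interrupt condition active in its CSR,
 * and is cleared otherwise.
 */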
1868 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
1869 {
1870     uint32_t cqcsr, fqcsr, pqcsr;
1871     uint32_t ipsr_set = 0;
1872     uint32_t ipsr_clr = 0;
1873 
1874     if (data & RISCV_IOMMU_IPSR_CIP) {
1875         cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1876 
1877         if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
1878             (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
1879              cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
1880              cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
1881              cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
1882             ipsr_set |= RISCV_IOMMU_IPSR_CIP;
1883         } else {
1884             ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1885         }
1886     } else {
1887         ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1888     }
1889 
1890     if (data & RISCV_IOMMU_IPSR_FIP) {
1891         fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1892 
1893         if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
1894             (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
1895              fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
1896             ipsr_set |= RISCV_IOMMU_IPSR_FIP;
1897         } else {
1898             ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
1899         }
1900     } else {
1901         ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
1902     }
1903 
1904     if (data & RISCV_IOMMU_IPSR_PIP) {
1905         pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
1906 
1907         if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
1908             (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
1909              pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
1910             ipsr_set |= RISCV_IOMMU_IPSR_PIP;
1911         } else {
1912             ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
1913         }
1914     } else {
1915         ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
1916     }
1917 
1918     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
1919 }
1920 
1921 /*
1922  * Write the value that results from storing 'data' to the register at
1923  * 'reg_addr', after applying the read-only/read-write/write-1-to-clear
1924  * bit masks, into the buffer pointed to by 'dest'.
1925  *
1926  * The result is written in little-endian.
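 *
 * For example, with illustrative masks ro = 0xF0 (bits 7:4 read-only) and
 * wc = 0x80 (bit 7 write-1-to-clear), a stored value rw = 0xAB and a write
 * of data = 0x85 yield ((0xAB & 0xF0) | (0x85 & 0x0F)) & ~0x80 = 0x25:
 * read-only bits keep their stored value, writable bits take the written
 * value, and bit 7 is cleared because it was written as one.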
1927  */
1928 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
1929                                       void *dest, hwaddr reg_addr,
1930                                       int size, uint64_t data)
1931 {
1932     uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
1933     uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
1934     uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
1935 
1936     stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
1937 }
1938 
1939 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
1940                                           uint64_t data, unsigned size,
1941                                           MemTxAttrs attrs)
1942 {
1943     riscv_iommu_process_fn *process_fn = NULL;
1944     RISCVIOMMUState *s = opaque;
1945     uint32_t regb = addr & ~3;
1946     uint32_t busy = 0;
1947     uint64_t val = 0;
1948 
1949     if ((addr & (size - 1)) != 0) {
1950         /* Unsupported MMIO alignment or access size */
1951         return MEMTX_ERROR;
1952     }
1953 
1954     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
1955         /* Unsupported MMIO access location. */
1956         return MEMTX_ACCESS_ERROR;
1957     }
1958 
1959     /* Track actionable MMIO write. */
1960     switch (regb) {
1961     case RISCV_IOMMU_REG_DDTP:
1962     case RISCV_IOMMU_REG_DDTP + 4:
1963         process_fn = riscv_iommu_process_ddtp;
1964         regb = RISCV_IOMMU_REG_DDTP;
1965         busy = RISCV_IOMMU_DDTP_BUSY;
1966         break;
1967 
1968     case RISCV_IOMMU_REG_CQT:
1969         process_fn = riscv_iommu_process_cq_tail;
1970         break;
1971 
1972     case RISCV_IOMMU_REG_CQCSR:
1973         process_fn = riscv_iommu_process_cq_control;
1974         busy = RISCV_IOMMU_CQCSR_BUSY;
1975         break;
1976 
1977     case RISCV_IOMMU_REG_FQCSR:
1978         process_fn = riscv_iommu_process_fq_control;
1979         busy = RISCV_IOMMU_FQCSR_BUSY;
1980         break;
1981 
1982     case RISCV_IOMMU_REG_PQCSR:
1983         process_fn = riscv_iommu_process_pq_control;
1984         busy = RISCV_IOMMU_PQCSR_BUSY;
1985         break;
1986 
1987     case RISCV_IOMMU_REG_ICVEC:
1988     case RISCV_IOMMU_REG_IPSR:
1989         /*
1990          * ICVEC and IPSR have special read/write procedures. We'll
1991          * call their respective helpers and exit.
1992          */
1993         riscv_iommu_write_reg_val(s, &val, addr, size, data);
1994 
1995         /*
1996          * 'val' is stored as LE. Switch to host endianness
1997          * before using it.
1998          */
1999         val = le64_to_cpu(val);
2000 
2001         if (regb == RISCV_IOMMU_REG_ICVEC) {
2002             riscv_iommu_update_icvec(s, val);
2003         } else {
2004             riscv_iommu_update_ipsr(s, val);
2005         }
2006 
2007         return MEMTX_OK;
2008 
2009     case RISCV_IOMMU_REG_TR_REQ_CTL:
2010         process_fn = riscv_iommu_process_dbg;
2011         regb = RISCV_IOMMU_REG_TR_REQ_CTL;
2012         busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
2013         break;
2014 
2015     default:
2016         break;
2017     }
2018 
2019     /*
2020      * Register updates might not be synchronized with the core logic.
2021      * If system software updates a register while the relevant BUSY
2022      * bit is set, the IOMMU behavior for such additional writes to the
2023      * register is UNSPECIFIED.
2024      */
2025     riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2026 
2027     /* Busy flag update, MSB 4-byte register. */
2028     if (busy) {
2029         uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2030         stl_le_p(&s->regs_rw[regb], rw | busy);
2031     }
2032 
2033     if (process_fn) {
2034         process_fn(s);
2035     }
2036 
2037     return MEMTX_OK;
2038 }
2039 
2040 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2041     uint64_t *data, unsigned size, MemTxAttrs attrs)
2042 {
2043     RISCVIOMMUState *s = opaque;
2044     uint64_t val = -1;
2045     uint8_t *ptr;
2046 
2047     if ((addr & (size - 1)) != 0) {
2048         /* Unsupported MMIO alignment. */
2049         return MEMTX_ERROR;
2050     }
2051 
2052     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2053         return MEMTX_ACCESS_ERROR;
2054     }
2055 
2056     ptr = &s->regs_rw[addr];
2057     val = ldn_le_p(ptr, size);
2058 
2059     *data = val;
2060 
2061     return MEMTX_OK;
2062 }
2063 
2064 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2065     .read_with_attrs = riscv_iommu_mmio_read,
2066     .write_with_attrs = riscv_iommu_mmio_write,
2067     .endianness = DEVICE_NATIVE_ENDIAN,
2068     .impl = {
2069         .min_access_size = 4,
2070         .max_access_size = 8,
2071         .unaligned = false,
2072     },
2073     .valid = {
2074         .min_access_size = 4,
2075         .max_access_size = 8,
2076     }
2077 };
2078 
2079 /*
2080  * Translations matching the MSI address pattern check are redirected to the
2081  * "riscv-iommu-trap" memory region as untranslated addresses, for additional
2082  * MSI/MRIF interception by the IOMMU interrupt remapping implementation.
2083  * Note: device emulation code generating an MSI is expected to provide valid
2084  * memory transaction attributes with requester_id set.
2085  */
2086 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2087     uint64_t data, unsigned size, MemTxAttrs attrs)
2088 {
2089     RISCVIOMMUState *s = (RISCVIOMMUState *)opaque;
2090     RISCVIOMMUContext *ctx;
2091     MemTxResult res;
2092     void *ref;
2093     uint32_t devid = attrs.requester_id;
2094 
2095     if (attrs.unspecified) {
2096         return MEMTX_ACCESS_ERROR;
2097     }
2098 
2099     /* FIXME: PCIe bus remapping for attached endpoints. */
2100     devid |= s->bus << 8;
2101 
2102     ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2103     if (ctx == NULL) {
2104         res = MEMTX_ACCESS_ERROR;
2105     } else {
2106         res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2107     }
2108     riscv_iommu_ctx_put(s, ref);
2109     return res;
2110 }
2111 
2112 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
2113     uint64_t *data, unsigned size, MemTxAttrs attrs)
2114 {
2115     return MEMTX_ACCESS_ERROR;
2116 }
2117 
2118 static const MemoryRegionOps riscv_iommu_trap_ops = {
2119     .read_with_attrs = riscv_iommu_trap_read,
2120     .write_with_attrs = riscv_iommu_trap_write,
2121     .endianness = DEVICE_LITTLE_ENDIAN,
2122     .impl = {
2123         .min_access_size = 4,
2124         .max_access_size = 8,
2125         .unaligned = true,
2126     },
2127     .valid = {
2128         .min_access_size = 4,
2129         .max_access_size = 8,
2130     }
2131 };
2132 
2133 static void riscv_iommu_realize(DeviceState *dev, Error **errp)
2134 {
2135     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2136 
2137     s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
2138     if (s->enable_msi) {
2139         s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
2140     }
2141     if (s->enable_ats) {
2142         s->cap |= RISCV_IOMMU_CAP_ATS;
2143     }
2144     if (s->enable_s_stage) {
2145         s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
2146                   RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
2147     }
2148     if (s->enable_g_stage) {
2149         s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
2150                   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
2151     }
2152     /* Enable translation debug interface */
2153     s->cap |= RISCV_IOMMU_CAP_DBG;
2154 
2155     /* Report QEMU target physical address space limits */
2156     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
2157                        TARGET_PHYS_ADDR_SPACE_BITS);
2158 
2159     /* TODO: method to report supported PID bits */
2160     s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
2161     s->cap |= RISCV_IOMMU_CAP_PD8;
2162 
2163     /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
2164     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
2165                         RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
2166 
2167     /* register storage */
2168     s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2169     s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2170     s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2171 
2172     /* Mark all registers read-only */
2173     memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
2174 
2175     /*
2176      * Register complete MMIO space, including MSI/PBA registers.
2177      * Note: the PCIDevice implementation will add an overlapping MR for
2178      * MSI/PBA, managed directly by that implementation.
2179      */
2180     memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
2181         "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
2182 
2183     /* Set power-on register state */
2184     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
2185     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
2186     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
2187              ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
2188     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
2189         ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
2190     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
2191         ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
2192     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
2193         ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
2194     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
2195         ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
2196     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
2197         RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
2198     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
2199         RISCV_IOMMU_CQCSR_BUSY);
2200     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
2201         RISCV_IOMMU_FQCSR_FQOF);
2202     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
2203         RISCV_IOMMU_FQCSR_BUSY);
2204     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
2205         RISCV_IOMMU_PQCSR_PQOF);
2206     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
2207         RISCV_IOMMU_PQCSR_BUSY);
2208     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
2209     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
2210     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
2211     /* If debug registers enabled. */
2212     if (s->cap & RISCV_IOMMU_CAP_DBG) {
2213         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
2214         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
2215             RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2216     }
2217 
2218     /* Memory region for downstream access, if specified. */
2219     if (s->target_mr) {
2220         s->target_as = g_new0(AddressSpace, 1);
2221         address_space_init(s->target_as, s->target_mr,
2222             "riscv-iommu-downstream");
2223     } else {
2224         /* Fallback to global system memory. */
2225         s->target_as = &address_space_memory;
2226     }
2227 
2228     /* Memory region for untranslated MRIF/MSI writes */
2229     memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
2230             "riscv-iommu-trap", ~0ULL);
2231     address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
2232 
2233     /* Device translation context cache */
2234     s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
2235                                          riscv_iommu_ctx_equal,
2236                                          g_free, NULL);
2237 
2238     s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
2239                                          riscv_iommu_iot_equal,
2240                                          g_free, NULL);
2241 
2242     s->iommus.le_next = NULL;
2243     s->iommus.le_prev = NULL;
2244     QLIST_INIT(&s->spaces);
2245 }
2246 
2247 static void riscv_iommu_unrealize(DeviceState *dev)
2248 {
2249     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2250 
2251     g_hash_table_unref(s->iot_cache);
2252     g_hash_table_unref(s->ctx_cache);
2253 }
2254 
2255 static const Property riscv_iommu_properties[] = {
2256     DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
2257         RISCV_IOMMU_SPEC_DOT_VER),
2258     DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
2259     DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
2260         LIMIT_CACHE_IOT),
2261     DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
2262     DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
2263     DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
2264     DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
2265     DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
2266     DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
2267         TYPE_MEMORY_REGION, MemoryRegion *),
2268     DEFINE_PROP_END_OF_LIST(),
2269 };
2270 
2271 static void riscv_iommu_class_init(ObjectClass *klass, void *data)
2272 {
2273     DeviceClass *dc = DEVICE_CLASS(klass);
2274 
2275     /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
2276     dc->user_creatable = false;
2277     dc->realize = riscv_iommu_realize;
2278     dc->unrealize = riscv_iommu_unrealize;
2279     device_class_set_props(dc, riscv_iommu_properties);
2280 }
2281 
2282 static const TypeInfo riscv_iommu_info = {
2283     .name = TYPE_RISCV_IOMMU,
2284     .parent = TYPE_DEVICE,
2285     .instance_size = sizeof(RISCVIOMMUState),
2286     .class_init = riscv_iommu_class_init,
2287 };
2288 
2289 static const char *IOMMU_FLAG_STR[] = {
2290     "NA",
2291     "RO",
2292     "WR",
2293     "RW",
2294 };
2295 
2296 /* RISC-V IOMMU Memory Region - Address Translation Space */
2297 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2298     IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2299     IOMMUAccessFlags flag, int iommu_idx)
2300 {
2301     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2302     RISCVIOMMUContext *ctx;
2303     void *ref;
2304     IOMMUTLBEntry iotlb = {
2305         .iova = addr,
2306         .target_as = as->iommu->target_as,
2307         .addr_mask = ~0ULL,
2308         .perm = flag,
2309     };
2310 
2311     ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2312     if (ctx == NULL) {
2313         /* Translation disabled or invalid. */
2314         iotlb.addr_mask = 0;
2315         iotlb.perm = IOMMU_NONE;
2316     } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2317         /* Translation disabled or fault reported. */
2318         iotlb.addr_mask = 0;
2319         iotlb.perm = IOMMU_NONE;
2320     }
2321 
2322     /* Trace all dma translations with original access flags. */
2323     trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2324                           PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2325                           IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2326                           iotlb.translated_addr);
2327 
2328     riscv_iommu_ctx_put(as->iommu, ref);
2329 
2330     return iotlb;
2331 }
2332 
2333 static int riscv_iommu_memory_region_notify(
2334     IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2335     IOMMUNotifierFlag new, Error **errp)
2336 {
2337     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2338 
2339     if (old == IOMMU_NOTIFIER_NONE) {
2340         as->notifier = true;
2341         trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2342     } else if (new == IOMMU_NOTIFIER_NONE) {
2343         as->notifier = false;
2344         trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2345     }
2346 
2347     return 0;
2348 }
2349 
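/*
 * PCI class code 0x0806 (base class 0x08 "System Peripheral", subclass 0x06)
 * is the class code assigned to IOMMU devices.
 */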
2350 static inline bool pci_is_iommu(PCIDevice *pdev)
2351 {
2352     return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
2353 }
2354 
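/*
 * Return the address space to be used for DMA from the given PCI device.
 * Accesses issued by the IOMMU device itself bypass translation; otherwise
 * walk the list of IOMMUs registered on this bus and return the first
 * matching device address space, falling back to global system memory.
 */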
2355 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2356 {
2357     RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2358     PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2359     AddressSpace *as = NULL;
2360 
2361     if (pdev && pci_is_iommu(pdev)) {
2362         return s->target_as;
2363     }
2364 
2365     /* Find first registered IOMMU device */
2366     while (s->iommus.le_prev) {
2367         s = *(s->iommus.le_prev);
2368     }
2369 
2370     /* Find first matching IOMMU */
2371     while (s != NULL && as == NULL) {
2372         as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2373         s = s->iommus.le_next;
2374     }
2375 
2376     return as ? as : &address_space_memory;
2377 }
2378 
2379 static const PCIIOMMUOps riscv_iommu_ops = {
2380     .get_address_space = riscv_iommu_find_as,
2381 };
2382 
2383 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2384         Error **errp)
2385 {
2386     if (bus->iommu_ops &&
2387         bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2388         /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2389         RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2390         QLIST_INSERT_AFTER(last, iommu, iommus);
2391     } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2392         pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2393     } else {
2394         error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2395             pci_bus_num(bus));
2396     }
2397 }
2398 
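/*
 * Map memory transaction attributes to an IOMMU index: the process id
 * (PASID) carried in attrs.pid selects the translation context, with
 * RISCV_IOMMU_NOPROCID used for transactions without valid attributes.
 */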
2399 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
2400     MemTxAttrs attrs)
2401 {
2402     return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
2403 }
2404 
2405 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
2406 {
2407     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2408     return 1 << as->iommu->pid_bits;
2409 }
2410 
2411 static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
2412 {
2413     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
2414 
2415     imrc->translate = riscv_iommu_memory_region_translate;
2416     imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
2417     imrc->attrs_to_index = riscv_iommu_memory_region_index;
2418     imrc->num_indexes = riscv_iommu_memory_region_index_len;
2419 }
2420 
2421 static const TypeInfo riscv_iommu_memory_region_info = {
2422     .parent = TYPE_IOMMU_MEMORY_REGION,
2423     .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2424     .class_init = riscv_iommu_memory_region_init,
2425 };
2426 
2427 static void riscv_iommu_register_mr_types(void)
2428 {
2429     type_register_static(&riscv_iommu_memory_region_info);
2430     type_register_static(&riscv_iommu_info);
2431 }
2432 
2433 type_init(riscv_iommu_register_mr_types);
2434