1 /*
2  * QEMU emulation of a RISC-V IOMMU
3  *
4  * Copyright (C) 2021-2023, Rivos Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "exec/target_page.h"
22 #include "hw/pci/pci_bus.h"
23 #include "hw/pci/pci_device.h"
24 #include "hw/qdev-properties.h"
25 #include "hw/riscv/riscv_hart.h"
26 #include "migration/vmstate.h"
27 #include "qapi/error.h"
28 #include "qemu/timer.h"
29 
30 #include "cpu_bits.h"
31 #include "riscv-iommu.h"
32 #include "riscv-iommu-bits.h"
33 #include "riscv-iommu-hpm.h"
34 #include "trace.h"
35 
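/* Cache size limits for the device-context and address-translation caches */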
36 #define LIMIT_CACHE_CTX               (1U << 7)
37 #define LIMIT_CACHE_IOT               (1U << 20)
38 
39 /* Physical page number conversions */
40 #define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
41 #define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
42 
43 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
44 
45 /* Device assigned I/O address space */
46 struct RISCVIOMMUSpace {
47     IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
48     AddressSpace iova_as;       /* IOVA address space for attached device */
49     RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
50     uint32_t devid;             /* Requester identifier, AKA device_id */
51     bool notifier;              /* IOMMU unmap notifier enabled */
52     QLIST_ENTRY(RISCVIOMMUSpace) list;
53 };
54 
55 typedef enum RISCVIOMMUTransTag {
56     RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
57     RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
58     RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
59     RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
60 } RISCVIOMMUTransTag;
61 
62 /* Address translation cache entry */
63 struct RISCVIOMMUEntry {
64     RISCVIOMMUTransTag tag;     /* Translation Tag */
65     uint64_t iova:44;           /* IOVA Page Number */
66     uint64_t pscid:20;          /* Process Soft-Context identifier */
67     uint64_t phys:44;           /* Physical Page Number */
68     uint64_t gscid:16;          /* Guest Soft-Context identifier */
69     uint64_t perm:2;            /* IOMMU_RW flags */
70 };
71 
72 /* IOMMU index for transactions without process_id specified. */
73 #define RISCV_IOMMU_NOPROCID 0
74 
75 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
76 {
77     switch (vec_type) {
78     case RISCV_IOMMU_INTR_CQ:
79         return icvec & RISCV_IOMMU_ICVEC_CIV;
80     case RISCV_IOMMU_INTR_FQ:
81         return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
82     case RISCV_IOMMU_INTR_PM:
83         return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
84     case RISCV_IOMMU_INTR_PQ:
85         return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
86     default:
87         g_assert_not_reached();
88     }
89 }
90 
91 void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
92 {
93     uint32_t ipsr, icvec, vector;
94 
95     if (!s->notify) {
96         return;
97     }
98 
99     icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
100     ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);
101 
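    /*
     * riscv_iommu_reg_mod32() sets the pending bit and returns the previous
     * IPSR value: the interrupt is raised only on a 0 -> 1 transition of
     * this bit.
     */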
102     if (!(ipsr & (1 << vec_type))) {
103         vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
104         s->notify(s, vector);
105         trace_riscv_iommu_notify_int_vector(vec_type, vector);
106     }
107 }
108 
109 static void riscv_iommu_fault(RISCVIOMMUState *s,
110                               struct riscv_iommu_fq_record *ev)
111 {
112     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
113     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
114     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
115     uint32_t next = (tail + 1) & s->fq_mask;
116     uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
117 
118     trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
119                           PCI_FUNC(devid), ev->hdr, ev->iotval);
120 
121     if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
122         !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
123         return;
124     }
125 
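    /* Queue full: latch the fault queue overflow flag instead of writing the record */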
126     if (head == next) {
127         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
128                               RISCV_IOMMU_FQCSR_FQOF, 0);
129     } else {
130         dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
131         if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
132                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
133             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
134                                   RISCV_IOMMU_FQCSR_FQMF, 0);
135         } else {
136             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
137         }
138     }
139 
140     if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
141         riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
142     }
143 }
144 
145 static void riscv_iommu_pri(RISCVIOMMUState *s,
146     struct riscv_iommu_pq_record *pr)
147 {
148     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
149     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
150     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
151     uint32_t next = (tail + 1) & s->pq_mask;
152     uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
153 
154     trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
155                           PCI_FUNC(devid), pr->payload);
156 
157     if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
158         !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
159         return;
160     }
161 
162     if (head == next) {
163         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
164                               RISCV_IOMMU_PQCSR_PQOF, 0);
165     } else {
166         dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
167         if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
168                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
169             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
170                                   RISCV_IOMMU_PQCSR_PQMF, 0);
171         } else {
172             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
173         }
174     }
175 
176     if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
177         riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
178     }
179 }
180 
181 /*
182  * Discards all bits from 'val' whose matching bits in the same
183  * positions in the mask 'ext' are zeros, and packs the remaining
184  * bits from 'val' contiguously at the least-significant end of the
185  * result, keeping the same bit order as 'val' and filling any
186  * other bits at the most-significant end of the result with zeros.
187  *
188  * For example, for the following 'val' and 'ext', the return 'ret'
189  * will be:
190  *
191  * val = a b c d e f g h
192  * ext = 1 0 1 0 0 1 1 0
193  * ret = 0 0 0 0 a c f g
194  *
195  * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
196  * "Process to translate addresses of MSIs", is similar to bit manip
197  * function PEXT (Parallel bits extract) from x86.
198  */
199 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
200 {
201     uint64_t ret = 0;
202     uint64_t rot = 1;
203 
204     while (ext) {
205         if (ext & 1) {
206             if (val & 1) {
207                 ret |= rot;
208             }
209             rot <<= 1;
210         }
211         val >>= 1;
212         ext >>= 1;
213     }
214 
215     return ret;
216 }
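/*
 * Illustrative example (values assumed): riscv_iommu_pext_u64(0x45, 0x60)
 * packs bits 5 and 6 of 0x45 (0 and 1) into bits 0 and 1 of the result,
 * returning 0x2. This is used below to derive an MSI interrupt file number
 * from a GPA page number and the MSI address mask.
 */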
217 
218 /* Check if GPA matches MSI/MRIF pattern. */
219 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
220     dma_addr_t gpa)
221 {
222     if (!s->enable_msi) {
223         return false;
224     }
225 
226     if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
227         RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
228         return false; /* Invalid MSI/MRIF mode */
229     }
230 
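    /*
     * Illustrative example (values assumed): with msi_addr_pattern = 0x345
     * and msi_addr_mask = 0x7, GPAs with page numbers 0x340..0x347 match,
     * since bits covered by the mask are "don't care" and all remaining PPN
     * bits must equal the pattern.
     */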
231     if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
232         return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
233     }
234 
235     return true;
236 }
237 
238 /*
239  * RISC-V IOMMU Address Translation Lookup - Page Table Walk
240  *
241  * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
242  * Both implementations could be merged into a single helper function in the
243  * future. Keeping them separate for now, as error reporting and flow specifics
244  * are sufficiently different to justify separate implementations.
245  *
246  * @s        : IOMMU Device State
247  * @ctx      : Translation context for device id and process address space id.
248  * @iotlb    : translation data: physical address and access mode.
249  * @return   : success or fault cause code.
250  */
251 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
252     IOMMUTLBEntry *iotlb)
253 {
254     dma_addr_t addr, base;
255     uint64_t satp, gatp, pte;
256     bool en_s, en_g;
257     struct {
258         unsigned char step;
259         unsigned char levels;
260         unsigned char ptidxbits;
261         unsigned char ptesize;
262     } sc[2];
263     /* Translation stage phase */
264     enum {
265         S_STAGE = 0,
266         G_STAGE = 1,
267     } pass;
268     MemTxResult ret;
269 
270     satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
271     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
272 
273     en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
274     en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
275 
276     /*
277      * Early check for MSI address match when IOVA == GPA.
278      * Note that the (!en_s) condition means that the MSI
279      * page table may only be used when guest pages are
280      * mapped using the g-stage page table, whether single-
281      * or two-stage paging is enabled. It's unavoidable though,
282      * because the spec mandates that we do a first-stage
283      * translation before we check the MSI page table, which
284      * means we can't do an early MSI check unless we have
285      * strictly !en_s.
286      */
287     if (!en_s && (iotlb->perm & IOMMU_WO) &&
288         riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
289         iotlb->target_as = &s->trap_as;
290         iotlb->translated_addr = iotlb->iova;
291         iotlb->addr_mask = ~TARGET_PAGE_MASK;
292         return 0;
293     }
294 
295     /* Exit early for pass-through mode. */
296     if (!(en_s || en_g)) {
297         iotlb->translated_addr = iotlb->iova;
298         iotlb->addr_mask = ~TARGET_PAGE_MASK;
299         /* Allow R/W in pass-through mode */
300         iotlb->perm = IOMMU_RW;
301         return 0;
302     }
303 
304     /* S/G translation parameters. */
305     for (pass = 0; pass < 2; pass++) {
306         uint32_t sv_mode;
307 
308         sc[pass].step = 0;
309         if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
310             (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
311             /* 32bit mode for GXL/SXL == 1 */
312             switch (pass ? gatp : satp) {
313             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
314                 sc[pass].levels    = 0;
315                 sc[pass].ptidxbits = 0;
316                 sc[pass].ptesize   = 0;
317                 break;
318             case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
319                 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
320                 if (!(s->cap & sv_mode)) {
321                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
322                 }
323                 sc[pass].levels    = 2;
324                 sc[pass].ptidxbits = 10;
325                 sc[pass].ptesize   = 4;
326                 break;
327             default:
328                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
329             }
330         } else {
331             /* 64bit mode for GXL/SXL == 0 */
332             switch (pass ? gatp : satp) {
333             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
334                 sc[pass].levels    = 0;
335                 sc[pass].ptidxbits = 0;
336                 sc[pass].ptesize   = 0;
337                 break;
338             case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
339                 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
340                 if (!(s->cap & sv_mode)) {
341                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
342                 }
343                 sc[pass].levels    = 3;
344                 sc[pass].ptidxbits = 9;
345                 sc[pass].ptesize   = 8;
346                 break;
347             case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
348                 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
349                 if (!(s->cap & sv_mode)) {
350                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
351                 }
352                 sc[pass].levels    = 4;
353                 sc[pass].ptidxbits = 9;
354                 sc[pass].ptesize   = 8;
355                 break;
356             case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
357                 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
358                 if (!(s->cap & sv_mode)) {
359                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
360                 }
361                 sc[pass].levels    = 5;
362                 sc[pass].ptidxbits = 9;
363                 sc[pass].ptesize   = 8;
364                 break;
365             default:
366                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
367             }
368         }
369     };
370 
371     /* S/G stages translation tables root pointers */
372     gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
373     satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
374     addr = (en_s && en_g) ? satp : iotlb->iova;
375     base = en_g ? gatp : satp;
376     pass = en_g ? G_STAGE : S_STAGE;
377 
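    /*
     * Nested walk: when both stages are enabled, the loop below alternates
     * passes, G-stage translating every S-stage table address (starting with
     * the S-stage root pointer above) before the S-stage walk consumes it.
     */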
378     do {
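        /*
         * The root G-stage table (the "x4" modes) indexes 2 extra VPN bits,
         * so the index width is widened only for the first G-stage level.
         */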
379         const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
380         const unsigned va_bits = widened + sc[pass].ptidxbits;
381         const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
382                                  (sc[pass].levels - 1 - sc[pass].step);
383         const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
384         const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
385         const bool ade =
386             ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
387 
388         /* Address range check before first level lookup */
389         if (!sc[pass].step) {
390             const uint64_t va_len = va_skip + va_bits;
391             const uint64_t va_mask = (1ULL << va_len) - 1;
392 
393             if (pass == S_STAGE && va_len > 32) {
394                 target_ulong mask, masked_msbs;
395 
396                 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
397                 masked_msbs = (addr >> (va_len - 1)) & mask;
398 
399                 if (masked_msbs != 0 && masked_msbs != mask) {
400                     return (iotlb->perm & IOMMU_WO) ?
401                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
402                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
403                 }
404             } else {
405                 if ((addr & va_mask) != addr) {
406                     return (iotlb->perm & IOMMU_WO) ?
407                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
408                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
409                 }
410             }
411         }
412 
413 
414         if (pass == S_STAGE) {
415             riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
416         } else {
417             riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
418         }
419 
420         /* Read page table entry */
421         if (sc[pass].ptesize == 4) {
422             uint32_t pte32 = 0;
423             ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
424                              MEMTXATTRS_UNSPECIFIED);
425             pte = pte32;
426         } else {
427             ret = ldq_le_dma(s->target_as, pte_addr, &pte,
428                              MEMTXATTRS_UNSPECIFIED);
429         }
430         if (ret != MEMTX_OK) {
431             return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
432                                             : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
433         }
434 
435         sc[pass].step++;
436         hwaddr ppn = pte >> PTE_PPN_SHIFT;
437 
438         if (!(pte & PTE_V)) {
439             break;                /* Invalid PTE */
440         } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
441             base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
442         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
443             break;                /* Reserved leaf PTE flags: PTE_W */
444         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
445             break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
446         } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
447             break;                /* Misaligned PPN */
448         } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
449             break;                /* Read access check failed */
450         } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
451             break;                /* Write access check failed */
452         } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
453             break;                /* Access bit not set */
454         } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
455             break;                /* Dirty bit not set */
456         } else {
457             /* Leaf PTE, translation completed. */
458             sc[pass].step = sc[pass].levels;
459             base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
460             /* Update address mask based on smallest translation granularity */
461             iotlb->addr_mask &= (1ULL << va_skip) - 1;
462             /* Continue with S-Stage translation? */
463             if (pass && sc[0].step != sc[0].levels) {
464                 pass = S_STAGE;
465                 addr = iotlb->iova;
466                 continue;
467             }
468             /* Translation phase completed (GPA or SPA) */
469             iotlb->translated_addr = base;
470             iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
471                                                          : IOMMU_RO;
472 
473             /* Check MSI GPA address match */
474             if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
475                 riscv_iommu_msi_check(s, ctx, base)) {
476                 /* Trap MSI writes and return GPA address. */
477                 iotlb->target_as = &s->trap_as;
478                 iotlb->addr_mask = ~TARGET_PAGE_MASK;
479                 return 0;
480             }
481 
482             /* Continue with G-Stage translation? */
483             if (!pass && en_g) {
484                 pass = G_STAGE;
485                 addr = base;
486                 base = gatp;
487                 sc[pass].step = 0;
488                 continue;
489             }
490 
491             return 0;
492         }
493 
494         if (sc[pass].step == sc[pass].levels) {
495             break; /* Can't find leaf PTE */
496         }
497 
498         /* Continue with G-Stage translation? */
499         if (!pass && en_g) {
500             pass = G_STAGE;
501             addr = base;
502             base = gatp;
503             sc[pass].step = 0;
504         }
505     } while (1);
506 
507     return (iotlb->perm & IOMMU_WO) ?
508                 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
509                         RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
510                 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
511                         RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
512 }
513 
514 static void riscv_iommu_report_fault(RISCVIOMMUState *s,
515                                      RISCVIOMMUContext *ctx,
516                                      uint32_t fault_type, uint32_t cause,
517                                      bool pv,
518                                      uint64_t iotval, uint64_t iotval2)
519 {
520     struct riscv_iommu_fq_record ev = { 0 };
521 
522     if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
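        /*
         * DC.tc.DTF suppresses fault reporting for most causes; only the
         * causes listed below are still reported while DTF is set.
         */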
523         switch (cause) {
524         case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
525         case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
526         case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
527         case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
528         case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
529         case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
530         case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
531             break;
532         default:
533             /* DTF prevents reporting a fault for this given cause */
534             return;
535         }
536     }
537 
538     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
539     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
540     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
541     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true);
542 
543     if (pv) {
544         ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
545     }
546 
547     ev.iotval = iotval;
548     ev.iotval2 = iotval2;
549 
550     riscv_iommu_fault(s, &ev);
551 }
552 
553 /* Redirect MSI write for given GPA. */
554 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
555     RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
556     unsigned size, MemTxAttrs attrs)
557 {
558     MemTxResult res;
559     dma_addr_t addr;
560     uint64_t intn;
561     size_t offset;
562     uint32_t n190;
563     uint64_t pte[2];
564     int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
565     int cause;
566 
567     /* Interrupt File Number */
568     intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
569     offset = intn * sizeof(pte);
570 
571     /* fetch MSI PTE */
572     addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
573     if (addr & offset) {
574         /* Interrupt file number out of range */
575         res = MEMTX_ACCESS_ERROR;
576         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
577         goto err;
578     }
579 
580     addr |= offset;
581     res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
582             MEMTXATTRS_UNSPECIFIED);
583     if (res != MEMTX_OK) {
584         if (res == MEMTX_DECODE_ERROR) {
585             cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
586         } else {
587             cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
588         }
589         goto err;
590     }
591 
592     le64_to_cpus(&pte[0]);
593     le64_to_cpus(&pte[1]);
594 
595     if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
596         /*
597          * The spec mentions that: "If msipte.C == 1, then further
598          * processing to interpret the PTE is implementation
599          * defined.". We'll abort with cause = 262 for this
600          * case too.
601          */
602         res = MEMTX_ACCESS_ERROR;
603         cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
604         goto err;
605     }
606 
607     switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
608     case RISCV_IOMMU_MSI_PTE_M_BASIC:
609         /* MSI Pass-through mode */
610         addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
611 
612         trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
613                               PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
614                               gpa, addr);
615 
616         res = dma_memory_write(s->target_as, addr, &data, size, attrs);
617         if (res != MEMTX_OK) {
618             cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
619             goto err;
620         }
621 
622         return MEMTX_OK;
623     case RISCV_IOMMU_MSI_PTE_M_MRIF:
624         /* MRIF mode, continue. */
625         break;
626     default:
627         res = MEMTX_ACCESS_ERROR;
628         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
629         goto err;
630     }
631 
632     /*
633      * Report an error for interrupt identities exceeding the maximum allowed
634  * for an IMSIC interrupt file (2047), or when the destination address is
635  * not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
636      */
637     if ((data > 2047) || (gpa & 3)) {
638         res = MEMTX_ACCESS_ERROR;
639         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
640         goto err;
641     }
642 
643     /* MSI MRIF mode, non atomic pending bit update */
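    /*
     * Worked example (illustrative interrupt identity): for data == 100
     * (0x64), the pending-bit doubleword lives at byte offset
     * (0x64 & 0x7c0) >> 3 == 8 within the MRIF, and the bit to set is
     * 1ULL << (0x64 & 0x3f), i.e. bit 36 of that doubleword.
     */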
644 
645     /* MRIF pending bit address */
646     addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
647     addr = addr | ((data & 0x7c0) >> 3);
648 
649     trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
650                           PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
651                           gpa, addr);
652 
653     /* MRIF pending bit mask */
654     data = 1ULL << (data & 0x03f);
655     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
656     if (res != MEMTX_OK) {
657         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
658         goto err;
659     }
660 
661     intn = intn | data;
662     res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
663     if (res != MEMTX_OK) {
664         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
665         goto err;
666     }
667 
668     /* Get MRIF enable bits */
669     addr = addr + sizeof(intn);
670     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
671     if (res != MEMTX_OK) {
672         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
673         goto err;
674     }
675 
676     if (!(intn & data)) {
677         /* notification disabled, MRIF update completed. */
678         return MEMTX_OK;
679     }
680 
681     /* Send notification message */
682     addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
683     n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
684           (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
685 
686     res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
687     if (res != MEMTX_OK) {
688         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
689         goto err;
690     }
691 
692     trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
693 
694     return MEMTX_OK;
695 
696 err:
697     riscv_iommu_report_fault(s, ctx, fault_type, cause,
698                              !!ctx->process_id, 0, 0);
699     return res;
700 }
701 
702 /*
703  * Check device context configuration as described by the
704  * riscv-iommu spec section "Device-context configuration
705  * checks".
706  */
707 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
708                                             RISCVIOMMUContext *ctx)
709 {
710     uint32_t fsc_mode, msi_mode;
711     uint64_t gatp;
712 
713     if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
714         (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
715          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
716          ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
717         return false;
718     }
719 
720     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
721         (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
722          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
723         return false;
724     }
725 
726     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
727         ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
728         return false;
729     }
730 
731     if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
732         ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
733         return false;
734     }
735 
736     if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
737         msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
738 
739         if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
740             msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
741             return false;
742         }
743     }
744 
745     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
746     if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
747         gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
748         return false;
749     }
750 
751     fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
752 
753     if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
754         switch (fsc_mode) {
755         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
756             if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
757                 return false;
758             }
759             break;
760         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
761             if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
762                 return false;
763             }
764             break;
765         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
766             if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
767                 return false;
768             }
769             break;
770         }
771     } else {
772         /* DC.tc.PDTV is 0 */
773         if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
774             return false;
775         }
776 
777         if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
778             if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
779                 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
780                 return false;
781             }
782         } else {
783             switch (fsc_mode) {
784             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
785                 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
786                     return false;
787                 }
788                 break;
789             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
790                 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
791                     return false;
792                 }
793                 break;
794             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
795                 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
796                     return false;
797                 }
798                 break;
799             }
800         }
801     }
802 
803     /*
804  * CAP_END is always zero (only one endianness). FCTL_BE is
805      * always zero (little-endian accesses). Thus TC_SBE must
806      * always be LE, i.e. zero.
807      */
808     if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
809         return false;
810     }
811 
812     return true;
813 }
814 
815 /*
816  * Validate process context (PC) according to section
817  * "Process-context configuration checks".
818  */
819 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
820                                              RISCVIOMMUContext *ctx)
821 {
822     uint32_t mode;
823 
824     if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
825         return false;
826     }
827 
828     if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
829         return false;
830     }
831 
832     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
833     switch (mode) {
834     case RISCV_IOMMU_DC_FSC_MODE_BARE:
835     /* sv39 and sv32 modes have the same value (8) */
836     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
837     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
838     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
839         break;
840     default:
841         return false;
842     }
843 
844     if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
845         if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
846             !(s->cap & RISCV_IOMMU_CAP_SV32)) {
847                 return false;
848         }
849     } else {
850         switch (mode) {
851         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
852             if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
853                 return false;
854             }
855             break;
856         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
857             if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
858                 return false;
859             }
860             break;
861         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
862             if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
863                 return false;
864             }
865             break;
866         }
867     }
868 
869     return true;
870 }
871 
872 /**
873  * pdt_memory_read: PDT wrapper of dma_memory_read.
874  *
875  * @s: IOMMU Device State
876  * @ctx: Device Translation Context with devid and pasid set
877  * @addr: address within that address space
878  * @buf: buffer with the data transferred
879  * @len: length of the data transferred
880  * @attrs: memory transaction attributes
881  */
882 static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
883                                    RISCVIOMMUContext *ctx,
884                                    dma_addr_t addr,
885                                    void *buf, dma_addr_t len,
886                                    MemTxAttrs attrs)
887 {
888     uint64_t gatp_mode, pte;
889     struct {
890         unsigned char step;
891         unsigned char levels;
892         unsigned char ptidxbits;
893         unsigned char ptesize;
894     } sc;
895     MemTxResult ret;
896     dma_addr_t base = addr;
897 
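    /*
     * Process directory table entries are addressed by guest physical
     * addresses whenever G-stage translation is in use, so the PDT address
     * is walked through the G-stage tables before the final read below.
     */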
898     /* G-stage translation mode */
899     gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
900     if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
901         goto out;
902     }
903 
904     /* G-stage translation tables root pointer */
905     base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
906 
907     /* Start at step 0 */
908     sc.step = 0;
909 
910     if (s->fctl & RISCV_IOMMU_FCTL_GXL) {
911         /* 32bit mode for GXL == 1 */
912         switch (gatp_mode) {
913         case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
914             if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) {
915                 return MEMTX_ACCESS_ERROR;
916             }
917             sc.levels    = 2;
918             sc.ptidxbits = 10;
919             sc.ptesize   = 4;
920             break;
921         default:
922             return MEMTX_ACCESS_ERROR;
923         }
924     } else {
925         /* 64bit mode for GXL == 0 */
926         switch (gatp_mode) {
927         case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
928             if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) {
929                 return MEMTX_ACCESS_ERROR;
930             }
931             sc.levels    = 3;
932             sc.ptidxbits = 9;
933             sc.ptesize   = 8;
934             break;
935         case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
936             if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) {
937                 return MEMTX_ACCESS_ERROR;
938             }
939             sc.levels    = 4;
940             sc.ptidxbits = 9;
941             sc.ptesize   = 8;
942             break;
943         case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
944             if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) {
945                 return MEMTX_ACCESS_ERROR;
946             }
947             sc.levels    = 5;
948             sc.ptidxbits = 9;
949             sc.ptesize   = 8;
950             break;
951         default:
952             return MEMTX_ACCESS_ERROR;
953         }
954     }
955 
956     do {
957         const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
958         const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
959                                  (sc.levels - 1 - sc.step);
960         const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
961         const dma_addr_t pte_addr = base + idx * sc.ptesize;
962 
963         /* Address range check before first level lookup */
964         if (!sc.step) {
965             const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
966             if ((addr & va_mask) != addr) {
967                 return MEMTX_ACCESS_ERROR;
968             }
969         }
970 
971         /* Read page table entry */
972         if (sc.ptesize == 4) {
973             uint32_t pte32 = 0;
974             ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
975             pte = pte32;
976         } else {
977             ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
978         }
979         if (ret != MEMTX_OK) {
980             return ret;
981         }
982 
983         sc.step++;
984         hwaddr ppn = pte >> PTE_PPN_SHIFT;
985 
986         if (!(pte & PTE_V)) {
987             return MEMTX_ACCESS_ERROR; /* Invalid PTE */
988         } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
989             base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
990         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
991             return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
992         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
993             return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */
994         } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
995             return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
996         } else {
997             /* Leaf PTE, translation completed. */
998             base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
999             break;
1000         }
1001 
1002         if (sc.step == sc.levels) {
1003             return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
1004         }
1005     } while (1);
1006 
1007 out:
1008     return dma_memory_read(s->target_as, base, buf, len, attrs);
1009 }
1010 
1011 /*
1012  * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
1013  *
1014  * @s         : IOMMU Device State
1015  * @ctx       : Device Translation Context with devid and process_id set.
1016  * @return    : success or fault code.
1017  */
1018 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
1019 {
1020     const uint64_t ddtp = s->ddtp;
1021     unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
1022     dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
1023     struct riscv_iommu_dc dc;
1024     /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
1025     const int dc_fmt = !s->enable_msi;
1026     const size_t dc_len = sizeof(dc) >> dc_fmt;
1027     int depth;
1028     uint64_t de;
1029 
1030     switch (mode) {
1031     case RISCV_IOMMU_DDTP_MODE_OFF:
1032         return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
1033 
1034     case RISCV_IOMMU_DDTP_MODE_BARE:
1035         /* mock up pass-through translation context */
1036         ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
1037             RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
1038         ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
1039             RISCV_IOMMU_DC_FSC_MODE_BARE);
1040 
1041         ctx->tc = RISCV_IOMMU_DC_TC_V;
1042         if (s->enable_ats) {
1043             ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
1044         }
1045 
1046         ctx->ta = 0;
1047         ctx->msiptp = 0;
1048         return 0;
1049 
1050     case RISCV_IOMMU_DDTP_MODE_1LVL:
1051         depth = 0;
1052         break;
1053 
1054     case RISCV_IOMMU_DDTP_MODE_2LVL:
1055         depth = 1;
1056         break;
1057 
1058     case RISCV_IOMMU_DDTP_MODE_3LVL:
1059         depth = 2;
1060         break;
1061 
1062     default:
1063         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1064     }
1065 
1066     /*
1067      * Check supported device id width (in bits).
1068      * See IOMMU Specification, Chapter 6. Software guidelines.
1069      * - if extended device-context format is used:
1070      *   1LVL: 6, 2LVL: 15, 3LVL: 24
1071      * - if base device-context format is used:
1072      *   1LVL: 7, 2LVL: 16, 3LVL: 24
1073      */
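    /*
     * e.g. for 2LVL (depth == 1) with the base format (dc_fmt == 1) the
     * expression below evaluates to 1 << 16, matching the table above.
     */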
1074     if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
1075         return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1076     }
1077 
1078     /* Device directory tree walk */
1079     for (; depth-- > 0; ) {
1080         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
1081         /*
1082          * Select device id index bits based on device directory tree level
1083          * and device context format.
1084          * See IOMMU Specification, Chapter 2. Data Structures.
1085          * - if extended device-context format is used:
1086          *   device index: [23:15][14:6][5:0]
1087          * - if base device-context format is used:
1088          *   device index: [23:16][15:7][6:0]
1089          */
1090         const int split = depth * 9 + 6 + dc_fmt;
1091         addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
1092         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1093                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1094             return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
1095         }
1096         le64_to_cpus(&de);
1097         if (!(de & RISCV_IOMMU_DDTE_VALID)) {
1098             /* invalid directory entry */
1099             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1100         }
1101         if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
1102             /* reserved bits set */
1103             return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1104         }
1105         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
1106     }
1107 
1108     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
1109 
1110     /* index into device context entry page */
1111     addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
1112 
1113     memset(&dc, 0, sizeof(dc));
1114     if (dma_memory_read(s->target_as, addr, &dc, dc_len,
1115                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1116         return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
1117     }
1118 
1119     /* Set translation context. */
1120     ctx->tc = le64_to_cpu(dc.tc);
1121     ctx->gatp = le64_to_cpu(dc.iohgatp);
1122     ctx->satp = le64_to_cpu(dc.fsc);
1123     ctx->ta = le64_to_cpu(dc.ta);
1124     ctx->msiptp = le64_to_cpu(dc.msiptp);
1125     ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
1126     ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
1127 
1128     if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1129         return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1130     }
1131 
1132     if (!riscv_iommu_validate_device_ctx(s, ctx)) {
1133         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
1134     }
1135 
1136     /* FSC field checks */
1137     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
1138     addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
1139 
1140     if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
1141         if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
1142             /* PID is disabled */
1143             return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1144         }
1145         if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
1146             /* Invalid translation mode */
1147             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1148         }
1149         return 0;
1150     }
1151 
1152     if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
1153         if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
1154             /* No default process_id enabled, set BARE mode */
1155             ctx->satp = 0ULL;
1156             return 0;
1157         } else {
1158             /* Use default process_id #0 */
1159             ctx->process_id = 0;
1160         }
1161     }
1162 
1163     if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1164         /* No S-Stage translation, done. */
1165         return 0;
1166     }
1167 
1168     /* FSC.TC.PDTV enabled */
1169     if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1170         /* Invalid PDTP.MODE */
1171         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1172     }
1173 
1174     for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1175         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1176 
1177         /*
1178          * Select process id index bits based on process directory tree
1179          * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1180          */
1181         const int split = depth * 9 + 8;
1182         addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1183         if (pdt_memory_read(s, ctx, addr, &de, sizeof(de),
1184                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1185             return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1186         }
1187         le64_to_cpus(&de);
1188         if (!(de & RISCV_IOMMU_PDTE_VALID)) {
1189             return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1190         }
1191         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
1192     }
1193 
1194     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1195 
1196     /* Leaf entry in PDT */
1197     addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1198     if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2,
1199                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1200         return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1201     }
1202 
1203     /* Use FSC and TA from process directory entry. */
1204     ctx->ta = le64_to_cpu(dc.ta);
1205     ctx->satp = le64_to_cpu(dc.fsc);
1206 
1207     if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1208         return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1209     }
1210 
1211     if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1212         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1213     }
1214 
1215     return 0;
1216 }
1217 
1218 /* Translation Context cache support */
1219 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1220 {
1221     RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1222     RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1223     return c1->devid == c2->devid &&
1224            c1->process_id == c2->process_id;
1225 }
1226 
1227 static guint riscv_iommu_ctx_hash(gconstpointer v)
1228 {
1229     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1230     /*
1231      * Generate simple hash of (process_id, devid)
1232      * assuming 24-bit wide devid.
1233      */
1234     return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1235 }
1236 
1237 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
1238                                                gpointer data)
1239 {
1240     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1241     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1242     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1243         ctx->devid == arg->devid &&
1244         ctx->process_id == arg->process_id) {
1245         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1246     }
1247 }
1248 
1249 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
1250                                         gpointer data)
1251 {
1252     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1253     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1254     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1255         ctx->devid == arg->devid) {
1256         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1257     }
1258 }
1259 
1260 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
1261                                       gpointer data)
1262 {
1263     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1264     if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
1265         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1266     }
1267 }
1268 
1269 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
1270                                   uint32_t devid, uint32_t process_id)
1271 {
1272     GHashTable *ctx_cache;
1273     RISCVIOMMUContext key = {
1274         .devid = devid,
1275         .process_id = process_id,
1276     };
1277     ctx_cache = g_hash_table_ref(s->ctx_cache);
1278     g_hash_table_foreach(ctx_cache, func, &key);
1279     g_hash_table_unref(ctx_cache);
1280 }
1281 
1282 /* Find or allocate translation context for a given {device_id, process_id} */
1283 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1284                                           unsigned devid, unsigned process_id,
1285                                           void **ref)
1286 {
1287     GHashTable *ctx_cache;
1288     RISCVIOMMUContext *ctx;
1289     RISCVIOMMUContext key = {
1290         .devid = devid,
1291         .process_id = process_id,
1292     };
1293 
1294     ctx_cache = g_hash_table_ref(s->ctx_cache);
1295     ctx = g_hash_table_lookup(ctx_cache, &key);
1296 
1297     if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1298         *ref = ctx_cache;
1299         return ctx;
1300     }
1301 
1302     ctx = g_new0(RISCVIOMMUContext, 1);
1303     ctx->devid = devid;
1304     ctx->process_id = process_id;
1305 
1306     int fault = riscv_iommu_ctx_fetch(s, ctx);
1307     if (!fault) {
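        /*
         * No per-entry eviction: once the context cache reaches
         * LIMIT_CACHE_CTX it is replaced wholesale with a fresh table
         * before the new context is inserted.
         */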
1308         if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1309             g_hash_table_unref(ctx_cache);
1310             ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1311                                               riscv_iommu_ctx_equal,
1312                                               g_free, NULL);
1313             g_hash_table_ref(ctx_cache);
1314             g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1315         }
1316         g_hash_table_add(ctx_cache, ctx);
1317         *ref = ctx_cache;
1318         return ctx;
1319     }
1320 
1321     g_hash_table_unref(ctx_cache);
1322     *ref = NULL;
1323 
1324     riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1325                              fault, !!process_id, 0, 0);
1326 
1327     g_free(ctx);
1328     return NULL;
1329 }
1330 
1331 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
1332 {
1333     if (ref) {
1334         g_hash_table_unref((GHashTable *)ref);
1335     }
1336 }
1337 
1338 /* Find or allocate address space for a given device */
1339 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1340 {
1341     RISCVIOMMUSpace *as;
1342 
1343     /* FIXME: PCIe bus remapping for attached endpoints. */
1344     devid |= s->bus << 8;
1345 
1346     QLIST_FOREACH(as, &s->spaces, list) {
1347         if (as->devid == devid) {
1348             break;
1349         }
1350     }
1351 
1352     if (as == NULL) {
1353         char name[64];
1354         as = g_new0(RISCVIOMMUSpace, 1);
1355 
1356         as->iommu = s;
1357         as->devid = devid;
1358 
1359         snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1360             PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1361 
1362         /* IOVA address space, untranslated addresses */
1363         memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1364             TYPE_RISCV_IOMMU_MEMORY_REGION,
1365             OBJECT(as), "riscv_iommu", UINT64_MAX);
1366         address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1367 
1368         QLIST_INSERT_HEAD(&s->spaces, as, list);
1369 
1370         trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1371                 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1372     }
1373     return &as->iova_as;
1374 }
1375 
1376 /* Translation Object cache support */
1377 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1378 {
1379     RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1380     RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1381     return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1382            t1->iova == t2->iova && t1->tag == t2->tag;
1383 }
1384 
1385 static guint riscv_iommu_iot_hash(gconstpointer v)
1386 {
1387     RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1388     return (guint)t->iova;
1389 }
1390 
1391 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
1392 /* GV: 0 AV: 0 GVMA: 1 */
1393 static
1394 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
1395 {
1396     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1397     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1398     if (iot->tag == arg->tag) {
1399         iot->perm = IOMMU_NONE;
1400     }
1401 }
1402 
1403 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
1404 static
1405 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
1406 {
1407     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1408     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1409     if (iot->tag == arg->tag &&
1410         iot->pscid == arg->pscid) {
1411         iot->perm = IOMMU_NONE;
1412     }
1413 }
1414 
1415 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
1416 static
1417 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
1418 {
1419     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1420     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1421     if (iot->tag == arg->tag &&
1422         iot->iova == arg->iova) {
1423         iot->perm = IOMMU_NONE;
1424     }
1425 }
1426 
1427 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
1428 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1429                                              gpointer data)
1430 {
1431     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1432     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1433     if (iot->tag == arg->tag &&
1434         iot->pscid == arg->pscid &&
1435         iot->iova == arg->iova) {
1436         iot->perm = IOMMU_NONE;
1437     }
1438 }
1439 
1440 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
1441 /* GV: 1 AV: 0 GVMA: 1 */
1442 static
1443 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
1444 {
1445     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1446     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1447     if (iot->tag == arg->tag &&
1448         iot->gscid == arg->gscid) {
1449         iot->perm = IOMMU_NONE;
1450     }
1451 }
1452 
1453 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
1454 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
1455                                               gpointer data)
1456 {
1457     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1458     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1459     if (iot->tag == arg->tag &&
1460         iot->gscid == arg->gscid &&
1461         iot->pscid == arg->pscid) {
1462         iot->perm = IOMMU_NONE;
1463     }
1464 }
1465 
1466 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
1467 /* GV: 1 AV: 1 GVMA: 1 */
1468 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
1469                                              gpointer data)
1470 {
1471     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1472     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1473     if (iot->tag == arg->tag &&
1474         iot->gscid == arg->gscid &&
1475         iot->iova == arg->iova) {
1476         iot->perm = IOMMU_NONE;
1477     }
1478 }
1479 
1480 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
1481 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
1482                                                    gpointer data)
1483 {
1484     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1485     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1486     if (iot->tag == arg->tag &&
1487         iot->gscid == arg->gscid &&
1488         iot->pscid == arg->pscid &&
1489         iot->iova == arg->iova) {
1490         iot->perm = IOMMU_NONE;
1491     }
1492 }
1493 
1494 /* The caller should hold a reference to the iot_cache object. */
1495 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1496     GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
1497 {
1498     RISCVIOMMUEntry key = {
1499         .tag   = transtag,
1500         .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1501         .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1502         .iova  = PPN_DOWN(iova),
1503     };
1504     return g_hash_table_lookup(iot_cache, &key);
1505 }
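
/*
 * Illustrative note, not additional device logic: the IOATC helpers are
 * intended to be used the way riscv_iommu_translate() below uses them.
 * A minimal sketch of that pattern (names are placeholders):
 *
 *     GHashTable *cache = g_hash_table_ref(s->iot_cache);
 *     RISCVIOMMUEntry *hit = riscv_iommu_iot_lookup(ctx, cache, iova, tag);
 *     if (!hit || hit->perm == IOMMU_NONE) {
 *         RISCVIOMMUEntry *iot = g_new0(RISCVIOMMUEntry, 1);
 *         ... fill iova/phys/gscid/pscid/perm/tag from the page walk ...
 *         riscv_iommu_iot_update(s, cache, iot);
 *     }
 *     g_hash_table_unref(cache);
 *
 * Holding a local reference keeps the table valid even if
 * riscv_iommu_iot_update() replaces s->iot_cache with a fresh table once
 * the entry limit is reached.  The invalidation helpers above do not
 * remove entries; they mark them IOMMU_NONE so that lookups treat them
 * as misses.
 */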
1506 
1507 /* The caller should hold a reference to the iot_cache object. */
1508 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1509     GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1510 {
1511     if (!s->iot_limit) {
1512         return;
1513     }
1514 
1515     if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1516         iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1517                                           riscv_iommu_iot_equal,
1518                                           g_free, NULL);
1519         g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1520     }
1521     g_hash_table_add(iot_cache, iot);
1522 }
1523 
1524 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
1525     uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
1526 {
1527     GHashTable *iot_cache;
1528     RISCVIOMMUEntry key = {
1529         .tag = transtag,
1530         .gscid = gscid,
1531         .pscid = pscid,
1532         .iova  = PPN_DOWN(iova),
1533     };
1534 
1535     iot_cache = g_hash_table_ref(s->iot_cache);
1536     g_hash_table_foreach(iot_cache, func, &key);
1537     g_hash_table_unref(iot_cache);
1538 }
1539 
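/*
 * Summary of the mode-to-tag mapping implemented below:
 *
 *   first stage (satp) | second stage (gatp) | tag
 *   -------------------+---------------------+--------------------------
 *   Bare               | Bare                | RISCV_IOMMU_TRANS_TAG_BY
 *   Bare               | enabled             | RISCV_IOMMU_TRANS_TAG_VG
 *   enabled            | Bare                | RISCV_IOMMU_TRANS_TAG_SS
 *   enabled            | enabled             | RISCV_IOMMU_TRANS_TAG_VN
 */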
1540 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
1541 {
1542     uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
1543     uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
1544 
1545     if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1546         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1547             RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
1548     } else {
1549         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1550             RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
1551     }
1552 }
1553 
1554 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
1555     IOMMUTLBEntry *iotlb, bool enable_cache)
1556 {
1557     RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
1558     RISCVIOMMUEntry *iot;
1559     IOMMUAccessFlags perm;
1560     bool enable_pid;
1561     bool enable_pri;
1562     GHashTable *iot_cache;
1563     int fault;
1564 
1565     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
1566 
1567     iot_cache = g_hash_table_ref(s->iot_cache);
1568     /*
1569      * TC[32] is reserved for custom extensions, used here to temporarily
1570      * enable automatic page-request generation for ATS queries.
1571      */
1572     enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
1573     enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
1574 
1575     /* Check for ATS request. */
1576     if (iotlb->perm == IOMMU_NONE) {
1577         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
1578         /* Check if ATS is disabled. */
1579         if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
1580             enable_pri = false;
1581             fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1582             goto done;
1583         }
1584     }
1585 
1586     iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
1587     perm = iot ? iot->perm : IOMMU_NONE;
1588     if (perm != IOMMU_NONE) {
1589         iotlb->translated_addr = PPN_PHYS(iot->phys);
1590         iotlb->addr_mask = ~TARGET_PAGE_MASK;
1591         iotlb->perm = perm;
1592         fault = 0;
1593         goto done;
1594     }
1595 
1596     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
1597 
1598     /* Translate using device directory / page table information. */
1599     fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
1600 
1601     if (!fault && iotlb->target_as == &s->trap_as) {
1602         /* Do not cache trapped MSI translations */
1603         goto done;
1604     }
1605 
1606     /*
1607      * We made an implementation choice to not cache identity-mapped
1608      * translations, as allowed by the specification, to avoid
1609      * translation cache evictions for other devices sharing the
1610      * IOMMU hardware model.
1611      */
1612     if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
1613         iot = g_new0(RISCVIOMMUEntry, 1);
1614         iot->iova = PPN_DOWN(iotlb->iova);
1615         iot->phys = PPN_DOWN(iotlb->translated_addr);
1616         iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
1617         iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
1618         iot->perm = iotlb->perm;
1619         iot->tag = transtag;
1620         riscv_iommu_iot_update(s, iot_cache, iot);
1621     }
1622 
1623 done:
1624     g_hash_table_unref(iot_cache);
1625 
1626     if (enable_pri && fault) {
1627         struct riscv_iommu_pq_record pr = {0};
1628         if (enable_pid) {
1629             pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
1630                                RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
1631         }
1632         pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
1633         pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
1634                      RISCV_IOMMU_PREQ_PAYLOAD_M;
1635         riscv_iommu_pri(s, &pr);
1636         return fault;
1637     }
1638 
1639     if (fault) {
1640         unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
1641 
1642         if (iotlb->perm & IOMMU_RW) {
1643             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
1644         } else if (iotlb->perm & IOMMU_RO) {
1645             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
1646         }
1647 
1648         riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
1649                                  iotlb->iova, iotlb->translated_addr);
1650         return fault;
1651     }
1652 
1653     return 0;
1654 }
1655 
1656 /* IOMMU Command Interface */
1657 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1658     uint64_t addr, uint32_t data)
1659 {
1660     /*
1661      * ATS processing in this implementation of the IOMMU is synchronous;
1662      * there is no need to wait for completions here.
1663      */
1664     if (!notify) {
1665         return MEMTX_OK;
1666     }
1667 
1668     return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1669         MEMTXATTRS_UNSPECIFIED);
1670 }
1671 
1672 static void riscv_iommu_ats(RISCVIOMMUState *s,
1673     struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
1674     IOMMUAccessFlags perm,
1675     void (*trace_fn)(const char *id))
1676 {
1677     RISCVIOMMUSpace *as = NULL;
1678     IOMMUNotifier *n;
1679     IOMMUTLBEvent event;
1680     uint32_t pid;
1681     uint32_t devid;
1682     const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
1683 
1684     if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
1685         /* Use device segment and requester id */
1686         devid = get_field(cmd->dword0,
1687             RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
1688     } else {
1689         devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
1690     }
1691 
1692     pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
1693 
1694     QLIST_FOREACH(as, &s->spaces, list) {
1695         if (as->devid == devid) {
1696             break;
1697         }
1698     }
1699 
1700     if (!as || !as->notifier) {
1701         return;
1702     }
1703 
1704     event.type = flag;
1705     event.entry.perm = perm;
1706     event.entry.target_as = s->target_as;
1707 
1708     IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
1709         if (!pv || n->iommu_idx == pid) {
1710             event.entry.iova = n->start;
1711             event.entry.addr_mask = n->end - n->start;
1712             trace_fn(as->iova_mr.parent_obj.name);
1713             memory_region_notify_iommu_one(n, &event);
1714         }
1715     }
1716 }
1717 
1718 static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
1719     struct riscv_iommu_command *cmd)
1720 {
1721     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
1722                            trace_riscv_iommu_ats_inval);
1723 }
1724 
1725 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
1726     struct riscv_iommu_command *cmd)
1727 {
1728     unsigned resp_code = get_field(cmd->dword1,
1729                                    RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
1730 
1731     /* Using the access flag to carry response code information */
1732     IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
1733     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
1734                            trace_riscv_iommu_ats_prgr);
1735 }
1736 
1737 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
1738 {
1739     uint64_t old_ddtp = s->ddtp;
1740     uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
1741     unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
1742     unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
1743     bool ok = false;
1744 
1745     /*
1746      * Check for allowed DDTP.MODE transitions:
1747      * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
1748      * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
1749      */
1750     if (new_mode == old_mode ||
1751         new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1752         new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
1753         ok = true;
1754     } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
1755                new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
1756                new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
1757         ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1758              old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
1759     }
1760 
1761     if (ok) {
1762         /* clear reserved and busy bits, report back sanitized version */
1763         new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
1764                              RISCV_IOMMU_DDTP_MODE, new_mode);
1765     } else {
1766         new_ddtp = old_ddtp;
1767     }
1768     s->ddtp = new_ddtp;
1769 
1770     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
1771 }
1772 
1773 /* Command function and opcode field. */
1774 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
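/*
 * The opcode occupies the low seven bits of dword0 with the function code
 * directly above it, mirroring the get_field(cmd.dword0,
 * RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC) extraction below.
 * For example, IOTINVAL.VMA is matched as
 * RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
 *                 RISCV_IOMMU_CMD_IOTINVAL_OPCODE).
 */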
1775 
1776 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
1777 {
1778     struct riscv_iommu_command cmd;
1779     MemTxResult res;
1780     dma_addr_t addr;
1781     uint32_t tail, head, ctrl;
1782     uint64_t cmd_opcode;
1783     GHFunc func;
1784 
1785     ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1786     tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
1787     head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
1788 
1789     /* Check for pending error or queue processing disabled */
1790     if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
1791         !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
1792         return;
1793     }
1794 
1795     while (tail != head) {
1796         addr = s->cq_addr  + head * sizeof(cmd);
1797         res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
1798                               MEMTXATTRS_UNSPECIFIED);
1799 
1800         if (res != MEMTX_OK) {
1801             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1802                                   RISCV_IOMMU_CQCSR_CQMF, 0);
1803             goto fault;
1804         }
1805 
1806         trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
1807 
1808         cmd_opcode = get_field(cmd.dword0,
1809                                RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
1810 
1811         switch (cmd_opcode) {
1812         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
1813                              RISCV_IOMMU_CMD_IOFENCE_OPCODE):
1814             res = riscv_iommu_iofence(s,
1815                 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
1816                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
1817 
1818             if (res != MEMTX_OK) {
1819                 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1820                                       RISCV_IOMMU_CQCSR_CQMF, 0);
1821                 goto fault;
1822             }
1823             break;
1824 
1825         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
1826                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1827         {
1828             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1829             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1830             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1831             uint32_t gscid = get_field(cmd.dword0,
1832                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1833             uint32_t pscid = get_field(cmd.dword0,
1834                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1835             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1836 
1837             if (pscv) {
1838                 /* illegal command arguments: IOTINVAL.GVMA with PSCV == 1 */
1839                 goto cmd_ill;
1840             }
1841 
1842             func = riscv_iommu_iot_inval_all;
1843 
1844             if (gv) {
1845                 func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1846                               riscv_iommu_iot_inval_gscid;
1847             }
1848 
1849             riscv_iommu_iot_inval(
1850                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);
1851 
1852             riscv_iommu_iot_inval(
1853                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
1854             break;
1855         }
1856 
1857         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
1858                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1859         {
1860             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1861             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1862             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1863             uint32_t gscid = get_field(cmd.dword0,
1864                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1865             uint32_t pscid = get_field(cmd.dword0,
1866                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1867             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1868             RISCVIOMMUTransTag transtag;
1869 
1870             if (gv) {
1871                 transtag = RISCV_IOMMU_TRANS_TAG_VN;
1872                 if (pscv) {
1873                     func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
1874                                   riscv_iommu_iot_inval_gscid_pscid;
1875                 } else {
1876                     func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1877                                   riscv_iommu_iot_inval_gscid;
1878                 }
1879             } else {
1880                 transtag = RISCV_IOMMU_TRANS_TAG_SS;
1881                 if (pscv) {
1882                     func = (av) ? riscv_iommu_iot_inval_pscid_iova :
1883                                   riscv_iommu_iot_inval_pscid;
1884                 } else {
1885                     func = (av) ? riscv_iommu_iot_inval_iova :
1886                                   riscv_iommu_iot_inval_all;
1887                 }
1888             }
1889 
1890             riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
1891             break;
1892         }
1893 
1894         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
1895                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1896             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1897                 /* invalidate all device context cache mappings */
1898                 func = riscv_iommu_ctx_inval_all;
1899             } else {
1900                 /* invalidate all device contexts matching DID */
1901                 func = riscv_iommu_ctx_inval_devid;
1902             }
1903             riscv_iommu_ctx_inval(s, func,
1904                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
1905             break;
1906 
1907         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
1908                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1909             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1910                 /* illegal command arguments: IODIR.INVAL_PDT with DV == 0 */
1911                 goto cmd_ill;
1912             } else {
1913                 func = riscv_iommu_ctx_inval_devid_procid;
1914             }
1915             riscv_iommu_ctx_inval(s, func,
1916                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
1917                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
1918             break;
1919 
1920         /* ATS commands */
1921         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
1922                              RISCV_IOMMU_CMD_ATS_OPCODE):
1923             if (!s->enable_ats) {
1924                 goto cmd_ill;
1925             }
1926 
1927             riscv_iommu_ats_inval(s, &cmd);
1928             break;
1929 
1930         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
1931                              RISCV_IOMMU_CMD_ATS_OPCODE):
1932             if (!s->enable_ats) {
1933                 goto cmd_ill;
1934             }
1935 
1936             riscv_iommu_ats_prgr(s, &cmd);
1937             break;
1938 
1939         default:
1940         cmd_ill:
1941             /* Invalid command, do not advance the command queue head. */
1942             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1943                 RISCV_IOMMU_CQCSR_CMD_ILL, 0);
1944             goto fault;
1945         }
1946 
1947         /* Advance and update head pointer after command completes. */
1948         head = (head + 1) & s->cq_mask;
1949         riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
1950     }
1951     return;
1952 
1953 fault:
1954     if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
1955         riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
1956     }
1957 }
1958 
1959 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
1960 {
1961     uint64_t base;
1962     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1963     uint32_t ctrl_clr;
1964     bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
1965     bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
1966 
1967     if (enable && !active) {
1968         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
1969         s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
1970         s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
1971         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
1972         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
1973         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
1974         ctrl_set = RISCV_IOMMU_CQCSR_CQON;
1975         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
1976                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
1977                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
1978     } else if (!enable && active) {
1979         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
1980         ctrl_set = 0;
1981         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
1982     } else {
1983         ctrl_set = 0;
1984         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
1985     }
1986 
1987     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
1988 }
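
/*
 * Queue sizing note, applicable to the command, fault and page-request
 * queues alike: LOG2SZ encodes log2(number of entries) - 1, hence the
 * index mask computed above as (2ULL << LOG2SZ) - 1.  For example,
 * LOG2SZ = 3 describes a queue of 16 entries with a head/tail mask of 0xf.
 */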
1989 
1990 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
1991 {
1992     uint64_t base;
1993     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1994     uint32_t ctrl_clr;
1995     bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
1996     bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
1997 
1998     if (enable && !active) {
1999         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
2000         s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
2001         s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
2002         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
2003         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
2004         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
2005         ctrl_set = RISCV_IOMMU_FQCSR_FQON;
2006         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
2007             RISCV_IOMMU_FQCSR_FQOF;
2008     } else if (!enable && active) {
2009         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
2010         ctrl_set = 0;
2011         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
2012     } else {
2013         ctrl_set = 0;
2014         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
2015     }
2016 
2017     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
2018 }
2019 
2020 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
2021 {
2022     uint64_t base;
2023     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
2024     uint32_t ctrl_clr;
2025     bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
2026     bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
2027 
2028     if (enable && !active) {
2029         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
2030         s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
2031         s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
2032         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
2033         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
2034         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
2035         ctrl_set = RISCV_IOMMU_PQCSR_PQON;
2036         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
2037             RISCV_IOMMU_PQCSR_PQOF;
2038     } else if (!enable && active) {
2039         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
2040         ctrl_set = 0;
2041         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
2042     } else {
2043         ctrl_set = 0;
2044         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
2045     }
2046 
2047     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
2048 }
2049 
2050 static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
2051 {
2052     uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
2053     uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
2054     unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
2055     unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
2056     RISCVIOMMUContext *ctx;
2057     void *ref;
2058 
2059     if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
2060         return;
2061     }
2062 
2063     ctx = riscv_iommu_ctx(s, devid, pid, &ref);
2064     if (ctx == NULL) {
2065         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
2066                                  RISCV_IOMMU_TR_RESPONSE_FAULT |
2067                                  (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
2068     } else {
2069         IOMMUTLBEntry iotlb = {
2070             .iova = iova,
2071             .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
2072             .addr_mask = ~0,
2073             .target_as = NULL,
2074         };
2075         int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
2076         if (fault) {
2077             iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
2078         } else {
2079             iova = iotlb.translated_addr & ~iotlb.addr_mask;
2080             iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
2081         }
2082         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
2083     }
2084 
2085     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
2086         RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2087     riscv_iommu_ctx_put(s, ref);
2088 }
2089 
2090 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
2091 
2092 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
2093 {
2094     uint64_t icvec = 0;
2095 
2096     icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
2097                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);
2098 
2099     icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
2100                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);
2101 
2102     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
2103                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);
2104 
2105     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
2106                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);
2107 
2108     trace_riscv_iommu_icvec_write(data, icvec);
2109 
2110     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
2111 }
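
/*
 * Illustrative example of the clamping above, assuming
 * s->icvec_avail_vectors = 0x3333 (vector indexes 0-3 available for each
 * interrupt cause): a guest write of 0x7654 is reduced field by field
 * with MIN(), and 0x3333 is what gets stored in ICVEC.
 */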
2112 
2113 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
2114 {
2115     uint32_t cqcsr, fqcsr, pqcsr;
2116     uint32_t ipsr_set = 0;
2117     uint32_t ipsr_clr = 0;
2118 
2119     if (data & RISCV_IOMMU_IPSR_CIP) {
2120         cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
2121 
2122         if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
2123             (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
2124              cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
2125              cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
2126              cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
2127             ipsr_set |= RISCV_IOMMU_IPSR_CIP;
2128         } else {
2129             ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
2130         }
2131     } else {
2132         ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
2133     }
2134 
2135     if (data & RISCV_IOMMU_IPSR_FIP) {
2136         fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
2137 
2138         if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
2139             (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
2140              fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
2141             ipsr_set |= RISCV_IOMMU_IPSR_FIP;
2142         } else {
2143             ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2144         }
2145     } else {
2146         ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2147     }
2148 
2149     if (data & RISCV_IOMMU_IPSR_PIP) {
2150         pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
2151 
2152         if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
2153             (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
2154              pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
2155             ipsr_set |= RISCV_IOMMU_IPSR_PIP;
2156         } else {
2157             ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2158         }
2159     } else {
2160         ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2161     }
2162 
2163     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
2164 }
2165 
2166 static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
2167                                            uint32_t regb,
2168                                            bool prev_cy_inh)
2169 {
2170     switch (regb) {
2171     case RISCV_IOMMU_REG_IOCOUNTINH:
2172         riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
2173         break;
2174 
2175     case RISCV_IOMMU_REG_IOHPMCYCLES:
2176     case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
2177         riscv_iommu_process_hpmcycle_write(s);
2178         break;
2179 
2180     case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
2181         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
2182         riscv_iommu_process_hpmevt_write(s, regb & ~7);
2183         break;
2184     }
2185 }
2186 
2187 /*
2188  * Write the resulting value of 'data' for the register specified
2189  * by 'reg_addr', after considering read-only/read-write/write-1-to-clear
2190  * bits, to the location pointed to by 'dest'.
2191  *
2192  * The result is written in little-endian.
2193  */
2194 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
2195                                       void *dest, hwaddr reg_addr,
2196                                       int size, uint64_t data)
2197 {
2198     uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
2199     uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
2200     uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
2201 
2202     stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
2203 }
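
/*
 * Worked example of the masking above, with illustrative values: given a
 * current register value rw = 0x1234, ro = 0xff00 (upper byte read-only)
 * and wc = 0x0001 (bit 0 is write-1-to-clear), a write of data = 0xabcd
 * produces
 *     ((0x1234 & 0xff00) | (0xabcd & ~0xff00)) & ~(0xabcd & 0x0001)
 *         = (0x1200 | 0x00cd) & ~0x0001 = 0x12cc
 * i.e. the read-only byte keeps its old value, writable bits take the
 * new value, and writing 1 to the W1C bit clears it.
 */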
2204 
2205 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
2206                                           uint64_t data, unsigned size,
2207                                           MemTxAttrs attrs)
2208 {
2209     riscv_iommu_process_fn *process_fn = NULL;
2210     RISCVIOMMUState *s = opaque;
2211     uint32_t regb = addr & ~3;
2212     uint32_t busy = 0;
2213     uint64_t val = 0;
2214     bool cy_inh = false;
2215 
2216     if ((addr & (size - 1)) != 0) {
2217         /* Unsupported MMIO alignment or access size */
2218         return MEMTX_ERROR;
2219     }
2220 
2221     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2222         /* Unsupported MMIO access location. */
2223         return MEMTX_ACCESS_ERROR;
2224     }
2225 
2226     /* Track actionable MMIO write. */
2227     switch (regb) {
2228     case RISCV_IOMMU_REG_DDTP:
2229     case RISCV_IOMMU_REG_DDTP + 4:
2230         process_fn = riscv_iommu_process_ddtp;
2231         regb = RISCV_IOMMU_REG_DDTP;
2232         busy = RISCV_IOMMU_DDTP_BUSY;
2233         break;
2234 
2235     case RISCV_IOMMU_REG_CQT:
2236         process_fn = riscv_iommu_process_cq_tail;
2237         break;
2238 
2239     case RISCV_IOMMU_REG_CQCSR:
2240         process_fn = riscv_iommu_process_cq_control;
2241         busy = RISCV_IOMMU_CQCSR_BUSY;
2242         break;
2243 
2244     case RISCV_IOMMU_REG_FQCSR:
2245         process_fn = riscv_iommu_process_fq_control;
2246         busy = RISCV_IOMMU_FQCSR_BUSY;
2247         break;
2248 
2249     case RISCV_IOMMU_REG_PQCSR:
2250         process_fn = riscv_iommu_process_pq_control;
2251         busy = RISCV_IOMMU_PQCSR_BUSY;
2252         break;
2253 
2254     case RISCV_IOMMU_REG_ICVEC:
2255     case RISCV_IOMMU_REG_IPSR:
2256         /*
2257          * ICVEC and IPSR have special read/write procedures. We'll
2258          * call their respective helpers and exit.
2259          */
2260         riscv_iommu_write_reg_val(s, &val, addr, size, data);
2261 
2262         /*
2263          * 'val' is stored as LE. Switch to host endianness
2264          * before using it.
2265          */
2266         val = le64_to_cpu(val);
2267 
2268         if (regb == RISCV_IOMMU_REG_ICVEC) {
2269             riscv_iommu_update_icvec(s, val);
2270         } else {
2271             riscv_iommu_update_ipsr(s, val);
2272         }
2273 
2274         return MEMTX_OK;
2275 
2276     case RISCV_IOMMU_REG_TR_REQ_CTL:
2277         process_fn = riscv_iommu_process_dbg;
2278         regb = RISCV_IOMMU_REG_TR_REQ_CTL;
2279         busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
2280         break;
2281 
2282     case RISCV_IOMMU_REG_IOCOUNTINH:
2283         if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
2284             break;
2285         }
2286         /* Store previous value of CY bit. */
2287         cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
2288             RISCV_IOMMU_IOCOUNTINH_CY);
2289         break;
2290 
2291 
2292     default:
2293         break;
2294     }
2295 
2296     /*
2297      * Register updates might not be synchronized with the core logic.
2298      * If system software updates a register while the relevant BUSY bit
2299      * is set, the IOMMU behavior of additional writes to the register
2300      * is UNSPECIFIED.
2301      */
2302     riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2303 
2304     /* Busy flag update, MSB 4-byte register. */
2305     if (busy) {
2306         uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2307         stl_le_p(&s->regs_rw[regb], rw | busy);
2308     }
2309 
2310     /* Process HPM writes and update any internal state if needed. */
2311     if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
2312         regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
2313         riscv_iommu_process_hpm_writes(s, regb, cy_inh);
2314     }
2315 
2316     if (process_fn) {
2317         process_fn(s);
2318     }
2319 
2320     return MEMTX_OK;
2321 }
2322 
2323 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2324     uint64_t *data, unsigned size, MemTxAttrs attrs)
2325 {
2326     RISCVIOMMUState *s = opaque;
2327     uint64_t val = -1;
2328     uint8_t *ptr;
2329 
2330     if ((addr & (size - 1)) != 0) {
2331         /* Unsupported MMIO alignment. */
2332         return MEMTX_ERROR;
2333     }
2334 
2335     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2336         return MEMTX_ACCESS_ERROR;
2337     }
2338 
2339     /* Compute cycle register value. */
2340     if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
2341         val = riscv_iommu_hpmcycle_read(s);
2342         ptr = (uint8_t *)&val + (addr & 7);
2343     } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
2344         /*
2345          * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before the timer
2346          * callback completes, in which case the CY_OF bit in
2347          * RISCV_IOMMU_IOHPMCYCLES_OVF would still be 0. Take the CY_OF
2348          * bit state from the RISCV_IOMMU_REG_IOHPMCYCLES register instead,
2349          * as it does not depend on the timer callback and is computed
2350          * directly from the cycle overflow.
2351          */
2352         val = ldq_le_p(&s->regs_rw[addr]);
2353         val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
2354                    ? RISCV_IOMMU_IOCOUNTOVF_CY
2355                    : 0;
2356         ptr = (uint8_t *)&val + (addr & 3);
2357     } else {
2358         ptr = &s->regs_rw[addr];
2359     }
2360 
2361     val = ldn_le_p(ptr, size);
2362 
2363     *data = val;
2364 
2365     return MEMTX_OK;
2366 }
2367 
2368 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2369     .read_with_attrs = riscv_iommu_mmio_read,
2370     .write_with_attrs = riscv_iommu_mmio_write,
2371     .endianness = DEVICE_NATIVE_ENDIAN,
2372     .impl = {
2373         .min_access_size = 4,
2374         .max_access_size = 8,
2375         .unaligned = false,
2376     },
2377     .valid = {
2378         .min_access_size = 4,
2379         .max_access_size = 8,
2380     }
2381 };
2382 
2383 /*
2384  * Translations matching the MSI pattern check are redirected to the
2385  * "riscv-iommu-trap" memory region as untranslated addresses, for additional
2386  * MSI/MRIF interception by the IOMMU interrupt remapping implementation.
2387  * Note: Device emulation code generating an MSI is expected to provide valid
2388  * memory transaction attributes with requester_id set.
2389  */
2390 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2391     uint64_t data, unsigned size, MemTxAttrs attrs)
2392 {
2393     RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
2394     RISCVIOMMUContext *ctx;
2395     MemTxResult res;
2396     void *ref;
2397     uint32_t devid = attrs.requester_id;
2398 
2399     if (attrs.unspecified) {
2400         return MEMTX_ACCESS_ERROR;
2401     }
2402 
2403     /* FIXME: PCIe bus remapping for attached endpoints. */
2404     devid |= s->bus << 8;
2405 
2406     ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2407     if (ctx == NULL) {
2408         res = MEMTX_ACCESS_ERROR;
2409     } else {
2410         res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2411     }
2412     riscv_iommu_ctx_put(s, ref);
2413     return res;
2414 }
2415 
2416 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
2417     uint64_t *data, unsigned size, MemTxAttrs attrs)
2418 {
2419     return MEMTX_ACCESS_ERROR;
2420 }
2421 
2422 static const MemoryRegionOps riscv_iommu_trap_ops = {
2423     .read_with_attrs = riscv_iommu_trap_read,
2424     .write_with_attrs = riscv_iommu_trap_write,
2425     .endianness = DEVICE_LITTLE_ENDIAN,
2426     .impl = {
2427         .min_access_size = 4,
2428         .max_access_size = 8,
2429         .unaligned = true,
2430     },
2431     .valid = {
2432         .min_access_size = 4,
2433         .max_access_size = 8,
2434     }
2435 };
2436 
2437 void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
2438 {
2439     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
2440 }
2441 
2442 static void riscv_iommu_instance_init(Object *obj)
2443 {
2444     RISCVIOMMUState *s = RISCV_IOMMU(obj);
2445 
2446     /* Enable translation debug interface */
2447     s->cap = RISCV_IOMMU_CAP_DBG;
2448 
2449     /* Report QEMU target physical address space limits */
2450     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
2451                        TARGET_PHYS_ADDR_SPACE_BITS);
2452 
2453     /* TODO: method to report supported PID bits */
2454     s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
2455     s->cap |= RISCV_IOMMU_CAP_PD8;
2456 
2457     /* register storage */
2458     s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2459     s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2460     s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2461 
2462     /* Mark all registers read-only */
2463     memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
2464 
2465     /* Device translation context cache */
2466     s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
2467                                          riscv_iommu_ctx_equal,
2468                                          g_free, NULL);
2469 
2470     s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
2471                                          riscv_iommu_iot_equal,
2472                                          g_free, NULL);
2473 
2474     s->iommus.le_next = NULL;
2475     s->iommus.le_prev = NULL;
2476     QLIST_INIT(&s->spaces);
2477 }
2478 
2479 static void riscv_iommu_realize(DeviceState *dev, Error **errp)
2480 {
2481     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2482 
2483     s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
2484     if (s->enable_msi) {
2485         s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
2486     }
2487     if (s->enable_ats) {
2488         s->cap |= RISCV_IOMMU_CAP_ATS;
2489     }
2490     if (s->enable_s_stage) {
2491         s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
2492                   RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
2493     }
2494     if (s->enable_g_stage) {
2495         s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
2496                   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
2497                   RISCV_IOMMU_CAP_SVRSW60T59B;
2498     }
2499 
2500     if (s->hpm_cntrs > 0) {
2501         /* Clip number of HPM counters to maximum supported (31). */
2502         if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
2503             s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
2504         }
2505         /* Enable hardware performance monitor interface */
2506         s->cap |= RISCV_IOMMU_CAP_HPM;
2507     }
2508 
2509     /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
2510     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
2511                         RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
2512 
2513     /*
2514      * Register complete MMIO space, including MSI/PBA registers.
2515      * Note: the PCIDevice implementation will add an overlapping MR for
2516      * MSI/PBA, which it manages directly.
2517      */
2518     memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
2519         "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
2520 
2521     /* Set power-on register state */
2522     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
2523     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
2524     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
2525              ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
2526     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
2527         ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
2528     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
2529         ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
2530     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
2531         ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
2532     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
2533         ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
2534     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
2535         RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
2536     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
2537         RISCV_IOMMU_CQCSR_BUSY);
2538     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
2539         RISCV_IOMMU_FQCSR_FQOF);
2540     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
2541         RISCV_IOMMU_FQCSR_BUSY);
2542     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
2543         RISCV_IOMMU_PQCSR_PQOF);
2544     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
2545         RISCV_IOMMU_PQCSR_BUSY);
2546     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
2547     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
2548     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
2549     /* If debug registers are enabled. */
2550     if (s->cap & RISCV_IOMMU_CAP_DBG) {
2551         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
2552         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
2553             RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2554     }
2555 
2556     /* If HPM registers are enabled. */
2557     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2558         /* +1 for cycle counter bit. */
2559         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
2560                  ~((2 << s->hpm_cntrs) - 1));
2561         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
2562         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
2563                0x00, s->hpm_cntrs * 8);
2564         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
2565                0x00, s->hpm_cntrs * 8);
2566     }
2567 
2568     /* Memory region for downstream access, if specified. */
2569     if (s->target_mr) {
2570         s->target_as = g_new0(AddressSpace, 1);
2571         address_space_init(s->target_as, s->target_mr,
2572             "riscv-iommu-downstream");
2573     } else {
2574         /* Fallback to global system memory. */
2575         s->target_as = &address_space_memory;
2576     }
2577 
2578     /* Memory region for untranslated MRIF/MSI writes */
2579     memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
2580             "riscv-iommu-trap", ~0ULL);
2581     address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
2582 
2583     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2584         s->hpm_timer =
2585             timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
2586         s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
2587     }
2588 }
2589 
2590 static void riscv_iommu_unrealize(DeviceState *dev)
2591 {
2592     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2593 
2594     g_hash_table_unref(s->iot_cache);
2595     g_hash_table_unref(s->ctx_cache);
2596 
2597     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2598         g_hash_table_unref(s->hpm_event_ctr_map);
2599         timer_free(s->hpm_timer);
2600     }
2601 }
2602 
2603 void riscv_iommu_reset(RISCVIOMMUState *s)
2604 {
2605     uint32_t reg_clr;
2606     int ddtp_mode;
2607 
2608     /*
2609      * Clear DDTP while setting the DDTP mode back to the
2610      * user's initial setting.
2611      */
2612     ddtp_mode = s->enable_off ?
2613                 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
2614     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
2615     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);
2616 
2617     reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
2618               RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
2619     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);
2620 
2621     reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
2622               RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
2623     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);
2624 
2625     reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
2626               RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
2627     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);
2628 
2629     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
2630                           RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2631 
2632     riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);
2633 
2634     g_hash_table_remove_all(s->ctx_cache);
2635     g_hash_table_remove_all(s->iot_cache);
2636 }
2637 
2638 static const Property riscv_iommu_properties[] = {
2639     DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
2640         RISCV_IOMMU_SPEC_DOT_VER),
2641     DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
2642     DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
2643         LIMIT_CACHE_IOT),
2644     DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
2645     DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
2646     DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
2647     DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
2648     DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
2649     DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
2650         TYPE_MEMORY_REGION, MemoryRegion *),
2651     DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
2652                       RISCV_IOMMU_IOCOUNT_NUM),
2653 };
2654 
2655 static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
2656 {
2657     DeviceClass *dc = DEVICE_CLASS(klass);
2658 
2659     /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
2660     dc->user_creatable = false;
2661     dc->realize = riscv_iommu_realize;
2662     dc->unrealize = riscv_iommu_unrealize;
2663     device_class_set_props(dc, riscv_iommu_properties);
2664 }
2665 
2666 static const TypeInfo riscv_iommu_info = {
2667     .name = TYPE_RISCV_IOMMU,
2668     .parent = TYPE_DEVICE,
2669     .instance_size = sizeof(RISCVIOMMUState),
2670     .instance_init = riscv_iommu_instance_init,
2671     .class_init = riscv_iommu_class_init,
2672 };
2673 
2674 static const char *IOMMU_FLAG_STR[] = {
2675     "NA",
2676     "RO",
2677     "WR",
2678     "RW",
2679 };
2680 
2681 /* RISC-V IOMMU Memory Region - Address Translation Space */
2682 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2683     IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2684     IOMMUAccessFlags flag, int iommu_idx)
2685 {
2686     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2687     RISCVIOMMUContext *ctx;
2688     void *ref;
2689     IOMMUTLBEntry iotlb = {
2690         .iova = addr,
2691         .target_as = as->iommu->target_as,
2692         .addr_mask = ~0ULL,
2693         .perm = flag,
2694     };
2695 
2696     ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2697     if (ctx == NULL) {
2698         /* Translation disabled or invalid. */
2699         iotlb.addr_mask = 0;
2700         iotlb.perm = IOMMU_NONE;
2701     } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2702         /* Translation disabled or fault reported. */
2703         iotlb.addr_mask = 0;
2704         iotlb.perm = IOMMU_NONE;
2705     }
2706 
2707     /* Trace all dma translations with original access flags. */
2708     trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2709                           PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2710                           IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2711                           iotlb.translated_addr);
2712 
2713     riscv_iommu_ctx_put(as->iommu, ref);
2714 
2715     return iotlb;
2716 }
2717 
2718 static int riscv_iommu_memory_region_notify(
2719     IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2720     IOMMUNotifierFlag new, Error **errp)
2721 {
2722     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2723 
2724     if (old == IOMMU_NOTIFIER_NONE) {
2725         as->notifier = true;
2726         trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2727     } else if (new == IOMMU_NOTIFIER_NONE) {
2728         as->notifier = false;
2729         trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2730     }
2731 
2732     return 0;
2733 }
2734 
2735 static inline bool pci_is_iommu(PCIDevice *pdev)
2736 {
2737     return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
2738 }
2739 
2740 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2741 {
2742     RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2743     PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2744     AddressSpace *as = NULL;
2745 
2746     if (pdev && pci_is_iommu(pdev)) {
2747         return s->target_as;
2748     }
2749 
2750     /* Find first registered IOMMU device */
2751     while (s->iommus.le_prev) {
2752         s = *(s->iommus.le_prev);
2753     }
2754 
2755     /* Find first matching IOMMU */
2756     while (s != NULL && as == NULL) {
2757         as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2758         s = s->iommus.le_next;
2759     }
2760 
2761     return as ? as : &address_space_memory;
2762 }
2763 
2764 static const PCIIOMMUOps riscv_iommu_ops = {
2765     .get_address_space = riscv_iommu_find_as,
2766 };
2767 
2768 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2769         Error **errp)
2770 {
2771     if (bus->iommu_ops &&
2772         bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2773         /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2774         RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2775         QLIST_INSERT_AFTER(last, iommu, iommus);
2776     } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2777         pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2778     } else {
2779         error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2780             pci_bus_num(bus));
2781     }
2782 }
2783 
2784 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
2785     MemTxAttrs attrs)
2786 {
2787     return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
2788 }
2789 
2790 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
2791 {
2792     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2793     return 1 << as->iommu->pid_bits;
2794 }
2795 
2796 static void riscv_iommu_memory_region_init(ObjectClass *klass, const void *data)
2797 {
2798     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
2799 
2800     imrc->translate = riscv_iommu_memory_region_translate;
2801     imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
2802     imrc->attrs_to_index = riscv_iommu_memory_region_index;
2803     imrc->num_indexes = riscv_iommu_memory_region_index_len;
2804 }
2805 
2806 static const TypeInfo riscv_iommu_memory_region_info = {
2807     .parent = TYPE_IOMMU_MEMORY_REGION,
2808     .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2809     .class_init = riscv_iommu_memory_region_init,
2810 };
2811 
2812 static void riscv_iommu_register_mr_types(void)
2813 {
2814     type_register_static(&riscv_iommu_memory_region_info);
2815     type_register_static(&riscv_iommu_info);
2816 }
2817 
2818 type_init(riscv_iommu_register_mr_types);
2819