xref: /openbmc/qemu/hw/riscv/riscv-iommu.c (revision feef1866d1366d651e6a3cb8c9cf1a9aabb81395)
/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "trace.h"

#define LIMIT_CACHE_CTX               (1U << 7)
#define LIMIT_CACHE_IOT               (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

/* Device translation context state. */
struct RISCVIOMMUContext {
    uint64_t devid:24;          /* Requester Id, AKA device_id */
    uint64_t process_id:20;    /* Process ID. PASID for PCIe */
    uint64_t tc;                /* Translation Control */
    uint64_t ta;                /* Translation Attributes */
    uint64_t satp;              /* S-Stage address translation and protection */
    uint64_t gatp;              /* G-Stage address translation and protection */
    uint64_t msi_addr_mask;     /* MSI filtering - address mask */
    uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
    uint64_t msiptp;            /* MSI redirection page table pointer */
};

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0

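/*
 * ICVEC packs one 4-bit interrupt vector index per queue: CIV in bits
 * [3:0], FIV in [7:4], PMIV in [11:8] and PIV in [15:12], matching the
 * shifts applied below.
 */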
static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
{
    switch (vec_type) {
    case RISCV_IOMMU_INTR_CQ:
        return icvec & RISCV_IOMMU_ICVEC_CIV;
    case RISCV_IOMMU_INTR_FQ:
        return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
    case RISCV_IOMMU_INTR_PM:
        return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
    case RISCV_IOMMU_INTR_PQ:
        return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
    default:
        g_assert_not_reached();
    }
}

static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
    uint32_t ipsr, icvec, vector;

    if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);

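    /*
     * riscv_iommu_reg_mod32() returns the previous IPSR value: deliver
     * the interrupt only on a 0 -> 1 transition of this queue's pending
     * bit, i.e. not again until software clears IPSR.
     */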
    if (!(ipsr & (1 << vec_type))) {
        vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
        s->notify(s, vector);
        trace_riscv_iommu_notify_int_vector(vec_type, vector);
    }
}

static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), ev->hdr, ev->iotval);

    if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
        !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
        return;
    }

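    /*
     * The ring is full when advancing the tail would catch up with the
     * head: report queue overflow instead of writing the record.
     */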
    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else {
        dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
        if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}

static void riscv_iommu_pri(RISCVIOMMUState *s,
    struct riscv_iommu_pq_record *pr)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), pr->payload);

    if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
        !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else {
        dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
        if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}

/*
 * Discards all bits from 'val' whose matching bits in the same
 * positions in the mask 'ext' are zeros, and packs the remaining
 * bits from 'val' contiguously at the least-significant end of the
 * result, keeping the same bit order as 'val' and filling any
 * other bits at the most-significant end of the result with zeros.
 *
 * For example, for the following 'val' and 'ext', the return 'ret'
 * will be:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to the x86
 * bit-manipulation instruction PEXT (parallel bits extract).
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}
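
/*
 * A worked example with illustrative values (not from the spec):
 * riscv_iommu_pext_u64(0xb2, 0xa6) == 0xd, since ext = 0b10100110
 * selects val bits 7, 5, 2 and 1 (here 1, 1, 0, 1), which pack
 * LSB-first into 0b1101.
 */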

/* Check if GPA matches MSI/MRIF pattern. */
static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    dma_addr_t gpa)
{
    if (!s->enable_msi) {
        return false;
    }

    if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
        return false; /* Invalid MSI/MRIF mode */
    }

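    /*
     * The PPN matches when it agrees with the pattern on every bit where
     * the mask is 0; mask bits set to 1 are don't-care. E.g. (made-up
     * values) with mask = 0xff, any PPN whose bits above bit 7 equal the
     * pattern's matches.
     */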
    if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
        return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
    }

    return true;
}

/*
 * RISC-V IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations could be merged into a single helper function in the
 * future. Keeping them separate for now, as error reporting and flow specifics
 * are sufficiently different to justify separate implementations.
 *
 * @s        : IOMMU Device State
 * @ctx      : Translation context for device id and process address space id.
 * @iotlb    : translation data: physical address and access mode.
 * @return   : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /* S/G translation parameters. */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
            (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize   = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    }

    /* S/G stages translation tables root pointers */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;

    do {
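        /*
         * The root of a G-stage table is indexed with 2 extra bits: the
         * SvNNx4 modes widen the top-level index to cover the 4x larger
         * guest physical address space.
         */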
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
            if ((addr & va_mask) != addr) {
                return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
            }
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /* Continue with S-Stage translation? */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                        : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /* Continue with G-Stage translation? */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /* Continue with G-Stage translation? */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}

static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
    /* Flag the PID field as valid only when the caller says it is */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);

    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}

/* Redirect MSI write for given GPA. */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    uint32_t n190;
    uint64_t pte[2];
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    if (intn >= 256) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    addr = addr | (intn * sizeof(pte));
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
            MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047), or when the destination address is
     * not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page
     * tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /* MSI MRIF mode, non-atomic pending bit update */
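    /*
     * Interrupt identity bits [10:6] select the doubleword holding the
     * pending bit and bits [5:0] the bit within it; the matching enable
     * bits are read from the following doubleword. E.g. identity 75 maps
     * to bit 11 of the doubleword at byte offset 8 within the MRIF.
     */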

    /* MRIF pending bit address */
    addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
    addr = addr | ((data & 0x7c0) >> 3);

    trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                          PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                          gpa, addr);

    /* MRIF pending bit mask */
    data = 1ULL << (data & 0x03f);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    intn = intn | data;
    res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    /* Get MRIF enable bits */
    addr = addr + sizeof(intn);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    if (!(intn & data)) {
        /* notification disabled, MRIF update completed. */
        return MEMTX_OK;
    }

    /* Send notification message */
    addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
    n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
          (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);

    res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);

    return MEMTX_OK;

err:
    riscv_iommu_report_fault(s, ctx, fault_type, cause,
                             !!ctx->process_id, 0, 0);
    return res;
}

/*
 * Check device context configuration as described by the
 * riscv-iommu spec section "Device-context configuration
 * checks".
 */
static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
                                            RISCVIOMMUContext *ctx)
{
    uint32_t fsc_mode, msi_mode;
    uint64_t gatp;

    if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
         ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
        ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
        return false;
    }

    if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
        ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
        return false;
    }

    if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
        msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);

        if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
            msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
            return false;
        }
    }

    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
    if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
        gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
        return false;
    }

    fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);

    if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
        switch (fsc_mode) {
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
            if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
            if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
            if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
                return false;
            }
            break;
        }
    } else {
        /* DC.tc.PDTV is 0 */
        if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
            return false;
        }

        if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
            if (fsc_mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
                !(s->cap & RISCV_IOMMU_CAP_SV32)) {
                return false;
            }
        } else {
            switch (fsc_mode) {
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
                if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
                if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
                if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                    return false;
                }
                break;
            }
        }
    }

    /*
     * CAP_END is always zero (only one endianness). FCTL_BE is
     * always zero (little-endian accesses). Thus TC_SBE must
     * always be LE, i.e. zero.
     */
    if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
        return false;
    }

    return true;
}

/*
 * Validate process context (PC) according to section
 * "Process-context configuration checks".
 */
static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
                                             RISCVIOMMUContext *ctx)
{
    uint32_t mode;

    if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
        return false;
    }

    if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
        return false;
    }

    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    switch (mode) {
    case RISCV_IOMMU_DC_FSC_MODE_BARE:
    /* sv39 and sv32 modes have the same value (8) */
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
        break;
    default:
        return false;
    }

    if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
        if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
            !(s->cap & RISCV_IOMMU_CAP_SV32)) {
            return false;
        }
    } else {
        switch (mode) {
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
            if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
            if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
            if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                return false;
            }
            break;
        }
    }

    return true;
}

/*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
 *
 * @s         : IOMMU Device State
 * @ctx       : Device Translation Context with devid and process_id set.
 * @return    : success or fault code.
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    int depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
            RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
            RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
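    /*
     * The bound below encodes those widths: 9 index bits per extra level
     * plus 6, with one more bit for the base format except at 3LVL, e.g.
     * base format at 2LVL: 1 * 9 + 6 + 1 = 16 bits.
     */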
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }

    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}

/* Find or allocate translation context for a given {device_id, process_id} */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
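        /*
         * Cheap bulk eviction: once the cache limit is reached, swap in
         * a fresh table instead of evicting individual entries.
         */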
1140          if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1141              g_hash_table_unref(ctx_cache);
1142              ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1143                                                riscv_iommu_ctx_equal,
1144                                                g_free, NULL);
1145              g_hash_table_ref(ctx_cache);
1146              g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1147          }
1148          g_hash_table_add(ctx_cache, ctx);
1149          *ref = ctx_cache;
1150          return ctx;
1151      }
1152  
1153      g_hash_table_unref(ctx_cache);
1154      *ref = NULL;
1155  
1156      riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1157                               fault, !!process_id, 0, 0);
1158  
1159      g_free(ctx);
1160      return NULL;
1161  }
1162  
riscv_iommu_ctx_put(RISCVIOMMUState * s,void * ref)1163  static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
1164  {
1165      if (ref) {
1166          g_hash_table_unref((GHashTable *)ref);
1167      }
1168  }
1169  
1170  /* Find or allocate address space for a given device */
riscv_iommu_space(RISCVIOMMUState * s,uint32_t devid)1171  static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1172  {
1173      RISCVIOMMUSpace *as;
1174  
1175      /* FIXME: PCIe bus remapping for attached endpoints. */
1176      devid |= s->bus << 8;
1177  
1178      QLIST_FOREACH(as, &s->spaces, list) {
1179          if (as->devid == devid) {
1180              break;
1181          }
1182      }
1183  
1184      if (as == NULL) {
1185          char name[64];
1186          as = g_new0(RISCVIOMMUSpace, 1);
1187  
1188          as->iommu = s;
1189          as->devid = devid;
1190  
1191          snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1192              PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1193  
1194          /* IOVA address space, untranslated addresses */
1195          memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1196              TYPE_RISCV_IOMMU_MEMORY_REGION,
1197              OBJECT(as), "riscv_iommu", UINT64_MAX);
1198          address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1199  
1200          QLIST_INSERT_HEAD(&s->spaces, as, list);
1201  
1202          trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1203                  PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1204      }
1205      return &as->iova_as;
1206  }
1207  
1208  /* Translation Object cache support */
riscv_iommu_iot_equal(gconstpointer v1,gconstpointer v2)1209  static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1210  {
1211      RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1212      RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1213      return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1214             t1->iova == t2->iova;
1215  }
1216  
riscv_iommu_iot_hash(gconstpointer v)1217  static guint riscv_iommu_iot_hash(gconstpointer v)
1218  {
1219      RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1220      return (guint)t->iova;
1221  }
1222  
1223  /* GV: 1 PSCV: 1 AV: 1 */
riscv_iommu_iot_inval_pscid_iova(gpointer key,gpointer value,gpointer data)1224  static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1225                                               gpointer data)
1226  {
1227      RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1228      RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1229      if (iot->gscid == arg->gscid &&
1230          iot->pscid == arg->pscid &&
1231          iot->iova == arg->iova) {
1232          iot->perm = IOMMU_NONE;
1233      }
1234  }
1235  
1236  /* GV: 1 PSCV: 1 AV: 0 */
riscv_iommu_iot_inval_pscid(gpointer key,gpointer value,gpointer data)1237  static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
1238                                          gpointer data)
1239  {
1240      RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1241      RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1242      if (iot->gscid == arg->gscid &&
1243          iot->pscid == arg->pscid) {
1244          iot->perm = IOMMU_NONE;
1245      }
1246  }
1247  
1248  /* GV: 1 GVMA: 1 */
riscv_iommu_iot_inval_gscid_gpa(gpointer key,gpointer value,gpointer data)1249  static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
1250                                              gpointer data)
1251  {
1252      RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1253      RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1254      if (iot->gscid == arg->gscid) {
1255          /* simplified cache, no GPA matching */
1256          iot->perm = IOMMU_NONE;
1257      }
1258  }
1259  
1260  /* GV: 1 GVMA: 0 */
riscv_iommu_iot_inval_gscid(gpointer key,gpointer value,gpointer data)1261  static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
1262                                          gpointer data)
1263  {
1264      RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1265      RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1266      if (iot->gscid == arg->gscid) {
1267          iot->perm = IOMMU_NONE;
1268      }
1269  }
1270  
1271  /* GV: 0 */
riscv_iommu_iot_inval_all(gpointer key,gpointer value,gpointer data)1272  static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
1273                                        gpointer data)
1274  {
1275      RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1276      iot->perm = IOMMU_NONE;
1277  }
1278  
1279  /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_lookup(RISCVIOMMUContext * ctx,GHashTable * iot_cache,hwaddr iova)1280  static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1281      GHashTable *iot_cache, hwaddr iova)
1282  {
1283      RISCVIOMMUEntry key = {
1284          .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1285          .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1286          .iova  = PPN_DOWN(iova),
1287      };
1288      return g_hash_table_lookup(iot_cache, &key);
1289  }
1290  
1291  /* caller should keep ref-count for iot_cache object */
riscv_iommu_iot_update(RISCVIOMMUState * s,GHashTable * iot_cache,RISCVIOMMUEntry * iot)1292  static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1293      GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1294  {
1295      if (!s->iot_limit) {
1296          return;
1297      }
1298  
1299      if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1300          iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1301                                            riscv_iommu_iot_equal,
1302                                            g_free, NULL);
1303          g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1304      }
1305      g_hash_table_add(iot_cache, iot);
1306  }
1307  
riscv_iommu_iot_inval(RISCVIOMMUState * s,GHFunc func,uint32_t gscid,uint32_t pscid,hwaddr iova)1308  static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
1309      uint32_t gscid, uint32_t pscid, hwaddr iova)
1310  {
1311      GHashTable *iot_cache;
1312      RISCVIOMMUEntry key = {
1313          .gscid = gscid,
1314          .pscid = pscid,
1315          .iova  = PPN_DOWN(iova),
1316      };
1317  
1318      iot_cache = g_hash_table_ref(s->iot_cache);
1319      g_hash_table_foreach(iot_cache, func, &key);
1320      g_hash_table_unref(iot_cache);
1321  }
1322  
riscv_iommu_translate(RISCVIOMMUState * s,RISCVIOMMUContext * ctx,IOMMUTLBEntry * iotlb,bool enable_cache)1323  static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
1324      IOMMUTLBEntry *iotlb, bool enable_cache)
1325  {
1326      RISCVIOMMUEntry *iot;
1327      IOMMUAccessFlags perm;
1328      bool enable_pid;
1329      bool enable_pri;
1330      GHashTable *iot_cache;
1331      int fault;
1332  
1333      iot_cache = g_hash_table_ref(s->iot_cache);
1334      /*
1335       * TC[32] is reserved for custom extensions, used here to temporarily
1336       * enable automatic page-request generation for ATS queries.
1337       */
1338      enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
1339      enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
1340  
1341      /* Check for ATS request. */
1342      if (iotlb->perm == IOMMU_NONE) {
1343          /* Check if ATS is disabled. */
1344          if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
1345              enable_pri = false;
1346              fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1347              goto done;
1348          }
1349      }
1350  
1351      iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
1352      perm = iot ? iot->perm : IOMMU_NONE;
1353      if (perm != IOMMU_NONE) {
1354          iotlb->translated_addr = PPN_PHYS(iot->phys);
1355          iotlb->addr_mask = ~TARGET_PAGE_MASK;
1356          iotlb->perm = perm;
1357          fault = 0;
1358          goto done;
1359      }
1360  
1361      /* Translate using device directory / page table information. */
1362      fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
1363  
1364      if (!fault && iotlb->target_as == &s->trap_as) {
1365          /* Do not cache trapped MSI translations */
1366          goto done;
1367      }
1368  
1369      /*
1370       * We made an implementation choice to not cache identity-mapped
1371       * translations, as allowed by the specification, to avoid
1372       * translation cache evictions for other devices sharing the
1373       * IOMMU hardware model.
1374       */
1375      if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
1376          iot = g_new0(RISCVIOMMUEntry, 1);
1377          iot->iova = PPN_DOWN(iotlb->iova);
1378          iot->phys = PPN_DOWN(iotlb->translated_addr);
1379          iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
1380          iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
1381          iot->perm = iotlb->perm;
1382          riscv_iommu_iot_update(s, iot_cache, iot);
1383      }
1384  
1385  done:
1386      g_hash_table_unref(iot_cache);
1387  
1388      if (enable_pri && fault) {
1389          struct riscv_iommu_pq_record pr = {0};
1390          if (enable_pid) {
1391              pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
1392                                 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
1393          }
1394          pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
1395          pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
1396                       RISCV_IOMMU_PREQ_PAYLOAD_M;
1397          riscv_iommu_pri(s, &pr);
1398          return fault;
1399      }
1400  
1401      if (fault) {
1402          unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
1403  
1404          if (iotlb->perm & IOMMU_RW) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}

/* IOMMU Command Interface */
static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
    uint64_t addr, uint32_t data)
{
    /*
     * ATS processing in this implementation of the IOMMU is synchronous;
     * there is no need to wait for completions here.
     */
    if (!notify) {
        return MEMTX_OK;
    }

    return dma_memory_write(s->target_as, addr, &data, sizeof(data),
        MEMTXATTRS_UNSPECIFIED);
}

static void riscv_iommu_ats(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
    IOMMUAccessFlags perm,
    void (*trace_fn)(const char *id))
{
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *n;
    IOMMUTLBEvent event;
    uint32_t pid;
    uint32_t devid;
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;

    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    } else {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

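    /* Find the address space bound to this requester id. */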
    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (!as || !as->notifier) {
        return;
    }

    event.type = flag;
    event.entry.perm = perm;
    event.entry.target_as = s->target_as;

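    /* Notify all registered notifiers, filtered by PASID when PV is set. */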
    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
        if (!pv || n->iommu_idx == pid) {
            event.entry.iova = n->start;
            event.entry.addr_mask = n->end - n->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(n, &event);
        }
    }
}

static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                           trace_riscv_iommu_ats_inval);
}

static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                           trace_riscv_iommu_ats_prgr);
}

static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
    bool ok = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode ||
        new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
        new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
        ok = true;
    } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
        ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
             old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    }

    if (ok) {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        new_ddtp = old_ddtp;
    }
    s->ddtp = new_ddtp;

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}

/* Command function and opcode field. */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))

static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

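    /* Fetch and execute commands until head catches up with tail. */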
    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
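            /*
             * dword1 carries the 4-byte-aligned completion notification
             * address stored shifted right by 2; recover the byte address.
             */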
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID */
                func = riscv_iommu_iot_inval_gscid;
            } else {
                /* invalidate cache matching GSCID and ADDR (GPA) */
                func = riscv_iommu_iot_inval_gscid_gpa;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings, simplified model */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
                /* invalidate cache matching GSCID, simplified model */
                func = riscv_iommu_iot_inval_gscid;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID and PSCID */
                func = riscv_iommu_iot_inval_pscid;
            } else {
                /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
                func = riscv_iommu_iot_inval_pscid_iova;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid command, do not advance the queue head. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}

static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
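        /* Queue capacity is 2^(LOG2SZ + 1) entries; derive the index mask. */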
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
        s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        ctrl_set = RISCV_IOMMU_FQCSR_FQON;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
            RISCV_IOMMU_FQCSR_FQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
            RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

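    /* A debug translation request is signalled by setting the Go/Busy bit. */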
    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                                 RISCV_IOMMU_TR_RESPONSE_FAULT |
                                 (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova >>= TARGET_PAGE_BITS;
            iova &= RISCV_IOMMU_TR_RESPONSE_PPN;

            /* We do not support superpages (> 4 KiB) for now */
            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
        RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}

typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

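    /*
     * For each interrupt cause (CIV/FIV/PMIV/PIV), clamp the requested
     * vector index to the number of vectors this device implements.
     */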
    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}

static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

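    /*
     * Re-evaluate each pending-interrupt bit against its queue CSR: a bit
     * stays pending only while the queue interrupt is enabled and at least
     * one of its cause bits is still active.
     */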
    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

/*
 * Compute the value that a write of 'data' produces for the register at
 * 'reg_addr', honoring its read-only, read-write and write-1-to-clear
 * bits, and store the result in 'dest'.
 *
 * The result is written in little-endian.
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

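    /*
     * Preserve read-only bits, merge in writable bits, then apply
     * write-1-to-clear semantics.
     */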
    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}

static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with the core logic.
     * If system software updates a register while the relevant BUSY bit
     * is set, the IOMMU behavior for additional writes to that register
     * is UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    ptr = &s->regs_rw[addr];
    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
 * memory region as untranslated address, for additional MSI/MRIF interception
 * by IOMMU interrupt remapping implementation.
 * Note: Device emulation code generating an MSI is expected to provide valid
 * memory transaction attributes with requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
    uint64_t data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *)opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }
    /* Enable translation debug interface */
    s->cap |= RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
        "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
        ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
        ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
        ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
        ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
        RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
        RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
        RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
        RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
        RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
        RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
            RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
            "riscv-iommu-downstream");
    } else {
        /* Fallback to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
            "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}

static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);
}

static Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
        RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
        LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
        TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_END_OF_LIST(),
};

static void riscv_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};

/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all dma translations with original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}

static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}

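/* PCI base class 08h (base system peripheral), subclass 06h: IOMMU. */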
static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}

static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}

static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
        Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
            pci_bus_num(bus));
    }
}

static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
    MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);