/*
 * QEMU emulation of an RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "trace.h"

/*
 * Cache size limits.
 *
 * LIMIT_CACHE_CTX: riscv_iommu_ctx() replaces the translation context cache
 * with a fresh, empty table once it holds this many entries.
 * LIMIT_CACHE_IOT: presumably the analogous bound for the address
 * translation cache; its use is not visible in this part of the file.
 */
#define LIMIT_CACHE_CTX               (1U << 7)
#define LIMIT_CACHE_IOT               (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;  /* Linkage in a list of spaces */
};

/* Device translation context state.
*/ 55 struct RISCVIOMMUContext { 56 uint64_t devid:24; /* Requester Id, AKA device_id */ 57 uint64_t process_id:20; /* Process ID. PASID for PCIe */ 58 uint64_t tc; /* Translation Control */ 59 uint64_t ta; /* Translation Attributes */ 60 uint64_t satp; /* S-Stage address translation and protection */ 61 uint64_t gatp; /* G-Stage address translation and protection */ 62 uint64_t msi_addr_mask; /* MSI filtering - address mask */ 63 uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ 64 uint64_t msiptp; /* MSI redirection page table pointer */ 65 }; 66 67 /* Address translation cache entry */ 68 struct RISCVIOMMUEntry { 69 uint64_t iova:44; /* IOVA Page Number */ 70 uint64_t pscid:20; /* Process Soft-Context identifier */ 71 uint64_t phys:44; /* Physical Page Number */ 72 uint64_t gscid:16; /* Guest Soft-Context identifier */ 73 uint64_t perm:2; /* IOMMU_RW flags */ 74 }; 75 76 /* IOMMU index for transactions without process_id specified. */ 77 #define RISCV_IOMMU_NOPROCID 0 78 79 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) 80 { 81 switch (vec_type) { 82 case RISCV_IOMMU_INTR_CQ: 83 return icvec & RISCV_IOMMU_ICVEC_CIV; 84 case RISCV_IOMMU_INTR_FQ: 85 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; 86 case RISCV_IOMMU_INTR_PM: 87 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; 88 case RISCV_IOMMU_INTR_PQ: 89 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; 90 default: 91 g_assert_not_reached(); 92 } 93 } 94 95 static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) 96 { 97 const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL); 98 uint32_t ipsr, icvec, vector; 99 100 if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) { 101 return; 102 } 103 104 icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); 105 ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); 106 107 if (!(ipsr & (1 << vec_type))) { 108 vector = riscv_iommu_get_icvec_vector(icvec, vec_type); 109 s->notify(s, vector); 110 
trace_riscv_iommu_notify_int_vector(vec_type, vector); 111 } 112 } 113 114 static void riscv_iommu_fault(RISCVIOMMUState *s, 115 struct riscv_iommu_fq_record *ev) 116 { 117 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 118 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; 119 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; 120 uint32_t next = (tail + 1) & s->fq_mask; 121 uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); 122 123 trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 124 PCI_FUNC(devid), ev->hdr, ev->iotval); 125 126 if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || 127 !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { 128 return; 129 } 130 131 if (head == next) { 132 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 133 RISCV_IOMMU_FQCSR_FQOF, 0); 134 } else { 135 dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); 136 if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), 137 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 138 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 139 RISCV_IOMMU_FQCSR_FQMF, 0); 140 } else { 141 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); 142 } 143 } 144 145 if (ctrl & RISCV_IOMMU_FQCSR_FIE) { 146 riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); 147 } 148 } 149 150 static void riscv_iommu_pri(RISCVIOMMUState *s, 151 struct riscv_iommu_pq_record *pr) 152 { 153 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 154 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; 155 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; 156 uint32_t next = (tail + 1) & s->pq_mask; 157 uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); 158 159 trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 160 PCI_FUNC(devid), pr->payload); 161 162 if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || 163 !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) 
{ 164 return; 165 } 166 167 if (head == next) { 168 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 169 RISCV_IOMMU_PQCSR_PQOF, 0); 170 } else { 171 dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); 172 if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), 173 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 174 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 175 RISCV_IOMMU_PQCSR_PQMF, 0); 176 } else { 177 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); 178 } 179 } 180 181 if (ctrl & RISCV_IOMMU_PQCSR_PIE) { 182 riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); 183 } 184 } 185 186 /* 187 * Discards all bits from 'val' whose matching bits in the same 188 * positions in the mask 'ext' are zeros, and packs the remaining 189 * bits from 'val' contiguously at the least-significant end of the 190 * result, keeping the same bit order as 'val' and filling any 191 * other bits at the most-significant end of the result with zeros. 192 * 193 * For example, for the following 'val' and 'ext', the return 'ret' 194 * will be: 195 * 196 * val = a b c d e f g h 197 * ext = 1 0 1 0 0 1 1 0 198 * ret = 0 0 0 0 a c f g 199 * 200 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3 201 * "Process to translate addresses of MSIs", is similar to bit manip 202 * function PEXT (Parallel bits extract) from x86. 203 */ 204 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext) 205 { 206 uint64_t ret = 0; 207 uint64_t rot = 1; 208 209 while (ext) { 210 if (ext & 1) { 211 if (val & 1) { 212 ret |= rot; 213 } 214 rot <<= 1; 215 } 216 val >>= 1; 217 ext >>= 1; 218 } 219 220 return ret; 221 } 222 223 /* Check if GPA matches MSI/MRIF pattern. 
*/ 224 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 225 dma_addr_t gpa) 226 { 227 if (!s->enable_msi) { 228 return false; 229 } 230 231 if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != 232 RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 233 return false; /* Invalid MSI/MRIF mode */ 234 } 235 236 if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { 237 return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ 238 } 239 240 return true; 241 } 242 243 /* 244 * RISCV IOMMU Address Translation Lookup - Page Table Walk 245 * 246 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c 247 * Both implementation can be merged into single helper function in future. 248 * Keeping them separate for now, as error reporting and flow specifics are 249 * sufficiently different for separate implementation. 250 * 251 * @s : IOMMU Device State 252 * @ctx : Translation context for device id and process address space id. 253 * @iotlb : translation data: physical address and access mode. 254 * @return : success or fault cause code. 255 */ 256 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 257 IOMMUTLBEntry *iotlb) 258 { 259 dma_addr_t addr, base; 260 uint64_t satp, gatp, pte; 261 bool en_s, en_g; 262 struct { 263 unsigned char step; 264 unsigned char levels; 265 unsigned char ptidxbits; 266 unsigned char ptesize; 267 } sc[2]; 268 /* Translation stage phase */ 269 enum { 270 S_STAGE = 0, 271 G_STAGE = 1, 272 } pass; 273 MemTxResult ret; 274 275 satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); 276 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 277 278 en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; 279 en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; 280 281 /* 282 * Early check for MSI address match when IOVA == GPA. 
283 * Note that the (!en_s) condition means that the MSI 284 * page table may only be used when guest pages are 285 * mapped using the g-stage page table, whether single- 286 * or two-stage paging is enabled. It's unavoidable though, 287 * because the spec mandates that we do a first-stage 288 * translation before we check the MSI page table, which 289 * means we can't do an early MSI check unless we have 290 * strictly !en_s. 291 */ 292 if (!en_s && (iotlb->perm & IOMMU_WO) && 293 riscv_iommu_msi_check(s, ctx, iotlb->iova)) { 294 iotlb->target_as = &s->trap_as; 295 iotlb->translated_addr = iotlb->iova; 296 iotlb->addr_mask = ~TARGET_PAGE_MASK; 297 return 0; 298 } 299 300 /* Exit early for pass-through mode. */ 301 if (!(en_s || en_g)) { 302 iotlb->translated_addr = iotlb->iova; 303 iotlb->addr_mask = ~TARGET_PAGE_MASK; 304 /* Allow R/W in pass-through mode */ 305 iotlb->perm = IOMMU_RW; 306 return 0; 307 } 308 309 /* S/G translation parameters. */ 310 for (pass = 0; pass < 2; pass++) { 311 uint32_t sv_mode; 312 313 sc[pass].step = 0; 314 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : 315 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { 316 /* 32bit mode for GXL/SXL == 1 */ 317 switch (pass ? gatp : satp) { 318 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 319 sc[pass].levels = 0; 320 sc[pass].ptidxbits = 0; 321 sc[pass].ptesize = 0; 322 break; 323 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 324 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; 325 if (!(s->cap & sv_mode)) { 326 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 327 } 328 sc[pass].levels = 2; 329 sc[pass].ptidxbits = 10; 330 sc[pass].ptesize = 4; 331 break; 332 default: 333 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 334 } 335 } else { 336 /* 64bit mode for GXL/SXL == 0 */ 337 switch (pass ? gatp : satp) { 338 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 339 sc[pass].levels = 0; 340 sc[pass].ptidxbits = 0; 341 sc[pass].ptesize = 0; 342 break; 343 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 344 sv_mode = pass ? 
RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; 345 if (!(s->cap & sv_mode)) { 346 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 347 } 348 sc[pass].levels = 3; 349 sc[pass].ptidxbits = 9; 350 sc[pass].ptesize = 8; 351 break; 352 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 353 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; 354 if (!(s->cap & sv_mode)) { 355 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 356 } 357 sc[pass].levels = 4; 358 sc[pass].ptidxbits = 9; 359 sc[pass].ptesize = 8; 360 break; 361 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 362 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; 363 if (!(s->cap & sv_mode)) { 364 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 365 } 366 sc[pass].levels = 5; 367 sc[pass].ptidxbits = 9; 368 sc[pass].ptesize = 8; 369 break; 370 default: 371 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 372 } 373 } 374 }; 375 376 /* S/G stages translation tables root pointers */ 377 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 378 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); 379 addr = (en_s && en_g) ? satp : iotlb->iova; 380 base = en_g ? gatp : satp; 381 pass = en_g ? G_STAGE : S_STAGE; 382 383 do { 384 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; 385 const unsigned va_bits = widened + sc[pass].ptidxbits; 386 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * 387 (sc[pass].levels - 1 - sc[pass].step); 388 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); 389 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; 390 const bool ade = 391 ctx->tc & (pass ? 
RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); 392 393 /* Address range check before first level lookup */ 394 if (!sc[pass].step) { 395 const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; 396 if ((addr & va_mask) != addr) { 397 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; 398 } 399 } 400 401 /* Read page table entry */ 402 if (sc[pass].ptesize == 4) { 403 uint32_t pte32 = 0; 404 ret = ldl_le_dma(s->target_as, pte_addr, &pte32, 405 MEMTXATTRS_UNSPECIFIED); 406 pte = pte32; 407 } else { 408 ret = ldq_le_dma(s->target_as, pte_addr, &pte, 409 MEMTXATTRS_UNSPECIFIED); 410 } 411 if (ret != MEMTX_OK) { 412 return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT 413 : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; 414 } 415 416 sc[pass].step++; 417 hwaddr ppn = pte >> PTE_PPN_SHIFT; 418 419 if (!(pte & PTE_V)) { 420 break; /* Invalid PTE */ 421 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { 422 base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ 423 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { 424 break; /* Reserved leaf PTE flags: PTE_W */ 425 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { 426 break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ 427 } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { 428 break; /* Misaligned PPN */ 429 } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { 430 break; /* Read access check failed */ 431 } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { 432 break; /* Write access check failed */ 433 } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { 434 break; /* Access bit not set */ 435 } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { 436 break; /* Dirty bit not set */ 437 } else { 438 /* Leaf PTE, translation completed. 
*/ 439 sc[pass].step = sc[pass].levels; 440 base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); 441 /* Update address mask based on smallest translation granularity */ 442 iotlb->addr_mask &= (1ULL << va_skip) - 1; 443 /* Continue with S-Stage translation? */ 444 if (pass && sc[0].step != sc[0].levels) { 445 pass = S_STAGE; 446 addr = iotlb->iova; 447 continue; 448 } 449 /* Translation phase completed (GPA or SPA) */ 450 iotlb->translated_addr = base; 451 iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) 452 : IOMMU_RO; 453 454 /* Check MSI GPA address match */ 455 if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && 456 riscv_iommu_msi_check(s, ctx, base)) { 457 /* Trap MSI writes and return GPA address. */ 458 iotlb->target_as = &s->trap_as; 459 iotlb->addr_mask = ~TARGET_PAGE_MASK; 460 return 0; 461 } 462 463 /* Continue with G-Stage translation? */ 464 if (!pass && en_g) { 465 pass = G_STAGE; 466 addr = base; 467 base = gatp; 468 sc[pass].step = 0; 469 continue; 470 } 471 472 return 0; 473 } 474 475 if (sc[pass].step == sc[pass].levels) { 476 break; /* Can't find leaf PTE */ 477 } 478 479 /* Continue with G-Stage translation? */ 480 if (!pass && en_g) { 481 pass = G_STAGE; 482 addr = base; 483 base = gatp; 484 sc[pass].step = 0; 485 } 486 } while (1); 487 488 return (iotlb->perm & IOMMU_WO) ? 489 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 490 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : 491 (pass ? 
RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : 492 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); 493 } 494 495 static void riscv_iommu_report_fault(RISCVIOMMUState *s, 496 RISCVIOMMUContext *ctx, 497 uint32_t fault_type, uint32_t cause, 498 bool pv, 499 uint64_t iotval, uint64_t iotval2) 500 { 501 struct riscv_iommu_fq_record ev = { 0 }; 502 503 if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { 504 switch (cause) { 505 case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: 506 case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: 507 case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: 508 case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: 509 case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: 510 case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: 511 case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: 512 break; 513 default: 514 /* DTF prevents reporting a fault for this given cause */ 515 return; 516 } 517 } 518 519 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); 520 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); 521 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); 522 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); 523 524 if (pv) { 525 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); 526 } 527 528 ev.iotval = iotval; 529 ev.iotval2 = iotval2; 530 531 riscv_iommu_fault(s, &ev); 532 } 533 534 /* Redirect MSI write for given GPA. 
*/ 535 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, 536 RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, 537 unsigned size, MemTxAttrs attrs) 538 { 539 MemTxResult res; 540 dma_addr_t addr; 541 uint64_t intn; 542 uint32_t n190; 543 uint64_t pte[2]; 544 int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 545 int cause; 546 547 /* Interrupt File Number */ 548 intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); 549 if (intn >= 256) { 550 /* Interrupt file number out of range */ 551 res = MEMTX_ACCESS_ERROR; 552 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 553 goto err; 554 } 555 556 /* fetch MSI PTE */ 557 addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); 558 addr = addr | (intn * sizeof(pte)); 559 res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), 560 MEMTXATTRS_UNSPECIFIED); 561 if (res != MEMTX_OK) { 562 if (res == MEMTX_DECODE_ERROR) { 563 cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; 564 } else { 565 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 566 } 567 goto err; 568 } 569 570 le64_to_cpus(&pte[0]); 571 le64_to_cpus(&pte[1]); 572 573 if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { 574 /* 575 * The spec mentions that: "If msipte.C == 1, then further 576 * processing to interpret the PTE is implementation 577 * defined.". We'll abort with cause = 262 for this 578 * case too. 
579 */ 580 res = MEMTX_ACCESS_ERROR; 581 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; 582 goto err; 583 } 584 585 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { 586 case RISCV_IOMMU_MSI_PTE_M_BASIC: 587 /* MSI Pass-through mode */ 588 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); 589 590 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 591 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 592 gpa, addr); 593 594 res = dma_memory_write(s->target_as, addr, &data, size, attrs); 595 if (res != MEMTX_OK) { 596 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 597 goto err; 598 } 599 600 return MEMTX_OK; 601 case RISCV_IOMMU_MSI_PTE_M_MRIF: 602 /* MRIF mode, continue. */ 603 break; 604 default: 605 res = MEMTX_ACCESS_ERROR; 606 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 607 goto err; 608 } 609 610 /* 611 * Report an error for interrupt identities exceeding the maximum allowed 612 * for an IMSIC interrupt file (2047) or destination address is not 32-bit 613 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 
614 */ 615 if ((data > 2047) || (gpa & 3)) { 616 res = MEMTX_ACCESS_ERROR; 617 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 618 goto err; 619 } 620 621 /* MSI MRIF mode, non atomic pending bit update */ 622 623 /* MRIF pending bit address */ 624 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; 625 addr = addr | ((data & 0x7c0) >> 3); 626 627 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 628 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 629 gpa, addr); 630 631 /* MRIF pending bit mask */ 632 data = 1ULL << (data & 0x03f); 633 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 634 if (res != MEMTX_OK) { 635 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 636 goto err; 637 } 638 639 intn = intn | data; 640 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); 641 if (res != MEMTX_OK) { 642 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 643 goto err; 644 } 645 646 /* Get MRIF enable bits */ 647 addr = addr + sizeof(intn); 648 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 649 if (res != MEMTX_OK) { 650 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 651 goto err; 652 } 653 654 if (!(intn & data)) { 655 /* notification disabled, MRIF update completed. 
*/ 656 return MEMTX_OK; 657 } 658 659 /* Send notification message */ 660 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); 661 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | 662 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); 663 664 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); 665 if (res != MEMTX_OK) { 666 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 667 goto err; 668 } 669 670 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); 671 672 return MEMTX_OK; 673 674 err: 675 riscv_iommu_report_fault(s, ctx, fault_type, cause, 676 !!ctx->process_id, 0, 0); 677 return res; 678 } 679 680 /* 681 * Check device context configuration as described by the 682 * riscv-iommu spec section "Device-context configuration 683 * checks". 684 */ 685 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, 686 RISCVIOMMUContext *ctx) 687 { 688 uint32_t fsc_mode, msi_mode; 689 uint64_t gatp; 690 691 if (!(s->cap & RISCV_IOMMU_CAP_ATS) && 692 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || 693 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || 694 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { 695 return false; 696 } 697 698 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && 699 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || 700 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { 701 return false; 702 } 703 704 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && 705 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { 706 return false; 707 } 708 709 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && 710 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { 711 return false; 712 } 713 714 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { 715 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); 716 717 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && 718 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 719 return false; 720 } 721 } 722 723 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 724 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && 725 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 726 return false; 727 } 728 729 
fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 730 731 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { 732 switch (fsc_mode) { 733 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 734 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { 735 return false; 736 } 737 break; 738 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 739 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { 740 return false; 741 } 742 break; 743 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 744 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { 745 return false; 746 } 747 break; 748 } 749 } else { 750 /* DC.tc.PDTV is 0 */ 751 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { 752 return false; 753 } 754 755 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 756 if (fsc_mode == RISCV_IOMMU_CAP_SV32 && 757 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 758 return false; 759 } 760 } else { 761 switch (fsc_mode) { 762 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 763 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 764 return false; 765 } 766 break; 767 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 768 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 769 return false; 770 } 771 break; 772 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 773 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 774 return false; 775 } 776 break; 777 } 778 } 779 } 780 781 /* 782 * CAP_END is always zero (only one endianess). FCTL_BE is 783 * always zero (little-endian accesses). Thus TC_SBE must 784 * always be LE, i.e. zero. 785 */ 786 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { 787 return false; 788 } 789 790 return true; 791 } 792 793 /* 794 * Validate process context (PC) according to section 795 * "Process-context configuration checks". 
796 */ 797 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, 798 RISCVIOMMUContext *ctx) 799 { 800 uint32_t mode; 801 802 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { 803 return false; 804 } 805 806 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { 807 return false; 808 } 809 810 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 811 switch (mode) { 812 case RISCV_IOMMU_DC_FSC_MODE_BARE: 813 /* sv39 and sv32 modes have the same value (8) */ 814 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 815 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 816 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 817 break; 818 default: 819 return false; 820 } 821 822 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 823 if (mode == RISCV_IOMMU_CAP_SV32 && 824 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 825 return false; 826 } 827 } else { 828 switch (mode) { 829 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 830 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 831 return false; 832 } 833 break; 834 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 835 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 836 return false; 837 } 838 break; 839 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 840 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 841 return false; 842 } 843 break; 844 } 845 } 846 847 return true; 848 } 849 850 /* 851 * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk 852 * 853 * @s : IOMMU Device State 854 * @ctx : Device Translation Context with devid and process_id set. 855 * @return : success or fault code. 
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    /* Base format is half the size of the full structure. */
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    unsigned depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    /* depth is the number of non-leaf directory levels to walk. */
    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }

    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        /* Directory entries are 8 bytes; the offset stays within the page. */
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    /* Zero first so fields beyond dc_len read as 0 for the base format. */
    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    /* Without PDTV, FSC is the first-stage page table pointer itself. */
    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    /* Process directory tree walk: PD8 has no non-leaf level. */
    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    /*
     * The 16-byte process context is read over dc.ta and the field that
     * follows it; the code below takes that field to be dc.fsc.
     */
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */

/* Hash table equality: contexts match on the {devid, process_id} pair. */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

/* Hash table hash function over the {devid, process_id} pair. */
static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

/*
 * GHFunc: mark a cached context invalid (clear tc.V) if it is valid and
 * matches both the devid and the process_id of the context passed in 'data'.
 */
static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/*
 * GHFunc: mark a cached context invalid (clear tc.V) if it is valid and
 * matches the devid of the context passed in 'data', for any process_id.
 */
static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/* GHFunc: mark a cached context invalid (clear tc.V) unconditionally. */
static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/*
 * Apply the invalidation callback 'func' to every entry of the context
 * cache. 'devid' and 'process_id' are packed into a temporary context that
 * is passed to the callback as its user data.
 */
static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    /* Hold a reference to the table for the duration of the iteration. */
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}

/*
 * Find or allocate translation context for a given {device_id, process_id}
 *
 * On success the context is returned and *ref is set to the referenced
 * hash table that holds it; the caller releases that reference with
 * riscv_iommu_ctx_put(). On failure a fault is reported, *ref is set to
 * NULL and NULL is returned.
 */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    /* Entries with tc.V cleared were invalidated and must be re-fetched. */
    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
        if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
            /* Cache is full: swap in a fresh, empty table. */
            g_hash_table_unref(ctx_cache);
            ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                              riscv_iommu_ctx_equal,
                                              g_free, NULL);
            /* One reference for s->ctx_cache, one handed back via *ref. */
            g_hash_table_ref(ctx_cache);
            g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
        }
        g_hash_table_add(ctx_cache, ctx);
        *ref = ctx_cache;
        return ctx;
    }

    g_hash_table_unref(ctx_cache);
    *ref = NULL;

    /* ctx is still needed here for devid/process_id/tc; freed afterwards. */
    riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
                             fault, !!process_id, 0, 0);

    g_free(ctx);
    return NULL;
}

/* Release the reference obtained through riscv_iommu_ctx(); NULL is a no-op. */
static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    if (ref) {
        g_hash_table_unref((GHashTable *)ref);
    }
}

/* Find or allocate address space for a given device */
static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
{
    RISCVIOMMUSpace *as;

    /* FIXME: PCIe bus remapping for attached endpoints.
*/ 1176 devid |= s->bus << 8; 1177 1178 QLIST_FOREACH(as, &s->spaces, list) { 1179 if (as->devid == devid) { 1180 break; 1181 } 1182 } 1183 1184 if (as == NULL) { 1185 char name[64]; 1186 as = g_new0(RISCVIOMMUSpace, 1); 1187 1188 as->iommu = s; 1189 as->devid = devid; 1190 1191 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", 1192 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1193 1194 /* IOVA address space, untranslated addresses */ 1195 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), 1196 TYPE_RISCV_IOMMU_MEMORY_REGION, 1197 OBJECT(as), "riscv_iommu", UINT64_MAX); 1198 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); 1199 1200 QLIST_INSERT_HEAD(&s->spaces, as, list); 1201 1202 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), 1203 PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1204 } 1205 return &as->iova_as; 1206 } 1207 1208 /* Translation Object cache support */ 1209 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) 1210 { 1211 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; 1212 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; 1213 return t1->gscid == t2->gscid && t1->pscid == t2->pscid && 1214 t1->iova == t2->iova; 1215 } 1216 1217 static guint riscv_iommu_iot_hash(gconstpointer v) 1218 { 1219 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; 1220 return (guint)t->iova; 1221 } 1222 1223 /* GV: 1 PSCV: 1 AV: 1 */ 1224 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, 1225 gpointer data) 1226 { 1227 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1228 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1229 if (iot->gscid == arg->gscid && 1230 iot->pscid == arg->pscid && 1231 iot->iova == arg->iova) { 1232 iot->perm = IOMMU_NONE; 1233 } 1234 } 1235 1236 /* GV: 1 PSCV: 1 AV: 0 */ 1237 static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, 1238 gpointer data) 1239 { 1240 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1241 
RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1242 if (iot->gscid == arg->gscid && 1243 iot->pscid == arg->pscid) { 1244 iot->perm = IOMMU_NONE; 1245 } 1246 } 1247 1248 /* GV: 1 GVMA: 1 */ 1249 static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value, 1250 gpointer data) 1251 { 1252 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1253 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1254 if (iot->gscid == arg->gscid) { 1255 /* simplified cache, no GPA matching */ 1256 iot->perm = IOMMU_NONE; 1257 } 1258 } 1259 1260 /* GV: 1 GVMA: 0 */ 1261 static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, 1262 gpointer data) 1263 { 1264 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1265 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1266 if (iot->gscid == arg->gscid) { 1267 iot->perm = IOMMU_NONE; 1268 } 1269 } 1270 1271 /* GV: 0 */ 1272 static void riscv_iommu_iot_inval_all(gpointer key, gpointer value, 1273 gpointer data) 1274 { 1275 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1276 iot->perm = IOMMU_NONE; 1277 } 1278 1279 /* caller should keep ref-count for iot_cache object */ 1280 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, 1281 GHashTable *iot_cache, hwaddr iova) 1282 { 1283 RISCVIOMMUEntry key = { 1284 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), 1285 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), 1286 .iova = PPN_DOWN(iova), 1287 }; 1288 return g_hash_table_lookup(iot_cache, &key); 1289 } 1290 1291 /* caller should keep ref-count for iot_cache object */ 1292 static void riscv_iommu_iot_update(RISCVIOMMUState *s, 1293 GHashTable *iot_cache, RISCVIOMMUEntry *iot) 1294 { 1295 if (!s->iot_limit) { 1296 return; 1297 } 1298 1299 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { 1300 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 1301 riscv_iommu_iot_equal, 1302 g_free, NULL); 1303 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); 1304 } 1305 
g_hash_table_add(iot_cache, iot); 1306 } 1307 1308 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, 1309 uint32_t gscid, uint32_t pscid, hwaddr iova) 1310 { 1311 GHashTable *iot_cache; 1312 RISCVIOMMUEntry key = { 1313 .gscid = gscid, 1314 .pscid = pscid, 1315 .iova = PPN_DOWN(iova), 1316 }; 1317 1318 iot_cache = g_hash_table_ref(s->iot_cache); 1319 g_hash_table_foreach(iot_cache, func, &key); 1320 g_hash_table_unref(iot_cache); 1321 } 1322 1323 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 1324 IOMMUTLBEntry *iotlb, bool enable_cache) 1325 { 1326 RISCVIOMMUEntry *iot; 1327 IOMMUAccessFlags perm; 1328 bool enable_pid; 1329 bool enable_pri; 1330 GHashTable *iot_cache; 1331 int fault; 1332 1333 iot_cache = g_hash_table_ref(s->iot_cache); 1334 /* 1335 * TC[32] is reserved for custom extensions, used here to temporarily 1336 * enable automatic page-request generation for ATS queries. 1337 */ 1338 enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); 1339 enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); 1340 1341 /* Check for ATS request. */ 1342 if (iotlb->perm == IOMMU_NONE) { 1343 /* Check if ATS is disabled. */ 1344 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { 1345 enable_pri = false; 1346 fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1347 goto done; 1348 } 1349 } 1350 1351 iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova); 1352 perm = iot ? iot->perm : IOMMU_NONE; 1353 if (perm != IOMMU_NONE) { 1354 iotlb->translated_addr = PPN_PHYS(iot->phys); 1355 iotlb->addr_mask = ~TARGET_PAGE_MASK; 1356 iotlb->perm = perm; 1357 fault = 0; 1358 goto done; 1359 } 1360 1361 /* Translate using device directory / page table information. 
*/ 1362 fault = riscv_iommu_spa_fetch(s, ctx, iotlb); 1363 1364 if (!fault && iotlb->target_as == &s->trap_as) { 1365 /* Do not cache trapped MSI translations */ 1366 goto done; 1367 } 1368 1369 /* 1370 * We made an implementation choice to not cache identity-mapped 1371 * translations, as allowed by the specification, to avoid 1372 * translation cache evictions for other devices sharing the 1373 * IOMMU hardware model. 1374 */ 1375 if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { 1376 iot = g_new0(RISCVIOMMUEntry, 1); 1377 iot->iova = PPN_DOWN(iotlb->iova); 1378 iot->phys = PPN_DOWN(iotlb->translated_addr); 1379 iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); 1380 iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); 1381 iot->perm = iotlb->perm; 1382 riscv_iommu_iot_update(s, iot_cache, iot); 1383 } 1384 1385 done: 1386 g_hash_table_unref(iot_cache); 1387 1388 if (enable_pri && fault) { 1389 struct riscv_iommu_pq_record pr = {0}; 1390 if (enable_pid) { 1391 pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, 1392 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); 1393 } 1394 pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); 1395 pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | 1396 RISCV_IOMMU_PREQ_PAYLOAD_M; 1397 riscv_iommu_pri(s, &pr); 1398 return fault; 1399 } 1400 1401 if (fault) { 1402 unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; 1403 1404 if (iotlb->perm & IOMMU_RW) { 1405 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 1406 } else if (iotlb->perm & IOMMU_RO) { 1407 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; 1408 } 1409 1410 riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, 1411 iotlb->iova, iotlb->translated_addr); 1412 return fault; 1413 } 1414 1415 return 0; 1416 } 1417 1418 /* IOMMU Command Interface */ 1419 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, 1420 uint64_t addr, uint32_t data) 1421 { 1422 /* 1423 * ATS processing in this implementation of the IOMMU is synchronous, 1424 * 
no need to wait for completions here. 1425 */ 1426 if (!notify) { 1427 return MEMTX_OK; 1428 } 1429 1430 return dma_memory_write(s->target_as, addr, &data, sizeof(data), 1431 MEMTXATTRS_UNSPECIFIED); 1432 } 1433 1434 static void riscv_iommu_ats(RISCVIOMMUState *s, 1435 struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, 1436 IOMMUAccessFlags perm, 1437 void (*trace_fn)(const char *id)) 1438 { 1439 RISCVIOMMUSpace *as = NULL; 1440 IOMMUNotifier *n; 1441 IOMMUTLBEvent event; 1442 uint32_t pid; 1443 uint32_t devid; 1444 const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; 1445 1446 if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { 1447 /* Use device segment and requester id */ 1448 devid = get_field(cmd->dword0, 1449 RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); 1450 } else { 1451 devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); 1452 } 1453 1454 pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); 1455 1456 QLIST_FOREACH(as, &s->spaces, list) { 1457 if (as->devid == devid) { 1458 break; 1459 } 1460 } 1461 1462 if (!as || !as->notifier) { 1463 return; 1464 } 1465 1466 event.type = flag; 1467 event.entry.perm = perm; 1468 event.entry.target_as = s->target_as; 1469 1470 IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { 1471 if (!pv || n->iommu_idx == pid) { 1472 event.entry.iova = n->start; 1473 event.entry.addr_mask = n->end - n->start; 1474 trace_fn(as->iova_mr.parent_obj.name); 1475 memory_region_notify_iommu_one(n, &event); 1476 } 1477 } 1478 } 1479 1480 static void riscv_iommu_ats_inval(RISCVIOMMUState *s, 1481 struct riscv_iommu_command *cmd) 1482 { 1483 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, 1484 trace_riscv_iommu_ats_inval); 1485 } 1486 1487 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, 1488 struct riscv_iommu_command *cmd) 1489 { 1490 unsigned resp_code = get_field(cmd->dword1, 1491 RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); 1492 1493 /* Using the access flag to carry response code information */ 1494 
IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; 1495 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, 1496 trace_riscv_iommu_ats_prgr); 1497 } 1498 1499 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) 1500 { 1501 uint64_t old_ddtp = s->ddtp; 1502 uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); 1503 unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); 1504 unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); 1505 bool ok = false; 1506 1507 /* 1508 * Check for allowed DDTP.MODE transitions: 1509 * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} 1510 * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} 1511 */ 1512 if (new_mode == old_mode || 1513 new_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1514 new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { 1515 ok = true; 1516 } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || 1517 new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || 1518 new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { 1519 ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1520 old_mode == RISCV_IOMMU_DDTP_MODE_BARE; 1521 } 1522 1523 if (ok) { 1524 /* clear reserved and busy bits, report back sanitized version */ 1525 new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, 1526 RISCV_IOMMU_DDTP_MODE, new_mode); 1527 } else { 1528 new_ddtp = old_ddtp; 1529 } 1530 s->ddtp = new_ddtp; 1531 1532 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); 1533 } 1534 1535 /* Command function and opcode field. 
*/ 1536 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) 1537 1538 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) 1539 { 1540 struct riscv_iommu_command cmd; 1541 MemTxResult res; 1542 dma_addr_t addr; 1543 uint32_t tail, head, ctrl; 1544 uint64_t cmd_opcode; 1545 GHFunc func; 1546 1547 ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1548 tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; 1549 head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; 1550 1551 /* Check for pending error or queue processing disabled */ 1552 if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || 1553 !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { 1554 return; 1555 } 1556 1557 while (tail != head) { 1558 addr = s->cq_addr + head * sizeof(cmd); 1559 res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), 1560 MEMTXATTRS_UNSPECIFIED); 1561 1562 if (res != MEMTX_OK) { 1563 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1564 RISCV_IOMMU_CQCSR_CQMF, 0); 1565 goto fault; 1566 } 1567 1568 trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); 1569 1570 cmd_opcode = get_field(cmd.dword0, 1571 RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); 1572 1573 switch (cmd_opcode) { 1574 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, 1575 RISCV_IOMMU_CMD_IOFENCE_OPCODE): 1576 res = riscv_iommu_iofence(s, 1577 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, 1578 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); 1579 1580 if (res != MEMTX_OK) { 1581 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1582 RISCV_IOMMU_CQCSR_CQMF, 0); 1583 goto fault; 1584 } 1585 break; 1586 1587 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, 1588 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1589 if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) { 1590 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ 1591 goto cmd_ill; 1592 } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) { 1593 /* invalidate all cache mappings */ 1594 
func = riscv_iommu_iot_inval_all; 1595 } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) { 1596 /* invalidate cache matching GSCID */ 1597 func = riscv_iommu_iot_inval_gscid; 1598 } else { 1599 /* invalidate cache matching GSCID and ADDR (GPA) */ 1600 func = riscv_iommu_iot_inval_gscid_gpa; 1601 } 1602 riscv_iommu_iot_inval(s, func, 1603 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0, 1604 cmd.dword1 << 2 & TARGET_PAGE_MASK); 1605 break; 1606 1607 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, 1608 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1609 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) { 1610 /* invalidate all cache mappings, simplified model */ 1611 func = riscv_iommu_iot_inval_all; 1612 } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) { 1613 /* invalidate cache matching GSCID, simplified model */ 1614 func = riscv_iommu_iot_inval_gscid; 1615 } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) { 1616 /* invalidate cache matching GSCID and PSCID */ 1617 func = riscv_iommu_iot_inval_pscid; 1618 } else { 1619 /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */ 1620 func = riscv_iommu_iot_inval_pscid_iova; 1621 } 1622 riscv_iommu_iot_inval(s, func, 1623 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 1624 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID), 1625 cmd.dword1 << 2 & TARGET_PAGE_MASK); 1626 break; 1627 1628 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, 1629 RISCV_IOMMU_CMD_IODIR_OPCODE): 1630 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { 1631 /* invalidate all device context cache mappings */ 1632 func = riscv_iommu_ctx_inval_all; 1633 } else { 1634 /* invalidate all device context matching DID */ 1635 func = riscv_iommu_ctx_inval_devid; 1636 } 1637 riscv_iommu_ctx_inval(s, func, 1638 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); 1639 break; 1640 1641 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, 1642 RISCV_IOMMU_CMD_IODIR_OPCODE): 1643 if (!(cmd.dword0 & 
RISCV_IOMMU_CMD_IODIR_DV)) { 1644 /* illegal command arguments IODIR_PDT & DV == 0 */ 1645 goto cmd_ill; 1646 } else { 1647 func = riscv_iommu_ctx_inval_devid_procid; 1648 } 1649 riscv_iommu_ctx_inval(s, func, 1650 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 1651 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); 1652 break; 1653 1654 /* ATS commands */ 1655 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL, 1656 RISCV_IOMMU_CMD_ATS_OPCODE): 1657 if (!s->enable_ats) { 1658 goto cmd_ill; 1659 } 1660 1661 riscv_iommu_ats_inval(s, &cmd); 1662 break; 1663 1664 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR, 1665 RISCV_IOMMU_CMD_ATS_OPCODE): 1666 if (!s->enable_ats) { 1667 goto cmd_ill; 1668 } 1669 1670 riscv_iommu_ats_prgr(s, &cmd); 1671 break; 1672 1673 default: 1674 cmd_ill: 1675 /* Invalid instruction, do not advance instruction index. */ 1676 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1677 RISCV_IOMMU_CQCSR_CMD_ILL, 0); 1678 goto fault; 1679 } 1680 1681 /* Advance and update head pointer after command completes. 
*/ 1682 head = (head + 1) & s->cq_mask; 1683 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); 1684 } 1685 return; 1686 1687 fault: 1688 if (ctrl & RISCV_IOMMU_CQCSR_CIE) { 1689 riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); 1690 } 1691 } 1692 1693 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) 1694 { 1695 uint64_t base; 1696 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1697 uint32_t ctrl_clr; 1698 bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); 1699 bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); 1700 1701 if (enable && !active) { 1702 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); 1703 s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; 1704 s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); 1705 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); 1706 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); 1707 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); 1708 ctrl_set = RISCV_IOMMU_CQCSR_CQON; 1709 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | 1710 RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | 1711 RISCV_IOMMU_CQCSR_FENCE_W_IP; 1712 } else if (!enable && active) { 1713 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); 1714 ctrl_set = 0; 1715 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; 1716 } else { 1717 ctrl_set = 0; 1718 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; 1719 } 1720 1721 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); 1722 } 1723 1724 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) 1725 { 1726 uint64_t base; 1727 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1728 uint32_t ctrl_clr; 1729 bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); 1730 bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); 1731 1732 if (enable && !active) { 1733 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); 1734 s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; 1735 s->fq_addr = 
PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); 1736 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); 1737 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); 1738 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); 1739 ctrl_set = RISCV_IOMMU_FQCSR_FQON; 1740 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | 1741 RISCV_IOMMU_FQCSR_FQOF; 1742 } else if (!enable && active) { 1743 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); 1744 ctrl_set = 0; 1745 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; 1746 } else { 1747 ctrl_set = 0; 1748 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; 1749 } 1750 1751 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); 1752 } 1753 1754 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) 1755 { 1756 uint64_t base; 1757 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 1758 uint32_t ctrl_clr; 1759 bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); 1760 bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); 1761 1762 if (enable && !active) { 1763 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); 1764 s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; 1765 s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); 1766 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); 1767 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); 1768 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); 1769 ctrl_set = RISCV_IOMMU_PQCSR_PQON; 1770 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | 1771 RISCV_IOMMU_PQCSR_PQOF; 1772 } else if (!enable && active) { 1773 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); 1774 ctrl_set = 0; 1775 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; 1776 } else { 1777 ctrl_set = 0; 1778 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; 1779 } 1780 1781 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); 1782 } 1783 1784 static void riscv_iommu_process_dbg(RISCVIOMMUState *s) 1785 { 1786 uint64_t iova = riscv_iommu_reg_get64(s, 
RISCV_IOMMU_REG_TR_REQ_IOVA); 1787 uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL); 1788 unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID); 1789 unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID); 1790 RISCVIOMMUContext *ctx; 1791 void *ref; 1792 1793 if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) { 1794 return; 1795 } 1796 1797 ctx = riscv_iommu_ctx(s, devid, pid, &ref); 1798 if (ctx == NULL) { 1799 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, 1800 RISCV_IOMMU_TR_RESPONSE_FAULT | 1801 (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10)); 1802 } else { 1803 IOMMUTLBEntry iotlb = { 1804 .iova = iova, 1805 .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW, 1806 .addr_mask = ~0, 1807 .target_as = NULL, 1808 }; 1809 int fault = riscv_iommu_translate(s, ctx, &iotlb, false); 1810 if (fault) { 1811 iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); 1812 } else { 1813 iova = iotlb.translated_addr & ~iotlb.addr_mask; 1814 iova >>= TARGET_PAGE_BITS; 1815 iova &= RISCV_IOMMU_TR_RESPONSE_PPN; 1816 1817 /* We do not support superpages (> 4kbs) for now */ 1818 iova &= ~RISCV_IOMMU_TR_RESPONSE_S; 1819 } 1820 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); 1821 } 1822 1823 riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, 1824 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 1825 riscv_iommu_ctx_put(s, ref); 1826 } 1827 1828 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); 1829 1830 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) 1831 { 1832 uint64_t icvec = 0; 1833 1834 icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, 1835 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); 1836 1837 icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, 1838 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); 1839 1840 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, 1841 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); 1842 1843 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, 1844 s->icvec_avail_vectors & 
RISCV_IOMMU_ICVEC_PIV); 1845 1846 trace_riscv_iommu_icvec_write(data, icvec); 1847 1848 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); 1849 } 1850 1851 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) 1852 { 1853 uint32_t cqcsr, fqcsr, pqcsr; 1854 uint32_t ipsr_set = 0; 1855 uint32_t ipsr_clr = 0; 1856 1857 if (data & RISCV_IOMMU_IPSR_CIP) { 1858 cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1859 1860 if (cqcsr & RISCV_IOMMU_CQCSR_CIE && 1861 (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || 1862 cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || 1863 cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || 1864 cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { 1865 ipsr_set |= RISCV_IOMMU_IPSR_CIP; 1866 } else { 1867 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1868 } 1869 } else { 1870 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1871 } 1872 1873 if (data & RISCV_IOMMU_IPSR_FIP) { 1874 fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1875 1876 if (fqcsr & RISCV_IOMMU_FQCSR_FIE && 1877 (fqcsr & RISCV_IOMMU_FQCSR_FQOF || 1878 fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { 1879 ipsr_set |= RISCV_IOMMU_IPSR_FIP; 1880 } else { 1881 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 1882 } 1883 } else { 1884 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 1885 } 1886 1887 if (data & RISCV_IOMMU_IPSR_PIP) { 1888 pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 1889 1890 if (pqcsr & RISCV_IOMMU_PQCSR_PIE && 1891 (pqcsr & RISCV_IOMMU_PQCSR_PQOF || 1892 pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { 1893 ipsr_set |= RISCV_IOMMU_IPSR_PIP; 1894 } else { 1895 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 1896 } 1897 } else { 1898 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 1899 } 1900 1901 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); 1902 } 1903 1904 /* 1905 * Write the resulting value of 'data' for the reg specified 1906 * by 'reg_addr', after considering read-only/read-write/write-clear 1907 * bits, in the pointer 'dest'. 1908 * 1909 * The result is written in little-endian. 
*/
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    /*
     * Read-only bits keep their current value, the remaining bits take
     * 'data', and any write-clear bit written as 1 ends up cleared.
     */
    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}

/*
 * MMIO write handler for the IOMMU register file.
 *
 * Rejects misaligned accesses and accesses reaching past
 * RISCV_IOMMU_REG_MSI_CONFIG. ICVEC and IPSR are routed to their dedicated
 * update helpers. For every other register the masked value is stored, the
 * register's BUSY bit is set when it has one, and the register's processing
 * function (if any) is called.
 */
static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    default:
        break;
    }

    /*
     * Registers update might be not synchronized with core logic.
     * If system software updates register when relevant BUSY bit
     * is set IOMMU behavior of additional writes to the register
     * is UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

/*
 * MMIO read handler for the IOMMU register file: returns the current
 * little-endian register contents from regs_rw. Misaligned accesses and
 * accesses reaching past RISCV_IOMMU_REG_MSI_CONFIG are rejected.
 */
static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    ptr = &s->regs_rw[addr];
    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

/* Access callbacks and size limits (4 to 8 bytes) for the register region. */
static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
 * memory region as untranslated address, for additional MSI/MRIF interception
 * by IOMMU interrupt remapping implementation.
 * Note: Device emulation code generating an MSI is expected to provide a valid
 * memory transaction attributes with requested_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
    uint64_t data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    /* Without valid attributes the requester cannot be identified. */
    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

/* Reads from the trap region are not supported and always fail. */
static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

/* Access callbacks and size limits (4 to 8 bytes) for the MSI trap region. */
static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }
    /* Enable translation debug interface */
    s->cap |= RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
        "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
        ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
        ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
        ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
        ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
        RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
        RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
        RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
        RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
        RISCV_IOMMU_PQCSR_PQOF);
stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | 2190 RISCV_IOMMU_PQCSR_BUSY); 2191 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); 2192 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); 2193 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); 2194 /* If debug registers enabled. */ 2195 if (s->cap & RISCV_IOMMU_CAP_DBG) { 2196 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0); 2197 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL], 2198 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 2199 } 2200 2201 /* Memory region for downstream access, if specified. */ 2202 if (s->target_mr) { 2203 s->target_as = g_new0(AddressSpace, 1); 2204 address_space_init(s->target_as, s->target_mr, 2205 "riscv-iommu-downstream"); 2206 } else { 2207 /* Fallback to global system memory. */ 2208 s->target_as = &address_space_memory; 2209 } 2210 2211 /* Memory region for untranslated MRIF/MSI writes */ 2212 memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, 2213 "riscv-iommu-trap", ~0ULL); 2214 address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); 2215 2216 /* Device translation context cache */ 2217 s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, 2218 riscv_iommu_ctx_equal, 2219 g_free, NULL); 2220 2221 s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 2222 riscv_iommu_iot_equal, 2223 g_free, NULL); 2224 2225 s->iommus.le_next = NULL; 2226 s->iommus.le_prev = NULL; 2227 QLIST_INIT(&s->spaces); 2228 } 2229 2230 static void riscv_iommu_unrealize(DeviceState *dev) 2231 { 2232 RISCVIOMMUState *s = RISCV_IOMMU(dev); 2233 2234 g_hash_table_unref(s->iot_cache); 2235 g_hash_table_unref(s->ctx_cache); 2236 } 2237 2238 static Property riscv_iommu_properties[] = { 2239 DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, 2240 RISCV_IOMMU_SPEC_DOT_VER), 2241 DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), 2242 DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit, 2243 LIMIT_CACHE_IOT), 2244 
DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), 2245 DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), 2246 DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), 2247 DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), 2248 DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), 2249 DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, 2250 TYPE_MEMORY_REGION, MemoryRegion *), 2251 DEFINE_PROP_END_OF_LIST(), 2252 }; 2253 2254 static void riscv_iommu_class_init(ObjectClass *klass, void* data) 2255 { 2256 DeviceClass *dc = DEVICE_CLASS(klass); 2257 2258 /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ 2259 dc->user_creatable = false; 2260 dc->realize = riscv_iommu_realize; 2261 dc->unrealize = riscv_iommu_unrealize; 2262 device_class_set_props(dc, riscv_iommu_properties); 2263 } 2264 2265 static const TypeInfo riscv_iommu_info = { 2266 .name = TYPE_RISCV_IOMMU, 2267 .parent = TYPE_DEVICE, 2268 .instance_size = sizeof(RISCVIOMMUState), 2269 .class_init = riscv_iommu_class_init, 2270 }; 2271 2272 static const char *IOMMU_FLAG_STR[] = { 2273 "NA", 2274 "RO", 2275 "WR", 2276 "RW", 2277 }; 2278 2279 /* RISC-V IOMMU Memory Region - Address Translation Space */ 2280 static IOMMUTLBEntry riscv_iommu_memory_region_translate( 2281 IOMMUMemoryRegion *iommu_mr, hwaddr addr, 2282 IOMMUAccessFlags flag, int iommu_idx) 2283 { 2284 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2285 RISCVIOMMUContext *ctx; 2286 void *ref; 2287 IOMMUTLBEntry iotlb = { 2288 .iova = addr, 2289 .target_as = as->iommu->target_as, 2290 .addr_mask = ~0ULL, 2291 .perm = flag, 2292 }; 2293 2294 ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); 2295 if (ctx == NULL) { 2296 /* Translation disabled or invalid. */ 2297 iotlb.addr_mask = 0; 2298 iotlb.perm = IOMMU_NONE; 2299 } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) { 2300 /* Translation disabled or fault reported. 
*/ 2301 iotlb.addr_mask = 0; 2302 iotlb.perm = IOMMU_NONE; 2303 } 2304 2305 /* Trace all dma translations with original access flags. */ 2306 trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), 2307 PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, 2308 IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, 2309 iotlb.translated_addr); 2310 2311 riscv_iommu_ctx_put(as->iommu, ref); 2312 2313 return iotlb; 2314 } 2315 2316 static int riscv_iommu_memory_region_notify( 2317 IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, 2318 IOMMUNotifierFlag new, Error **errp) 2319 { 2320 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2321 2322 if (old == IOMMU_NOTIFIER_NONE) { 2323 as->notifier = true; 2324 trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); 2325 } else if (new == IOMMU_NOTIFIER_NONE) { 2326 as->notifier = false; 2327 trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); 2328 } 2329 2330 return 0; 2331 } 2332 2333 static inline bool pci_is_iommu(PCIDevice *pdev) 2334 { 2335 return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; 2336 } 2337 2338 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) 2339 { 2340 RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; 2341 PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); 2342 AddressSpace *as = NULL; 2343 2344 if (pdev && pci_is_iommu(pdev)) { 2345 return s->target_as; 2346 } 2347 2348 /* Find first registered IOMMU device */ 2349 while (s->iommus.le_prev) { 2350 s = *(s->iommus.le_prev); 2351 } 2352 2353 /* Find first matching IOMMU */ 2354 while (s != NULL && as == NULL) { 2355 as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn)); 2356 s = s->iommus.le_next; 2357 } 2358 2359 return as ? 
as : &address_space_memory; 2360 } 2361 2362 static const PCIIOMMUOps riscv_iommu_ops = { 2363 .get_address_space = riscv_iommu_find_as, 2364 }; 2365 2366 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, 2367 Error **errp) 2368 { 2369 if (bus->iommu_ops && 2370 bus->iommu_ops->get_address_space == riscv_iommu_find_as) { 2371 /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ 2372 RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; 2373 QLIST_INSERT_AFTER(last, iommu, iommus); 2374 } else if (!bus->iommu_ops && !bus->iommu_opaque) { 2375 pci_setup_iommu(bus, &riscv_iommu_ops, iommu); 2376 } else { 2377 error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", 2378 pci_bus_num(bus)); 2379 } 2380 } 2381 2382 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, 2383 MemTxAttrs attrs) 2384 { 2385 return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid; 2386 } 2387 2388 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) 2389 { 2390 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2391 return 1 << as->iommu->pid_bits; 2392 } 2393 2394 static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data) 2395 { 2396 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 2397 2398 imrc->translate = riscv_iommu_memory_region_translate; 2399 imrc->notify_flag_changed = riscv_iommu_memory_region_notify; 2400 imrc->attrs_to_index = riscv_iommu_memory_region_index; 2401 imrc->num_indexes = riscv_iommu_memory_region_index_len; 2402 } 2403 2404 static const TypeInfo riscv_iommu_memory_region_info = { 2405 .parent = TYPE_IOMMU_MEMORY_REGION, 2406 .name = TYPE_RISCV_IOMMU_MEMORY_REGION, 2407 .class_init = riscv_iommu_memory_region_init, 2408 }; 2409 2410 static void riscv_iommu_register_mr_types(void) 2411 { 2412 type_register_static(&riscv_iommu_memory_region_info); 2413 type_register_static(&riscv_iommu_info); 2414 } 2415 2416 
type_init(riscv_iommu_register_mr_types); 2417