/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "trace.h"

#define LIMIT_CACHE_CTX (1U << 7)
#define LIMIT_CACHE_IOT (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

/* Device translation context state. */
struct RISCVIOMMUContext {
    uint64_t devid:24;          /* Requester Id, AKA device_id */
    uint64_t process_id:20;     /* Process ID. PASID for PCIe */
    uint64_t tc;                /* Translation Control */
    uint64_t ta;                /* Translation Attributes */
    uint64_t satp;              /* S-Stage address translation and protection */
    uint64_t gatp;              /* G-Stage address translation and protection */
    uint64_t msi_addr_mask;     /* MSI filtering - address mask */
    uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
    uint64_t msiptp;            /* MSI redirection page table pointer */
};

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0

static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
{
    switch (vec_type) {
    case RISCV_IOMMU_INTR_CQ:
        return icvec & RISCV_IOMMU_ICVEC_CIV;
    case RISCV_IOMMU_INTR_FQ:
        return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
    case RISCV_IOMMU_INTR_PM:
        return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
    case RISCV_IOMMU_INTR_PQ:
        return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
    default:
        g_assert_not_reached();
    }
}
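/*
 * Illustration (values are examples, not mandated anywhere): with the
 * 4-bit ICVEC fields at the nibble offsets implied by the shifts above,
 * an ICVEC value of 0x3210 selects vector 0 for the command queue (CIV),
 * vector 1 for the fault queue (FIV), vector 2 for performance
 * monitoring (PMIV) and vector 3 for the page-request queue (PIV).
 */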
static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
    uint32_t ipsr, icvec, vector;

    if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);

    if (!(ipsr & (1 << vec_type))) {
        vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
        s->notify(s, vector);
        trace_riscv_iommu_notify_int_vector(vec_type, vector);
    }
}

static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), ev->hdr, ev->iotval);

    if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
        !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else {
        dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
        if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}

static void riscv_iommu_pri(RISCVIOMMUState *s,
                            struct riscv_iommu_pq_record *pr)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), pr->payload);

    if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
        !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else {
        dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
        if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}
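/*
 * Worked example of the circular-queue indexing used by the fault and
 * page-request producers above (assuming a 16-entry queue, so the mask
 * is 0xF): with tail == 15, next == (15 + 1) & 0xF == 0. If head is
 * also 0 the queue is full and the overflow bit (FQOF/PQOF) is raised
 * instead of writing a record; otherwise the record is stored at the
 * tail slot and the tail register advances to next.
 */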
/*
 * Discards all bits from 'val' whose matching bits in the same
 * positions in the mask 'ext' are zeros, and packs the remaining
 * bits from 'val' contiguously at the least-significant end of the
 * result, keeping the same bit order as 'val' and filling any
 * other bits at the most-significant end of the result with zeros.
 *
 * For example, for the following 'val' and 'ext', the return 'ret'
 * will be:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to bit manip
 * function PEXT (Parallel bits extract) from x86.
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}

/* Check if GPA matches MSI/MRIF pattern. */
static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                  dma_addr_t gpa)
{
    if (!s->enable_msi) {
        return false;
    }

    if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
        return false; /* Invalid MSI/MRIF mode */
    }

    if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
        return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
    }

    return true;
}
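/*
 * Worked example for the pattern check above (illustrative values, not
 * mandated by the spec): with msi_addr_pattern == 0x12300 and
 * msi_addr_mask == 0x000ff, a GPA with PPN 0x123ab matches, since its
 * XOR with the pattern differs only in bits covered by the mask;
 * PPN 0x124ab does not match, as the XOR leaves bits set outside it.
 */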
/*
 * RISCV IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations can be merged into a single helper function in the
 * future. Keeping them separate for now, as error reporting and flow
 * specifics are sufficiently different for separate implementation.
 *
 * @s        : IOMMU Device State
 * @ctx      : Translation context for device id and process address space id.
 * @iotlb    : translation data: physical address and access mode.
 * @return   : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /* S/G translation parameters. */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
                   (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    }

    /* S/G stages translation tables root pointers */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;

    do {
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_len = va_skip + va_bits;
            const uint64_t va_mask = (1ULL << va_len) - 1;

            if (pass == S_STAGE && va_len > 32) {
                target_ulong mask, masked_msbs;

                mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
                masked_msbs = (addr >> (va_len - 1)) & mask;

                if (masked_msbs != 0 && masked_msbs != mask) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
                }
            } else {
                if ((addr & va_mask) != addr) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
                }
            }
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /* Continue with S-Stage translation? */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                        : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /* Continue with G-Stage translation? */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /* Continue with G-Stage translation? */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}
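/*
 * Worked example for the index computation in the walk above
 * (illustrative): for Sv39x4 at the first G-stage step (levels == 3,
 * ptidxbits == 9, ptesize == 8), widened == 2, so va_bits == 11 and
 * va_skip == 12 + 9 * 2 == 30; the top-level index is bits [40:30]
 * of the address and the PTE is fetched from base + idx * 8.
 */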
static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
    /* PV flags whether the PID field below is valid. */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);

    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}

/* Redirect MSI write for given GPA. */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    uint32_t n190;
    uint64_t pte[2];
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    if (intn >= 256) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    addr = addr | (intn * sizeof(pte));
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
                          MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047) or if the destination address is
     * not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page
     * tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /* MSI MRIF mode, non-atomic pending bit update */

    /* MRIF pending bit address */
    addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
    addr = addr | ((data & 0x7c0) >> 3);

    trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                          PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                          gpa, addr);

    /* MRIF pending bit mask */
    data = 1ULL << (data & 0x03f);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    intn = intn | data;
    res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    /* Get MRIF enable bits */
    addr = addr + sizeof(intn);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    if (!(intn & data)) {
        /* notification disabled, MRIF update completed. */
        return MEMTX_OK;
    }

    /* Send notification message */
    addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
    n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
          (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);

    res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);

    return MEMTX_OK;

err:
    riscv_iommu_report_fault(s, ctx, fault_type, cause,
                             !!ctx->process_id, 0, 0);
    return res;
}
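/*
 * Worked example of the MRIF arithmetic above (purely a restatement of
 * the code, with an illustrative interrupt identity): for data == 100,
 * (100 & 0x7c0) >> 3 == 8, so the pending-bit doubleword is accessed at
 * byte offset 8 from the MRIF base, and the bit set within it is
 * 100 & 0x3f == 36; the matching enable bits are then checked one
 * doubleword (8 bytes) further on.
 */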
/*
 * Check device context configuration as described by the
 * riscv-iommu spec section "Device-context configuration
 * checks".
 */
static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
                                            RISCVIOMMUContext *ctx)
{
    uint32_t fsc_mode, msi_mode;
    uint64_t gatp;

    if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
         ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
        ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
        return false;
    }

    if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
        ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
        return false;
    }

    if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
        msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);

        if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
            msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
            return false;
        }
    }

    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
    if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
        gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
        return false;
    }

    fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);

    if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
        switch (fsc_mode) {
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
            if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
            if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
            if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
                return false;
            }
            break;
        }
    } else {
        /* DC.tc.PDTV is 0 */
        if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
            return false;
        }

        if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
            if (fsc_mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
                !(s->cap & RISCV_IOMMU_CAP_SV32)) {
                return false;
            }
        } else {
            switch (fsc_mode) {
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
                if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
                if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
                if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                    return false;
                }
                break;
            }
        }
    }

    /*
     * CAP_END is always zero (only one endianness). FCTL_BE is
     * always zero (little-endian accesses). Thus TC_SBE must
     * always be LE, i.e. zero.
     */
    if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
        return false;
    }

    return true;
}
/*
 * Validate process context (PC) according to section
 * "Process-context configuration checks".
 */
static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
                                             RISCVIOMMUContext *ctx)
{
    uint32_t mode;

    if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
        return false;
    }

    if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
        return false;
    }

    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    switch (mode) {
    case RISCV_IOMMU_DC_FSC_MODE_BARE:
    /* sv39 and sv32 modes have the same value (8) */
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
        break;
    default:
        return false;
    }

    if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
        if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
            !(s->cap & RISCV_IOMMU_CAP_SV32)) {
            return false;
        }
    } else {
        switch (mode) {
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
            if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
            if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
            if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                return false;
            }
            break;
        }
    }

    return true;
}
/*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
 *
 * @s         : IOMMU Device State
 * @ctx       : Device Translation Context with devid and process_id set.
 * @return    : success or fault code.
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    int depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }
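    /*
     * Worked example for the width check above: in 2LVL mode
     * (depth == 1) with the base format (dc_fmt == 1), the limit is
     * 1 << (9 + 6 + 1) == 1 << 16, i.e. device ids are restricted to
     * 16 bits; the extended format drops the extra bit and allows 15.
     */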
    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}
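/*
 * Worked example of the process-directory walk above (illustrative):
 * in PD20 mode two non-leaf steps are taken, first with split == 17
 * and then split == 8, i.e. process_id bits [19:17] and [16:8] select
 * the directory entries; the remaining bits [7:0] then index the
 * 16-byte leaf entry holding TA and FSC.
 */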
/* Translation Context cache support */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}
/* Find or allocate translation context for a given {device_id, process_id} */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
        if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
            g_hash_table_unref(ctx_cache);
            ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                              riscv_iommu_ctx_equal,
                                              g_free, NULL);
            g_hash_table_ref(ctx_cache);
            g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
        }
        g_hash_table_add(ctx_cache, ctx);
        *ref = ctx_cache;
        return ctx;
    }

    g_hash_table_unref(ctx_cache);
    *ref = NULL;

    riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
                             fault, !!process_id, 0, 0);

    g_free(ctx);
    return NULL;
}

static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    if (ref) {
        g_hash_table_unref((GHashTable *)ref);
    }
}
/* Find or allocate address space for a given device */
static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
{
    RISCVIOMMUSpace *as;

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (as == NULL) {
        char name[64];
        as = g_new0(RISCVIOMMUSpace, 1);

        as->iommu = s;
        as->devid = devid;

        snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
                 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid),
                 PCI_FUNC(as->devid));

        /* IOVA address space, untranslated addresses */
        memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
                                 TYPE_RISCV_IOMMU_MEMORY_REGION,
                                 OBJECT(as), "riscv_iommu", UINT64_MAX);
        address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);

        QLIST_INSERT_HEAD(&s->spaces, as, list);

        trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
                              PCI_SLOT(as->devid), PCI_FUNC(as->devid));
    }
    return &as->iova_as;
}

/* Translation Object cache support */
static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
    RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
    return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
           t1->iova == t2->iova;
}

static guint riscv_iommu_iot_hash(gconstpointer v)
{
    RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
    return (guint)t->iova;
}

/* GV: 1 PSCV: 1 AV: 1 */
static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 PSCV: 1 AV: 0 */
static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 GVMA: 1 */
static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
                                            gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid) {
        /* simplified cache, no GPA matching */
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 */
static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    iot->perm = IOMMU_NONE;
}

/* caller should keep ref-count for iot_cache object */
static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
    GHashTable *iot_cache, hwaddr iova)
{
    RISCVIOMMUEntry key = {
        .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
        .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
        .iova  = PPN_DOWN(iova),
    };
    return g_hash_table_lookup(iot_cache, &key);
}

/* caller should keep ref-count for iot_cache object */
static void riscv_iommu_iot_update(RISCVIOMMUState *s,
    GHashTable *iot_cache, RISCVIOMMUEntry *iot)
{
    if (!s->iot_limit) {
        return;
    }

    if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
        iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                          riscv_iommu_iot_equal,
                                          g_free, NULL);
        g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
    }
    g_hash_table_add(iot_cache, iot);
}

static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
    uint32_t gscid, uint32_t pscid, hwaddr iova)
{
    GHashTable *iot_cache;
    RISCVIOMMUEntry key = {
        .gscid = gscid,
        .pscid = pscid,
        .iova  = PPN_DOWN(iova),
    };

    iot_cache = g_hash_table_ref(s->iot_cache);
    g_hash_table_foreach(iot_cache, func, &key);
    g_hash_table_unref(iot_cache);
}
static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
        iot = g_new0(RISCVIOMMUEntry, 1);
        iot->iova = PPN_DOWN(iotlb->iova);
        iot->phys = PPN_DOWN(iotlb->translated_addr);
        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
        iot->perm = iotlb->perm;
        riscv_iommu_iot_update(s, iot_cache, iot);
    }

done:
    g_hash_table_unref(iot_cache);

    if (enable_pri && fault) {
        struct riscv_iommu_pq_record pr = {0};
        if (enable_pid) {
            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
        }
        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
                     RISCV_IOMMU_PREQ_PAYLOAD_M;
        riscv_iommu_pri(s, &pr);
        return fault;
    }

    if (fault) {
        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;

        if (iotlb->perm & IOMMU_RW) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}
/* IOMMU Command Interface */
static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
    uint64_t addr, uint32_t data)
{
    /*
     * ATS processing in this implementation of the IOMMU is synchronous,
     * no need to wait for completions here.
     */
    if (!notify) {
        return MEMTX_OK;
    }

    return dma_memory_write(s->target_as, addr, &data, sizeof(data),
                            MEMTXATTRS_UNSPECIFIED);
}

static void riscv_iommu_ats(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
    IOMMUAccessFlags perm,
    void (*trace_fn)(const char *id))
{
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *n;
    IOMMUTLBEvent event;
    uint32_t pid;
    uint32_t devid;
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;

    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    } else {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (!as || !as->notifier) {
        return;
    }

    event.type = flag;
    event.entry.perm = perm;
    event.entry.target_as = s->target_as;

    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
        if (!pv || n->iommu_idx == pid) {
            event.entry.iova = n->start;
            event.entry.addr_mask = n->end - n->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(n, &event);
        }
    }
}

static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                           trace_riscv_iommu_ats_inval);
}

static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                           trace_riscv_iommu_ats_prgr);
}

static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
    bool ok = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode ||
        new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
        new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
        ok = true;
    } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
        ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
             old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    }

    if (ok) {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        new_ddtp = old_ddtp;
    }
    s->ddtp = new_ddtp;

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}
/* Command function and opcode field. */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
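/*
 * Illustration of the encoding above: the 7-bit opcode occupies the low
 * bits and the function code sits directly above it, so
 * RISCV_IOMMU_CMD(2, 1) == (2 << 7) | 1 == 0x101. The same func/opcode
 * pair is extracted from dword0 by the dispatch switch below.
 */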
static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID */
                func = riscv_iommu_iot_inval_gscid;
            } else {
                /* invalidate cache matching GSCID and ADDR (GPA) */
                func = riscv_iommu_iot_inval_gscid_gpa;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings, simplified model */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
                /* invalidate cache matching GSCID, simplified model */
                func = riscv_iommu_iot_inval_gscid;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID and PSCID */
                func = riscv_iommu_iot_inval_pscid;
            } else {
                /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
                func = riscv_iommu_iot_inval_pscid_iova;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid instruction, do not advance instruction index. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}

static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
        s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        ctrl_set = RISCV_IOMMU_FQCSR_FQON;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
                   RISCV_IOMMU_FQCSR_FQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
                   RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}
static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                              RISCV_IOMMU_TR_RESPONSE_FAULT |
                              (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova >>= TARGET_PAGE_BITS;
            iova &= RISCV_IOMMU_TR_RESPONSE_PPN;

            /* We do not support superpages (> 4 KiB) for now */
            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}

typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}

static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

/*
 * Write the resulting value of 'data' for the register specified by
 * 'reg_addr' into the pointer 'dest', after applying the register's
 * read-only, read-write and write-1-to-clear bit masks.
 *
 * The result is written in little-endian.
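 *
 * Illustration with hypothetical masks (not taken from the spec): for
 * ro == 0x0f, wc == 0x30, rw == 0x05 and data == 0x3a, the stored value
 * is ((0x05 & 0x0f) | (0x3a & ~0x0f)) & ~(0x3a & 0x30) == 0x05: the
 * read-only low nibble keeps its old value and the write-1-to-clear
 * bits 0x30 are cleared by the write.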
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}

static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with core logic.
     * If system software updates a register while its relevant BUSY
     * bit is set, the IOMMU behavior for the additional writes is
     * UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}
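/*
 * A sketch of the handshake implemented above (no additional behavior):
 * a driver enabling the command queue programs CQB and then sets
 * CQCSR.CQEN; the write path latches CQCSR.BUSY and invokes
 * riscv_iommu_process_cq_control(), which reports CQON and clears BUSY.
 */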
static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    ptr = &s->regs_rw[addr];
    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching the MSI pattern check are redirected to the
 * "riscv-iommu-trap" memory region as untranslated addresses, for
 * additional MSI/MRIF interception by the IOMMU interrupt remapping
 * implementation.
 * Note: device emulation code generating an MSI is expected to provide
 * valid memory transaction attributes with requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }
    /* Enable translation debug interface */
    s->cap |= RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
             RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
             RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
             RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
             RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
             RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
             RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }
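    /*
     * Note on the shadow arrays above: regs_ro starts out all-ones (fully
     * read-only) and each writable field is opened up by storing the
     * complement of its mask, e.g. ~(RISCV_IOMMU_CQB_LOG2SZ |
     * RISCV_IOMMU_CQB_PPN) leaves exactly those CQB fields writable.
     */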
    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
                           "riscv-iommu-downstream");
    } else {
        /* Fallback to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
                          "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}

static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);
}

static const Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
                       RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
                       LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
                     TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_END_OF_LIST(),
};

static void riscv_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};
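/*
 * IOMMU_FLAG_STR is indexed by (flag & IOMMU_RW); assuming QEMU's usual
 * IOMMUAccessFlags encoding (IOMMU_NONE..IOMMU_RW as 0..3) this yields
 * "NA", "RO", "WR" or "RW" in the trace output below.
 */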
/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all dma translations with original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}

static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}

static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}

static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}

static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
                                 Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
                   pci_bus_num(bus));
    }
}
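/*
 * Per-process address spaces are selected by IOMMU index: a device's
 * PASID travels in MemTxAttrs.pid (which is why pid_bits is capped at 8
 * in riscv_iommu_realize()), and transactions without valid attributes
 * fall back to RISCV_IOMMU_NOPROCID.
 */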
static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);