/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "trace.h"

#define LIMIT_CACHE_CTX (1U << 7)
#define LIMIT_CACHE_IOT (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn)  ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)  ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

/* Device translation context state. */
struct RISCVIOMMUContext {
    uint64_t devid:24;          /* Requester Id, AKA device_id */
    uint64_t process_id:20;     /* Process ID. PASID for PCIe */
    uint64_t tc;                /* Translation Control */
    uint64_t ta;                /* Translation Attributes */
    uint64_t satp;              /* S-Stage address translation and protection */
    uint64_t gatp;              /* G-Stage address translation and protection */
    uint64_t msi_addr_mask;     /* MSI filtering - address mask */
    uint64_t msi_addr_pattern;  /* MSI filtering - address pattern */
    uint64_t msiptp;            /* MSI redirection page table pointer */
};

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};
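/*
 * Note (added): the bit-fields above total 126 bits, so each cache entry
 * packs into two 64-bit words on the usual ABIs -- iova and pscid fill the
 * first word, phys, gscid and perm the second.
 */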
/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0

static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
{
    switch (vec_type) {
    case RISCV_IOMMU_INTR_CQ:
        return icvec & RISCV_IOMMU_ICVEC_CIV;
    case RISCV_IOMMU_INTR_FQ:
        return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
    case RISCV_IOMMU_INTR_PM:
        return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
    case RISCV_IOMMU_INTR_PQ:
        return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
    default:
        g_assert_not_reached();
    }
}

static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL);
    uint32_t ipsr, icvec, vector;

    if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);

    if (!(ipsr & (1 << vec_type))) {
        vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
        s->notify(s, vector);
        trace_riscv_iommu_notify_int_vector(vec_type, vector);
    }
}

static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), ev->hdr, ev->iotval);

    if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
        !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else {
        dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
        if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}
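/*
 * Worked example (added): with a 16-entry fault queue (fq_mask == 0xF),
 * head == 5 and tail == 4, the producer computes next == (4 + 1) & 0xF
 * == 5 == head, so the queue is full and the overflow flag is raised
 * instead of writing another record. The page-request queue below uses
 * the same circular-buffer logic.
 */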
static void riscv_iommu_pri(RISCVIOMMUState *s,
                            struct riscv_iommu_pq_record *pr)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                          PCI_FUNC(devid), pr->payload);

    if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
        !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else {
        dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
        if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}

/* Portable implementation of pext_u64, bit-mask extraction. */
static uint64_t _pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}
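/*
 * Example (added): _pext_u64(0x2A, 0x0C) == 0x2 -- bits 2 and 3 of 'val'
 * (0 and 1 respectively) are packed into the low bits of the result,
 * matching the semantics of the x86 PEXT instruction.
 */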
/* Check if GPA matches MSI/MRIF pattern. */
static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                  dma_addr_t gpa)
{
    if (!s->enable_msi) {
        return false;
    }

    if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
        return false; /* Invalid MSI/MRIF mode */
    }

    if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
        return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
    }

    return true;
}
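/*
 * Example (added): with msi_addr_pattern == 0x12300 and msi_addr_mask ==
 * 0xFF, any GPA whose page number is 0x123xx matches -- bits set in the
 * mask are don't-care, while all other page number bits must equal the
 * pattern.
 */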
/*
 * RISC-V IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations could be merged into a single helper function in the
 * future. Keeping them separate for now, as error reporting and flow specifics
 * are sufficiently different for a separate implementation.
 *
 * @s      : IOMMU Device State
 * @ctx    : Translation context for device id and process address space id.
 * @iotlb  : translation data: physical address and access mode.
 * @return : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /* S/G translation parameters. */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
                   (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize   = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels    = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize   = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels    = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize   = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    }

    /* S/G stages translation tables root pointers */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;
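    /*
     * Note (added): with both stages enabled the walk starts in G-stage
     * with the S-stage root pointer as the address to translate -- the
     * S-stage tables themselves live in guest-physical memory, so every
     * S-stage table pointer produced below is run through a full G-stage
     * walk before it is dereferenced.
     */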
    do {
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
            if ((addr & va_mask) != addr) {
                return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
            }
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /* Continue with S-Stage translation? */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                        : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /* Continue with G-Stage translation? */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /* Continue with G-Stage translation? */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}

static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
    /* Report PV only when a valid process_id accompanies the fault. */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);

    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}

/* Redirect MSI write for given GPA. */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    uint32_t n190;
    uint64_t pte[2];
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = _pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    if (intn >= 256) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    addr = addr | (intn * sizeof(pte));
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
                          MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047) or whose destination address is not
     * 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /* MSI MRIF mode, non-atomic pending bit update */
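    /*
     * Worked example (added): for interrupt identity data == 100 the
     * pending bit lives in the 64-bit word at MRIF byte offset
     * (100 & 0x7c0) >> 3 == 8, at bit position 100 & 0x3f == 36,
     * i.e. bit 8 * 8 + 36 == 100 of the file, as expected.
     */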
    /* MRIF pending bit address */
    addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
    addr = addr | ((data & 0x7c0) >> 3);

    trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                          PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                          gpa, addr);

    /* MRIF pending bit mask */
    data = 1ULL << (data & 0x03f);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    intn = intn | data;
    res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    /* Get MRIF enable bits */
    addr = addr + sizeof(intn);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    if (!(intn & data)) {
        /* notification disabled, MRIF update completed. */
        return MEMTX_OK;
    }

    /* Send notification message */
    addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
    n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
          (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);

    res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);

    return MEMTX_OK;

err:
    riscv_iommu_report_fault(s, ctx, fault_type, cause,
                             !!ctx->process_id, 0, 0);
    return res;
}

/*
 * Check device context configuration as described by the
 * riscv-iommu spec section "Device-context configuration
 * checks".
 */
static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
                                            RISCVIOMMUContext *ctx)
{
    uint32_t fsc_mode, msi_mode;
    uint64_t gatp;

    if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
         ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
        ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
        return false;
    }

    if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
        ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
        return false;
    }

    if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
        msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);

        if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
            msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
            return false;
        }
    }

    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
    if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
        gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
        return false;
    }

    fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);

    if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
        switch (fsc_mode) {
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
            if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
            if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
            if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
                return false;
            }
            break;
        }
    } else {
        /* DC.tc.PDTV is 0 */
        if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
            return false;
        }

        if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
            /*
             * With SXL == 1 the only non-Bare mode is Sv32, which shares
             * the iosatp MODE encoding (8) with Sv39.
             */
            if (fsc_mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 &&
                !(s->cap & RISCV_IOMMU_CAP_SV32)) {
                return false;
            }
        } else {
            switch (fsc_mode) {
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
                if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
                if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
                if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                    return false;
                }
                break;
            }
        }
    }

    /*
     * CAP_END is always zero (only one endianness). FCTL_BE is
     * always zero (little-endian accesses). Thus TC_SBE must
     * always be LE, i.e. zero.
     */
    if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
        return false;
    }

    return true;
}

/*
 * Validate process context (PC) according to section
 * "Process-context configuration checks".
 */
static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
                                             RISCVIOMMUContext *ctx)
{
    uint32_t mode;

    if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
        return false;
    }

    if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
        return false;
    }

    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    switch (mode) {
    case RISCV_IOMMU_DC_FSC_MODE_BARE:
    /* sv39 and sv32 modes have the same value (8) */
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
        break;
    default:
        return false;
    }

    if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
        /* Sv32 shares the MODE encoding (8) with Sv39, see above. */
        if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 &&
            !(s->cap & RISCV_IOMMU_CAP_SV32)) {
            return false;
        }
    } else {
        switch (mode) {
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
            if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
            if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
            if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                return false;
            }
            break;
        }
    }

    return true;
}

/*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
 *
 * @s      : IOMMU Device State
 * @ctx    : Device Translation Context with devid and process_id set.
 * @return : success or fault code.
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    unsigned depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
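    /*
     * Example (added): in 2LVL mode with the base (32-byte) format,
     * depth == 1 and dc_fmt == 1, so the limit below evaluates to
     * 1 << (9 + 6 + 1) == 1 << 16 -- device ids must fit in 16 bits.
     */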
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }

    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }
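    /*
     * Note (added): PD8 walks no directory levels (the leaf table is
     * indexed by process_id[7:0] directly), PD17 walks one level (index
     * bits [16:8]) and PD20 walks two ([19:17] then [16:8]), matching
     * the loop bound computed below.
     */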
    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}

/* Find or allocate translation context for a given {device_id, process_id} */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
        if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
            g_hash_table_unref(ctx_cache);
            ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                              riscv_iommu_ctx_equal,
                                              g_free, NULL);
            g_hash_table_ref(ctx_cache);
            g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
        }
        g_hash_table_add(ctx_cache, ctx);
        *ref = ctx_cache;
        return ctx;
    }

    g_hash_table_unref(ctx_cache);
    *ref = NULL;

    riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
                             fault, !!process_id, 0, 0);

    g_free(ctx);
    return NULL;
}

static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    if (ref) {
        g_hash_table_unref((GHashTable *)ref);
    }
}
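/*
 * Note (added): the requester id used below is composed as
 * (bus << 8) | devfn, e.g. devid 0x0010 identifies bus 0, slot 2,
 * function 0.
 */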
/* Find or allocate address space for a given device */
static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
{
    RISCVIOMMUSpace *as;

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (as == NULL) {
        char name[64];
        as = g_new0(RISCVIOMMUSpace, 1);

        as->iommu = s;
        as->devid = devid;

        snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
            PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));

        /* IOVA address space, untranslated addresses */
        memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
                                 TYPE_RISCV_IOMMU_MEMORY_REGION,
                                 OBJECT(as), "riscv_iommu", UINT64_MAX);
        address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);

        QLIST_INSERT_HEAD(&s->spaces, as, list);

        trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
                              PCI_SLOT(as->devid), PCI_FUNC(as->devid));
    }
    return &as->iova_as;
}

/* Translation Object cache support */
static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
    RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
    return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
           t1->iova == t2->iova;
}

static guint riscv_iommu_iot_hash(gconstpointer v)
{
    RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
    return (guint)t->iova;
}
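/*
 * Note (added): only the IOVA page number is hashed, so entries sharing
 * an IOVA but belonging to different GSCID/PSCID address spaces land in
 * the same bucket and are told apart by riscv_iommu_iot_equal().
 */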
/* GV: 1 PSCV: 1 AV: 1 */
static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 PSCV: 1 AV: 0 */
static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 GVMA: 1 */
static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value,
                                            gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid) {
        /* simplified cache, no GPA matching */
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->gscid == arg->gscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 */
static void riscv_iommu_iot_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    iot->perm = IOMMU_NONE;
}

/* caller should keep ref-count for iot_cache object */
static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
    GHashTable *iot_cache, hwaddr iova)
{
    RISCVIOMMUEntry key = {
        .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
        .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
        .iova  = PPN_DOWN(iova),
    };
    return g_hash_table_lookup(iot_cache, &key);
}

/* caller should keep ref-count for iot_cache object */
static void riscv_iommu_iot_update(RISCVIOMMUState *s,
    GHashTable *iot_cache, RISCVIOMMUEntry *iot)
{
    if (!s->iot_limit) {
        return;
    }

    if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
        iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                          riscv_iommu_iot_equal,
                                          g_free, NULL);
        g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
    }
    g_hash_table_add(iot_cache, iot);
}

static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
    uint32_t gscid, uint32_t pscid, hwaddr iova)
{
    GHashTable *iot_cache;
    RISCVIOMMUEntry key = {
        .gscid = gscid,
        .pscid = pscid,
        .iova  = PPN_DOWN(iova),
    };

    iot_cache = g_hash_table_ref(s->iot_cache);
    g_hash_table_foreach(iot_cache, func, &key);
    g_hash_table_unref(iot_cache);
}

static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
        iot = g_new0(RISCVIOMMUEntry, 1);
        iot->iova = PPN_DOWN(iotlb->iova);
        iot->phys = PPN_DOWN(iotlb->translated_addr);
        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
        iot->perm = iotlb->perm;
        riscv_iommu_iot_update(s, iot_cache, iot);
    }

done:
    g_hash_table_unref(iot_cache);

    if (enable_pri && fault) {
        struct riscv_iommu_pq_record pr = {0};
        if (enable_pid) {
            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
        }
        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
                     RISCV_IOMMU_PREQ_PAYLOAD_M;
        riscv_iommu_pri(s, &pr);
        return fault;
    }

    if (fault) {
        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;

        if (iotlb->perm & IOMMU_RW) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}

/* IOMMU Command Interface */
static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
    uint64_t addr, uint32_t data)
{
    /*
     * ATS processing in this implementation of the IOMMU is synchronous,
     * no need to wait for completions here.
     */
    if (!notify) {
        return MEMTX_OK;
    }

    return dma_memory_write(s->target_as, addr, &data, sizeof(data),
                            MEMTXATTRS_UNSPECIFIED);
}

static void riscv_iommu_ats(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
    IOMMUAccessFlags perm,
    void (*trace_fn)(const char *id))
{
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *n;
    IOMMUTLBEvent event;
    uint32_t pid;
    uint32_t devid;
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;

    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    } else {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (!as || !as->notifier) {
        return;
    }

    event.type = flag;
    event.entry.perm = perm;
    event.entry.target_as = s->target_as;

    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
        if (!pv || n->iommu_idx == pid) {
            event.entry.iova = n->start;
            event.entry.addr_mask = n->end - n->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(n, &event);
        }
    }
}

static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                           trace_riscv_iommu_ats_inval);
}

static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                           trace_riscv_iommu_ats_prgr);
}

static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
    bool ok = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode ||
        new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
        new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
        ok = true;
    } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
        ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
             old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    }

    if (ok) {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        new_ddtp = old_ddtp;
    }
    s->ddtp = new_ddtp;

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}
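/*
 * Example (added): a direct 1LVL -> 3LVL write is rejected by the
 * transition rules above; software must first bring the mode back to
 * OFF or BARE and only then program 3LVL.
 */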
/* Command function and opcode field. */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))

static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID */
                func = riscv_iommu_iot_inval_gscid;
            } else {
                /* invalidate cache matching GSCID and ADDR (GPA) */
                func = riscv_iommu_iot_inval_gscid_gpa;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings, simplified model */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
                /* invalidate cache matching GSCID, simplified model */
                func = riscv_iommu_iot_inval_gscid;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID and PSCID */
                func = riscv_iommu_iot_inval_pscid;
            } else {
                /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
                func = riscv_iommu_iot_inval_pscid_iova;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;
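        /*
         * Note (added): the IODIR invalidations below clear the valid
         * bit on cached device/process contexts; the next translation
         * for an affected device re-walks the directory tables.
         */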
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid command, do not advance the head pointer. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}

static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}
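/*
 * Example (added): the LOG2SZ field encodes log2(entries) - 1, so a
 * CQB.LOG2SZ value of 3 yields (2ULL << 3) == 16 queue entries and
 * cq_mask == 0xF; the FQB and PQB registers below follow the same
 * pattern.
 */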
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
        s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        ctrl_set = RISCV_IOMMU_FQCSR_FQON;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
                   RISCV_IOMMU_FQCSR_FQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
                   RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}
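/*
 * Usage sketch (added): software drives the debug interface below by
 * writing the address to TR_REQ_IOVA, then DID/PID plus GO_BUSY to
 * TR_REQ_CTL, polling TR_REQ_CTL until GO_BUSY clears, and finally
 * reading the translated PPN or fault cause from TR_RESPONSE.
 */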
IOMMU_RO : IOMMU_RW, 1789 .addr_mask = ~0, 1790 .target_as = NULL, 1791 }; 1792 int fault = riscv_iommu_translate(s, ctx, &iotlb, false); 1793 if (fault) { 1794 iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); 1795 } else { 1796 iova = iotlb.translated_addr & ~iotlb.addr_mask; 1797 iova >>= TARGET_PAGE_BITS; 1798 iova &= RISCV_IOMMU_TR_RESPONSE_PPN; 1799 1800 /* We do not support superpages (> 4kbs) for now */ 1801 iova &= ~RISCV_IOMMU_TR_RESPONSE_S; 1802 } 1803 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); 1804 } 1805 1806 riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, 1807 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 1808 riscv_iommu_ctx_put(s, ref); 1809 } 1810 1811 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); 1812 1813 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) 1814 { 1815 uint64_t icvec = 0; 1816 1817 icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, 1818 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); 1819 1820 icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, 1821 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); 1822 1823 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, 1824 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); 1825 1826 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, 1827 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV); 1828 1829 trace_riscv_iommu_icvec_write(data, icvec); 1830 1831 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); 1832 } 1833 1834 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) 1835 { 1836 uint32_t cqcsr, fqcsr, pqcsr; 1837 uint32_t ipsr_set = 0; 1838 uint32_t ipsr_clr = 0; 1839 1840 if (data & RISCV_IOMMU_IPSR_CIP) { 1841 cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1842 1843 if (cqcsr & RISCV_IOMMU_CQCSR_CIE && 1844 (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || 1845 cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || 1846 cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || 1847 cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { 1848 ipsr_set |= RISCV_IOMMU_IPSR_CIP; 1849 } else { 1850 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1851 } 1852 } else { 1853 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1854 } 1855 1856 if (data & RISCV_IOMMU_IPSR_FIP) { 1857 fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1858 1859 if (fqcsr & RISCV_IOMMU_FQCSR_FIE && 1860 (fqcsr & RISCV_IOMMU_FQCSR_FQOF || 1861 fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { 1862 ipsr_set |= RISCV_IOMMU_IPSR_FIP; 1863 } else { 1864 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 1865 } 1866 } else { 1867 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 1868 } 1869 1870 if (data & RISCV_IOMMU_IPSR_PIP) { 1871 pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 1872 1873 if (pqcsr & RISCV_IOMMU_PQCSR_PIE && 1874 (pqcsr & RISCV_IOMMU_PQCSR_PQOF || 1875 pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { 1876 ipsr_set |= RISCV_IOMMU_IPSR_PIP; 1877 } else { 1878 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 1879 } 1880 } else { 1881 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 1882 } 1883 1884 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); 1885 } 1886 1887 /* 1888 * Write the resulting value of 'data' for the reg specified 1889 * by 'reg_addr', after considering read-only/read-write/write-clear 1890 * bits, in the pointer 'dest'. 1891 * 1892 * The result is written in little-endian. 
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

/*
 * Write the resulting value of 'data' for the register specified by
 * 'reg_addr', after applying its read-only, read-write and
 * write-1-to-clear bit masks, into the pointer 'dest'.
 *
 * The result is stored in little-endian byte order.
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}
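/*
 * Worked example of the masking expression above, for a hypothetical
 * one-byte register with ro = 0xf0 (upper nibble read-only),
 * wc = 0x01 (bit 0 write-1-to-clear), current value rw = 0xa5, and a
 * guest write of data = 0x0f:
 *   (rw & ro)    = 0xa0   read-only bits keep their old value
 *   (data & ~ro) = 0x0f   writable bits take the new value
 *   ~(data & wc) = ~0x01  writing 1 to a W1C bit clears it
 * giving (0xa0 | 0x0f) & ~0x01 = 0xae. Values are illustrative only.
 */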
static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with the core logic.
     * If system software updates a register while the relevant BUSY bit
     * is set, the IOMMU behavior for the additional writes is
     * UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update in the 4-byte register at regb. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}
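/*
 * Note that the 64-bit DDTP register is actionable through either of its
 * 4-byte halves: both RISCV_IOMMU_REG_DDTP and RISCV_IOMMU_REG_DDTP + 4
 * select riscv_iommu_process_ddtp above, with regb snapped back to the
 * register base so the BUSY flag is latched at a fixed offset before the
 * handler runs.
 */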
static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    ptr = &s->regs_rw[addr];
    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching the MSI pattern check are redirected to the
 * "riscv-iommu-trap" memory region as an untranslated address, for
 * additional MSI/MRIF interception by the IOMMU interrupt remapping
 * implementation.
 * Note: Device emulation code generating an MSI is expected to provide
 * valid memory transaction attributes with requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};
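/*
 * The trap region is sized to span the entire address space (see the
 * memory_region_init_io() call with size ~0ULL in riscv_iommu_realize()
 * below), so any write an attached device issues to an untranslated MSI
 * address can be intercepted here and routed through
 * riscv_iommu_msi_write() for interrupt remapping; reads from the trap
 * region are always rejected.
 */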
static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap = s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }
    /* Enable translation debug interface */
    s->cap |= RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /*
     * Out-of-reset translation mode: OFF (DMA disabled) or
     * BARE (passthrough).
     */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /* Register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
             RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
             RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
             RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
             RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
             RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
             RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers are enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
                           "riscv-iommu-downstream");
    } else {
        /* Fall back to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
                          "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}
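/*
 * A short recap of the register storage convention set up above, as
 * consumed by riscv_iommu_write_reg_val(): regs_rw holds the current
 * little-endian register values, regs_ro is a bit mask of read-only bits
 * (hence the initial memset to 0xff, with writable bits opened
 * selectively per register), and regs_wc is a bit mask of
 * write-1-to-clear bits. For example, storing
 * ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE) into
 * regs_ro[RISCV_IOMMU_REG_DDTP] leaves exactly the PPN and MODE fields
 * guest-writable.
 */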
static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);
}

static Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
                       RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
                       LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
                     TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_END_OF_LIST(),
};

static void riscv_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};

/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all DMA translations with original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}
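/*
 * When translation fails above, the returned entry has addr_mask cleared
 * and perm set to IOMMU_NONE, i.e. no access is granted for the
 * requested IOVA; where a fault was detected during the walk, it has
 * already been reported by riscv_iommu_translate() (see the "fault
 * reported" branch).
 */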
static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}

/* PCI class 08h (base system peripheral), subclass 06h: IOMMU */
static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}

static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}

static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
                                 Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
                   pci_bus_num(bus));
    }
}

static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);