/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "riscv-iommu-hpm.h"
#include "trace.h"

#define LIMIT_CACHE_CTX (1U << 7)
#define LIMIT_CACHE_IOT (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
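
/*
 * For illustration, assuming the usual 4 KiB target pages
 * (TARGET_PAGE_BITS == 12): PPN_DOWN(0x80001234) == 0x80001 and
 * PPN_PHYS(0x80001) == 0x80001000.
 */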

typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

typedef enum RISCVIOMMUTransTag {
    RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
    RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
    RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
    RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
} RISCVIOMMUTransTag;

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    RISCVIOMMUTransTag tag;     /* Translation Tag */
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0

static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
{
    switch (vec_type) {
    case RISCV_IOMMU_INTR_CQ:
        return icvec & RISCV_IOMMU_ICVEC_CIV;
    case RISCV_IOMMU_INTR_FQ:
        return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
    case RISCV_IOMMU_INTR_PM:
        return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
    case RISCV_IOMMU_INTR_PQ:
        return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
    default:
        g_assert_not_reached();
    }
}

void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    uint32_t ipsr, icvec, vector;

    if (!s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);

    if (!(ipsr & (1 << vec_type))) {
        vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
        s->notify(s, vector);
        trace_riscv_iommu_notify_int_vector(vec_type, vector);
    }
}

static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid),
                          PCI_SLOT(devid), PCI_FUNC(devid), ev->hdr,
                          ev->iotval);

    if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
        !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else {
        dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
        if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}
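
/*
 * A worked example of the ring arithmetic above, assuming a 16-entry
 * fault queue (fq_mask == 0xF): with head == 5 and tail == 4,
 * next == (4 + 1) & 0xF == 5 == head, so the queue is considered full
 * and the record is dropped with FQCSR.FQOF raised instead of written.
 */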

static void riscv_iommu_pri(RISCVIOMMUState *s,
    struct riscv_iommu_pq_record *pr)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid),
                          PCI_SLOT(devid), PCI_FUNC(devid), pr->payload);

    if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
        !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else {
        dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
        if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}

/*
 * Discards all bits from 'val' whose matching bits in the same
 * positions in the mask 'ext' are zeros, and packs the remaining
 * bits from 'val' contiguously at the least-significant end of the
 * result, keeping the same bit order as 'val' and filling any
 * other bits at the most-significant end of the result with zeros.
 *
 * For example, for the following 'val' and 'ext', the return 'ret'
 * will be:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to the x86
 * bit-manipulation instruction PEXT (parallel bits extract).
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}
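
/*
 * In this file the extract is used to derive an IMSIC interrupt file
 * number from a guest physical page number. A small worked example with
 * assumed values: msi_addr_mask == 0x1003 selects PPN bits 0, 1 and 12,
 * so riscv_iommu_pext_u64(0x1001, 0x1003) == 0b101 == 5, i.e. the GPA
 * maps to interrupt file 5.
 */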
286 */ 287 if (!en_s && (iotlb->perm & IOMMU_WO) && 288 riscv_iommu_msi_check(s, ctx, iotlb->iova)) { 289 iotlb->target_as = &s->trap_as; 290 iotlb->translated_addr = iotlb->iova; 291 iotlb->addr_mask = ~TARGET_PAGE_MASK; 292 return 0; 293 } 294 295 /* Exit early for pass-through mode. */ 296 if (!(en_s || en_g)) { 297 iotlb->translated_addr = iotlb->iova; 298 iotlb->addr_mask = ~TARGET_PAGE_MASK; 299 /* Allow R/W in pass-through mode */ 300 iotlb->perm = IOMMU_RW; 301 return 0; 302 } 303 304 /* S/G translation parameters. */ 305 for (pass = 0; pass < 2; pass++) { 306 uint32_t sv_mode; 307 308 sc[pass].step = 0; 309 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : 310 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { 311 /* 32bit mode for GXL/SXL == 1 */ 312 switch (pass ? gatp : satp) { 313 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 314 sc[pass].levels = 0; 315 sc[pass].ptidxbits = 0; 316 sc[pass].ptesize = 0; 317 break; 318 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 319 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; 320 if (!(s->cap & sv_mode)) { 321 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 322 } 323 sc[pass].levels = 2; 324 sc[pass].ptidxbits = 10; 325 sc[pass].ptesize = 4; 326 break; 327 default: 328 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 329 } 330 } else { 331 /* 64bit mode for GXL/SXL == 0 */ 332 switch (pass ? gatp : satp) { 333 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 334 sc[pass].levels = 0; 335 sc[pass].ptidxbits = 0; 336 sc[pass].ptesize = 0; 337 break; 338 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 339 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; 340 if (!(s->cap & sv_mode)) { 341 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 342 } 343 sc[pass].levels = 3; 344 sc[pass].ptidxbits = 9; 345 sc[pass].ptesize = 8; 346 break; 347 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 348 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; 349 if (!(s->cap & sv_mode)) { 350 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 351 } 352 sc[pass].levels = 4; 353 sc[pass].ptidxbits = 9; 354 sc[pass].ptesize = 8; 355 break; 356 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 357 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; 358 if (!(s->cap & sv_mode)) { 359 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 360 } 361 sc[pass].levels = 5; 362 sc[pass].ptidxbits = 9; 363 sc[pass].ptesize = 8; 364 break; 365 default: 366 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 367 } 368 } 369 }; 370 371 /* S/G stages translation tables root pointers */ 372 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 373 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); 374 addr = (en_s && en_g) ? satp : iotlb->iova; 375 base = en_g ? gatp : satp; 376 pass = en_g ? G_STAGE : S_STAGE; 377 378 do { 379 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; 380 const unsigned va_bits = widened + sc[pass].ptidxbits; 381 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * 382 (sc[pass].levels - 1 - sc[pass].step); 383 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); 384 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; 385 const bool ade = 386 ctx->tc & (pass ? 
    do {
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_len = va_skip + va_bits;
            const uint64_t va_mask = (1ULL << va_len) - 1;

            if (pass == S_STAGE && va_len > 32) {
                target_ulong mask, masked_msbs;

                mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
                masked_msbs = (addr >> (va_len - 1)) & mask;

                if (masked_msbs != 0 && masked_msbs != mask) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
                }
            } else {
                if ((addr & va_mask) != addr) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
                }
            }
        }

        if (pass == S_STAGE) {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
        } else {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            break;                /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break;                /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break;                /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break;                /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break;                /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break;                /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break;                /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /* Continue with S-Stage translation? */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                        : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /* Continue with G-Stage translation? */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /* Continue with G-Stage translation? */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}

static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
    /* Only flag the PID field as valid when a process id is reported. */
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, pv);

    if (pv) {
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}

/* Redirect MSI write for given GPA. */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    size_t offset;
    uint32_t n190;
    uint64_t pte[2];
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    offset = intn * sizeof(pte);

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    if (addr & offset) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    addr |= offset;
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
                          MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047), or when the destination address is
     * not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page
     * tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }
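
    /*
     * What follows implements MRIF (memory-resident interrupt file)
     * delivery: set the pending bit for interrupt identity 'data' in
     * guest memory, then send a notice MSI if the matching enable bit
     * is set. A worked example of the bit layout used below: for
     * data == 100 (0x64), the pending double-word lives at byte offset
     * (0x64 & 0x7c0) >> 3 == 8 within the MRIF, and the bit within it
     * is 1ULL << (100 & 0x3f) == bit 36.
     */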
601 */ 602 res = MEMTX_ACCESS_ERROR; 603 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; 604 goto err; 605 } 606 607 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { 608 case RISCV_IOMMU_MSI_PTE_M_BASIC: 609 /* MSI Pass-through mode */ 610 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); 611 612 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 613 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 614 gpa, addr); 615 616 res = dma_memory_write(s->target_as, addr, &data, size, attrs); 617 if (res != MEMTX_OK) { 618 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 619 goto err; 620 } 621 622 return MEMTX_OK; 623 case RISCV_IOMMU_MSI_PTE_M_MRIF: 624 /* MRIF mode, continue. */ 625 break; 626 default: 627 res = MEMTX_ACCESS_ERROR; 628 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 629 goto err; 630 } 631 632 /* 633 * Report an error for interrupt identities exceeding the maximum allowed 634 * for an IMSIC interrupt file (2047) or destination address is not 32-bit 635 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 636 */ 637 if ((data > 2047) || (gpa & 3)) { 638 res = MEMTX_ACCESS_ERROR; 639 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 640 goto err; 641 } 642 643 /* MSI MRIF mode, non atomic pending bit update */ 644 645 /* MRIF pending bit address */ 646 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; 647 addr = addr | ((data & 0x7c0) >> 3); 648 649 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 650 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 651 gpa, addr); 652 653 /* MRIF pending bit mask */ 654 data = 1ULL << (data & 0x03f); 655 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 656 if (res != MEMTX_OK) { 657 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 658 goto err; 659 } 660 661 intn = intn | data; 662 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); 663 if (res != MEMTX_OK) { 664 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 665 goto err; 666 } 667 668 /* Get MRIF enable bits */ 669 addr = addr + sizeof(intn); 670 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 671 if (res != MEMTX_OK) { 672 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 673 goto err; 674 } 675 676 if (!(intn & data)) { 677 /* notification disabled, MRIF update completed. */ 678 return MEMTX_OK; 679 } 680 681 /* Send notification message */ 682 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); 683 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | 684 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); 685 686 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); 687 if (res != MEMTX_OK) { 688 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 689 goto err; 690 } 691 692 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); 693 694 return MEMTX_OK; 695 696 err: 697 riscv_iommu_report_fault(s, ctx, fault_type, cause, 698 !!ctx->process_id, 0, 0); 699 return res; 700 } 701 702 /* 703 * Check device context configuration as described by the 704 * riscv-iommu spec section "Device-context configuration 705 * checks". 
706 */ 707 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, 708 RISCVIOMMUContext *ctx) 709 { 710 uint32_t fsc_mode, msi_mode; 711 uint64_t gatp; 712 713 if (!(s->cap & RISCV_IOMMU_CAP_ATS) && 714 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || 715 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || 716 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { 717 return false; 718 } 719 720 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && 721 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || 722 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { 723 return false; 724 } 725 726 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && 727 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { 728 return false; 729 } 730 731 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && 732 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { 733 return false; 734 } 735 736 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { 737 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); 738 739 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && 740 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 741 return false; 742 } 743 } 744 745 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 746 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && 747 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 748 return false; 749 } 750 751 fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 752 753 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { 754 switch (fsc_mode) { 755 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 756 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { 757 return false; 758 } 759 break; 760 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 761 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { 762 return false; 763 } 764 break; 765 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 766 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { 767 return false; 768 } 769 break; 770 } 771 } else { 772 /* DC.tc.PDTV is 0 */ 773 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { 774 return false; 775 } 776 777 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 778 if (fsc_mode == RISCV_IOMMU_CAP_SV32 && 779 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 780 return false; 781 } 782 } else { 783 switch (fsc_mode) { 784 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 785 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 786 return false; 787 } 788 break; 789 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 790 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 791 return false; 792 } 793 break; 794 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 795 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 796 return false; 797 } 798 break; 799 } 800 } 801 } 802 803 /* 804 * CAP_END is always zero (only one endianess). FCTL_BE is 805 * always zero (little-endian accesses). Thus TC_SBE must 806 * always be LE, i.e. zero. 807 */ 808 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { 809 return false; 810 } 811 812 return true; 813 } 814 815 /* 816 * Validate process context (PC) according to section 817 * "Process-context configuration checks". 
818 */ 819 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, 820 RISCVIOMMUContext *ctx) 821 { 822 uint32_t mode; 823 824 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { 825 return false; 826 } 827 828 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { 829 return false; 830 } 831 832 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 833 switch (mode) { 834 case RISCV_IOMMU_DC_FSC_MODE_BARE: 835 /* sv39 and sv32 modes have the same value (8) */ 836 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 837 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 838 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 839 break; 840 default: 841 return false; 842 } 843 844 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 845 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 && 846 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 847 return false; 848 } 849 } else { 850 switch (mode) { 851 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 852 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 853 return false; 854 } 855 break; 856 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 857 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 858 return false; 859 } 860 break; 861 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 862 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 863 return false; 864 } 865 break; 866 } 867 } 868 869 return true; 870 } 871 872 /** 873 * pdt_memory_read: PDT wrapper of dma_memory_read. 874 * 875 * @s: IOMMU Device State 876 * @ctx: Device Translation Context with devid and pasid set 877 * @addr: address within that address space 878 * @buf: buffer with the data transferred 879 * @len: length of the data transferred 880 * @attrs: memory transaction attributes 881 */ 882 static MemTxResult pdt_memory_read(RISCVIOMMUState *s, 883 RISCVIOMMUContext *ctx, 884 dma_addr_t addr, 885 void *buf, dma_addr_t len, 886 MemTxAttrs attrs) 887 { 888 uint64_t gatp_mode, pte; 889 struct { 890 unsigned char step; 891 unsigned char levels; 892 unsigned char ptidxbits; 893 unsigned char ptesize; 894 } sc; 895 MemTxResult ret; 896 dma_addr_t base = addr; 897 898 /* G stages translation mode */ 899 gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 900 if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 901 goto out; 902 } 903 904 /* G stages translation tables root pointer */ 905 base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 906 907 /* Start at step 0 */ 908 sc.step = 0; 909 910 if (s->fctl & RISCV_IOMMU_FCTL_GXL) { 911 /* 32bit mode for GXL == 1 */ 912 switch (gatp_mode) { 913 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 914 if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) { 915 return MEMTX_ACCESS_ERROR; 916 } 917 sc.levels = 2; 918 sc.ptidxbits = 10; 919 sc.ptesize = 4; 920 break; 921 default: 922 return MEMTX_ACCESS_ERROR; 923 } 924 } else { 925 /* 64bit mode for GXL == 0 */ 926 switch (gatp_mode) { 927 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 928 if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) { 929 return MEMTX_ACCESS_ERROR; 930 } 931 sc.levels = 3; 932 sc.ptidxbits = 9; 933 sc.ptesize = 8; 934 break; 935 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 936 if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) { 937 return MEMTX_ACCESS_ERROR; 938 } 939 sc.levels = 4; 940 sc.ptidxbits = 9; 941 sc.ptesize = 8; 942 break; 943 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 944 if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) { 945 return MEMTX_ACCESS_ERROR; 946 } 947 sc.levels = 5; 948 sc.ptidxbits = 9; 949 sc.ptesize = 8; 950 break; 951 default: 952 return MEMTX_ACCESS_ERROR; 953 } 954 } 955 956 do { 957 const unsigned va_bits = (sc.step ? 
    do {
        const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
                                 (sc.levels - 1 - sc.step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc.ptesize;

        /* Address range check before first level lookup */
        if (!sc.step) {
            const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
            if ((addr & va_mask) != addr) {
                return MEMTX_ACCESS_ERROR;
            }
        }

        /* Read page table entry */
        if (sc.ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
        }
        if (ret != MEMTX_OK) {
            return ret;
        }

        sc.step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            return MEMTX_ACCESS_ERROR; /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            /* Reserved leaf PTE flags: PTE_W + PTE_X */
            return MEMTX_ACCESS_ERROR;
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
        } else {
            /* Leaf PTE, translation completed. */
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            break;
        }

        if (sc.step == sc.levels) {
            return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
        }
    } while (1);

out:
    return dma_memory_read(s->target_as, base, buf, len, attrs);
}

/*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
 *
 * @s         : IOMMU Device State
 * @ctx       : Device Translation Context with devid and process_id set.
 * @return    : success or fault code.
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    int depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }
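
    /*
     * For instance, with the base format at 2LVL (depth == 1) the limit
     * evaluates to 1 << (9 + 6 + 1) == 1 << 16, matching the 16-bit
     * device id width listed above.
     */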

    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }
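
    /*
     * Sketch of the walk below for PD20 (three-level PDT): process_id
     * bits [19:17] index the first non-leaf level (split == 17), bits
     * [16:8] the second (split == 8), and bits [7:0] select the 16-byte
     * leaf process context on the last page.
     */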
    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);

        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (pdt_memory_read(s, ctx, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PDTE_VALID)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}
1233 */ 1234 return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); 1235 } 1236 1237 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, 1238 gpointer data) 1239 { 1240 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1241 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1242 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1243 ctx->devid == arg->devid && 1244 ctx->process_id == arg->process_id) { 1245 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1246 } 1247 } 1248 1249 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, 1250 gpointer data) 1251 { 1252 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1253 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1254 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1255 ctx->devid == arg->devid) { 1256 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1257 } 1258 } 1259 1260 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, 1261 gpointer data) 1262 { 1263 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1264 if (ctx->tc & RISCV_IOMMU_DC_TC_V) { 1265 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1266 } 1267 } 1268 1269 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, 1270 uint32_t devid, uint32_t process_id) 1271 { 1272 GHashTable *ctx_cache; 1273 RISCVIOMMUContext key = { 1274 .devid = devid, 1275 .process_id = process_id, 1276 }; 1277 ctx_cache = g_hash_table_ref(s->ctx_cache); 1278 g_hash_table_foreach(ctx_cache, func, &key); 1279 g_hash_table_unref(ctx_cache); 1280 } 1281 1282 /* Find or allocate translation context for a given {device_id, process_id} */ 1283 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, 1284 unsigned devid, unsigned process_id, 1285 void **ref) 1286 { 1287 GHashTable *ctx_cache; 1288 RISCVIOMMUContext *ctx; 1289 RISCVIOMMUContext key = { 1290 .devid = devid, 1291 .process_id = process_id, 1292 }; 1293 1294 ctx_cache = g_hash_table_ref(s->ctx_cache); 1295 ctx = g_hash_table_lookup(ctx_cache, &key); 1296 1297 if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { 1298 *ref = ctx_cache; 1299 return ctx; 1300 } 1301 1302 ctx = g_new0(RISCVIOMMUContext, 1); 1303 ctx->devid = devid; 1304 ctx->process_id = process_id; 1305 1306 int fault = riscv_iommu_ctx_fetch(s, ctx); 1307 if (!fault) { 1308 if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { 1309 g_hash_table_unref(ctx_cache); 1310 ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, 1311 riscv_iommu_ctx_equal, 1312 g_free, NULL); 1313 g_hash_table_ref(ctx_cache); 1314 g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); 1315 } 1316 g_hash_table_add(ctx_cache, ctx); 1317 *ref = ctx_cache; 1318 return ctx; 1319 } 1320 1321 g_hash_table_unref(ctx_cache); 1322 *ref = NULL; 1323 1324 riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, 1325 fault, !!process_id, 0, 0); 1326 1327 g_free(ctx); 1328 return NULL; 1329 } 1330 1331 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) 1332 { 1333 if (ref) { 1334 g_hash_table_unref((GHashTable *)ref); 1335 } 1336 } 1337 1338 /* Find or allocate address space for a given device */ 1339 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) 1340 { 1341 RISCVIOMMUSpace *as; 1342 1343 /* FIXME: PCIe bus remapping for attached endpoints. 

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (as == NULL) {
        char name[64];
        as = g_new0(RISCVIOMMUSpace, 1);

        as->iommu = s;
        as->devid = devid;

        snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
                 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid),
                 PCI_FUNC(as->devid));

        /* IOVA address space, untranslated addresses */
        memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
                                 TYPE_RISCV_IOMMU_MEMORY_REGION,
                                 OBJECT(as), "riscv_iommu", UINT64_MAX);
        address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);

        QLIST_INSERT_HEAD(&s->spaces, as, list);

        trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
                              PCI_SLOT(as->devid), PCI_FUNC(as->devid));
    }
    return &as->iova_as;
}

/* Translation Object cache support */
static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
    RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
    return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
           t1->iova == t2->iova && t1->tag == t2->tag;
}

static guint riscv_iommu_iot_hash(gconstpointer v)
{
    RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
    return (guint)t->iova;
}
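
/*
 * Only the IOVA page number feeds the hash; entries that share a page
 * number but differ in gscid, pscid or translation tag land in the same
 * bucket and are disambiguated by riscv_iommu_iot_equal() above.
 */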

/* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
/* GV: 0 AV: 0 GVMA: 1 */
static
void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
static
void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
static
void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
/* GV: 1 AV: 0 GVMA: 1 */
static
void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
                                              gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
/* GV: 1 AV: 1 GVMA: 1 */
static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
                                                   gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* caller should keep ref-count for iot_cache object */
static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
    GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
{
    RISCVIOMMUEntry key = {
        .tag = transtag,
        .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
        .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
        .iova = PPN_DOWN(iova),
    };
    return g_hash_table_lookup(iot_cache, &key);
}

/* caller should keep ref-count for iot_cache object */
static void riscv_iommu_iot_update(RISCVIOMMUState *s,
    GHashTable *iot_cache, RISCVIOMMUEntry *iot)
{
    if (!s->iot_limit) {
        return;
    }

    if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
        iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                          riscv_iommu_iot_equal,
                                          g_free, NULL);
        g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
    }
    g_hash_table_add(iot_cache, iot);
}

static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
    uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
{
    GHashTable *iot_cache;
    RISCVIOMMUEntry key = {
        .tag = transtag,
        .gscid = gscid,
        .pscid = pscid,
        .iova = PPN_DOWN(iova),
    };

    iot_cache = g_hash_table_ref(s->iot_cache);
    g_hash_table_foreach(iot_cache, func, &key);
    g_hash_table_unref(iot_cache);
}
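
/*
 * The translation tag is derived from which stages are enabled:
 * neither stage -> BY (bypass), S-stage only -> SS, G-stage only -> VG,
 * both stages -> VN (nested).
 */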
static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
{
    uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
               RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
    } else {
        return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
               RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
    }
}

static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);

    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
1611 */ 1612 if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { 1613 iot = g_new0(RISCVIOMMUEntry, 1); 1614 iot->iova = PPN_DOWN(iotlb->iova); 1615 iot->phys = PPN_DOWN(iotlb->translated_addr); 1616 iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); 1617 iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); 1618 iot->perm = iotlb->perm; 1619 iot->tag = transtag; 1620 riscv_iommu_iot_update(s, iot_cache, iot); 1621 } 1622 1623 done: 1624 g_hash_table_unref(iot_cache); 1625 1626 if (enable_pri && fault) { 1627 struct riscv_iommu_pq_record pr = {0}; 1628 if (enable_pid) { 1629 pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, 1630 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); 1631 } 1632 pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); 1633 pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | 1634 RISCV_IOMMU_PREQ_PAYLOAD_M; 1635 riscv_iommu_pri(s, &pr); 1636 return fault; 1637 } 1638 1639 if (fault) { 1640 unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; 1641 1642 if (iotlb->perm & IOMMU_RW) { 1643 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 1644 } else if (iotlb->perm & IOMMU_RO) { 1645 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; 1646 } 1647 1648 riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, 1649 iotlb->iova, iotlb->translated_addr); 1650 return fault; 1651 } 1652 1653 return 0; 1654 } 1655 1656 /* IOMMU Command Interface */ 1657 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, 1658 uint64_t addr, uint32_t data) 1659 { 1660 /* 1661 * ATS processing in this implementation of the IOMMU is synchronous, 1662 * no need to wait for completions here. 1663 */ 1664 if (!notify) { 1665 return MEMTX_OK; 1666 } 1667 1668 return dma_memory_write(s->target_as, addr, &data, sizeof(data), 1669 MEMTXATTRS_UNSPECIFIED); 1670 } 1671 1672 static void riscv_iommu_ats(RISCVIOMMUState *s, 1673 struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, 1674 IOMMUAccessFlags perm, 1675 void (*trace_fn)(const char *id)) 1676 { 1677 RISCVIOMMUSpace *as = NULL; 1678 IOMMUNotifier *n; 1679 IOMMUTLBEvent event; 1680 uint32_t pid; 1681 uint32_t devid; 1682 const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; 1683 1684 if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { 1685 /* Use device segment and requester id */ 1686 devid = get_field(cmd->dword0, 1687 RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); 1688 } else { 1689 devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); 1690 } 1691 1692 pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); 1693 1694 QLIST_FOREACH(as, &s->spaces, list) { 1695 if (as->devid == devid) { 1696 break; 1697 } 1698 } 1699 1700 if (!as || !as->notifier) { 1701 return; 1702 } 1703 1704 event.type = flag; 1705 event.entry.perm = perm; 1706 event.entry.target_as = s->target_as; 1707 1708 IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { 1709 if (!pv || n->iommu_idx == pid) { 1710 event.entry.iova = n->start; 1711 event.entry.addr_mask = n->end - n->start; 1712 trace_fn(as->iova_mr.parent_obj.name); 1713 memory_region_notify_iommu_one(n, &event); 1714 } 1715 } 1716 } 1717 1718 static void riscv_iommu_ats_inval(RISCVIOMMUState *s, 1719 struct riscv_iommu_command *cmd) 1720 { 1721 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, 1722 trace_riscv_iommu_ats_inval); 1723 } 1724 1725 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, 1726 struct riscv_iommu_command *cmd) 1727 { 1728 unsigned resp_code = get_field(cmd->dword1, 1729 RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); 1730 1731 /* Using the access flag to carry 

static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                           trace_riscv_iommu_ats_prgr);
}

static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
    bool ok = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode ||
        new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
        new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
        ok = true;
    } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
        ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
             old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    }

    if (ok) {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        new_ddtp = old_ddtp;
    }
    s->ddtp = new_ddtp;

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}

/* Command function and opcode field. */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))

static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
        {
            bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
            bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
            bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
            uint32_t gscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_GSCID);
            uint32_t pscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_PSCID);
            hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;

            if (pscv) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            }

            func = riscv_iommu_iot_inval_all;

            if (gv) {
                func = (av) ? riscv_iommu_iot_inval_gscid_iova :
                              riscv_iommu_iot_inval_gscid;
            }

            riscv_iommu_iot_inval(
                s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);

            riscv_iommu_iot_inval(
                s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
            break;
        }

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
        {
            bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
            bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
            bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
            uint32_t gscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_GSCID);
            uint32_t pscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_PSCID);
            hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
            RISCVIOMMUTransTag transtag;

            if (gv) {
                transtag = RISCV_IOMMU_TRANS_TAG_VN;
                if (pscv) {
                    func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
                                  riscv_iommu_iot_inval_gscid_pscid;
                } else {
                    func = (av) ? riscv_iommu_iot_inval_gscid_iova :
                                  riscv_iommu_iot_inval_gscid;
                }
            } else {
                transtag = RISCV_IOMMU_TRANS_TAG_SS;
                if (pscv) {
                    func = (av) ? riscv_iommu_iot_inval_pscid_iova :
                                  riscv_iommu_iot_inval_pscid;
                } else {
                    func = (av) ? riscv_iommu_iot_inval_iova :
                                  riscv_iommu_iot_inval_all;
                }
            }

            riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
            break;
        }

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid instruction, do not advance instruction index. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}
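
/*
 * In the three queue control handlers below, the LOG2SZ field of the
 * base register encodes log2(entries) - 1, so the index mask is
 * (2 << LOG2SZ) - 1; e.g. a LOG2SZ value of 9 yields a 1024-entry ring.
 */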
static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}
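
/*
 * Sizing note (illustrative): LOG2SZ encodes log2(entries) - 1, so
 * (2ULL << LOG2SZ) - 1 yields the index mask. E.g. LOG2SZ == 3
 * describes a 16-entry command queue with cq_mask == 15. The same
 * math applies to the fault and page-request queues below.
 */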
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
        s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        ctrl_set = RISCV_IOMMU_FQCSR_FQON;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
                   RISCV_IOMMU_FQCSR_FQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
                   RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                              RISCV_IOMMU_TR_RESPONSE_FAULT |
                              (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}

typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}
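
/*
 * Illustrative example: if icvec_avail_vectors holds 3 in its CIV
 * field, a guest write of CIV == 5 is clamped by the MIN() above to
 * vector 3. The same per-field clamp applies to FIV, PMIV and PIV.
 */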
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
                                           uint32_t regb,
                                           bool prev_cy_inh)
{
    switch (regb) {
    case RISCV_IOMMU_REG_IOCOUNTINH:
        riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
        break;

    case RISCV_IOMMU_REG_IOHPMCYCLES:
    case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
        riscv_iommu_process_hpmcycle_write(s);
        break;

    case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
        riscv_iommu_process_hpmevt_write(s, regb & ~7);
        break;
    }
}

/*
 * Write the resulting value of 'data' for the register specified by
 * 'reg_addr', after applying its read-only, read-write and
 * write-1-to-clear bit masks, to the pointer 'dest'.
 *
 * The result is written in little-endian.
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}
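
/*
 * Worked example with made-up masks: for ro == 0xF0, wc == 0x01,
 * rw == 0xA5 and data == 0x3C, the stored value is
 * ((0xA5 & 0xF0) | (0x3C & ~0xF0)) == 0xAC; no write-1-to-clear bit
 * fires since (0x3C & 0x01) == 0.
 */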
static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;
    bool cy_inh = false;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    case RISCV_IOMMU_REG_IOCOUNTINH:
        if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
            break;
        }
        /* Store previous value of CY bit. */
        cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
                    RISCV_IOMMU_IOCOUNTINH_CY);
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with core logic.
     * If system software updates a register while the relevant BUSY
     * bit is set, the IOMMU behavior for the additional writes is
     * UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    /* Process HPM writes and update any internal state if needed. */
    if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
        regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
        riscv_iommu_process_hpm_writes(s, regb, cy_inh);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    /* Compute cycle register value. */
    if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
        val = riscv_iommu_hpmcycle_read(s);
        ptr = (uint8_t *)&val + (addr & 7);
    } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
        /*
         * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before the timer
         * callback completes, in which case the CY_OF bit in
         * RISCV_IOMMU_IOHPMCYCLES_OVF would still be 0. Take the CY_OF
         * bit state from the RISCV_IOMMU_REG_IOHPMCYCLES register
         * instead, as it does not depend on the timer callback and is
         * computed directly from the cycle overflow.
         */
        val = ldq_le_p(&s->regs_rw[addr]);
        val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
               ? RISCV_IOMMU_IOCOUNTOVF_CY
               : 0;
        ptr = (uint8_t *)&val + (addr & 3);
    } else {
        ptr = &s->regs_rw[addr];
    }

    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};
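
/*
 * Note: only naturally aligned 4- and 8-byte accesses are declared
 * valid; the explicit (addr & (size - 1)) checks in the handlers above
 * reject unaligned offsets independently of the core .valid/.impl
 * handling.
 */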
/*
 * Translations matching the MSI pattern check are redirected to the
 * "riscv-iommu-trap" memory region as untranslated addresses, for
 * additional MSI/MRIF interception by the IOMMU interrupt remapping
 * implementation.
 * Note: device emulation code generating an MSI is expected to provide
 * valid memory transaction attributes with requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
{
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
}

static void riscv_iommu_instance_init(Object *obj)
{
    RISCVIOMMUState *s = RISCV_IOMMU(obj);

    /* Enable translation debug interface */
    s->cap = RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}
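
/*
 * The three register arrays implement a simple access-control model:
 * a bit set in regs_ro is preserved across guest writes, a bit set in
 * regs_wc is cleared when the guest writes 1 to it, and regs_rw holds
 * the live value (see riscv_iommu_write_reg_val() above).
 * instance_init() marks everything read-only; realize() selectively
 * re-enables the writable fields.
 */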
static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
                  RISCV_IOMMU_CAP_SVRSW60T59B;
    }

    if (s->hpm_cntrs > 0) {
        /* Clip the number of HPM counters to the maximum supported (31). */
        if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
            s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
        }
        /* Enable hardware performance monitor interface */
        s->cap |= RISCV_IOMMU_CAP_HPM;
    }

    /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /*
     * Register the complete MMIO space, including MSI/PBA registers.
     * Note: the PCIDevice implementation will add an overlapping MR
     * for MSI/PBA, which it manages directly.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
             RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
             RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
             RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
             RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
             RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
             RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* If HPM registers are enabled. */
    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        /* +1 for the cycle counter bit. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
                 ~((2 << s->hpm_cntrs) - 1));
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
               0x00, s->hpm_cntrs * 8);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
               0x00, s->hpm_cntrs * 8);
    }
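
    /*
     * Illustrative mask math: with hpm_cntrs == 8, (2 << 8) - 1 == 0x1ff,
     * leaving bits 8:0 of IOCOUNTINH writable: the CY bit plus eight
     * event-counter inhibit bits.
     */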
    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
                           "riscv-iommu-downstream");
    } else {
        /* Fall back to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
                          "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        s->hpm_timer =
            timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
        s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
    }
}

static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        g_hash_table_unref(s->hpm_event_ctr_map);
        timer_free(s->hpm_timer);
    }
}

void riscv_iommu_reset(RISCVIOMMUState *s)
{
    uint32_t reg_clr;
    int ddtp_mode;

    /*
     * Clear DDTP while setting DDTP.MODE back to the user's
     * initial setting.
     */
    ddtp_mode = s->enable_off ?
                RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);

    reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
              RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
              RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
              RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);

    riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);

    g_hash_table_remove_all(s->ctx_cache);
    g_hash_table_remove_all(s->iot_cache);
}

static const Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
                       RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
                       LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
                     TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
                      RISCV_IOMMU_IOCOUNT_NUM),
};
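
/*
 * The device is not user-creatable (see class_init below), so these
 * properties are set programmatically by the wrapping riscv-iommu-pci
 * or riscv-iommu-sys device, e.g. (hypothetical wiring):
 *
 *     qdev_prop_set_bit(DEVICE(iommu), "ats", false);
 *     qdev_prop_set_uint8(DEVICE(iommu), "hpm-counters", 0);
 */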
static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .instance_init = riscv_iommu_instance_init,
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};

/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all DMA translations with the original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}

static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}

static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}

static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find the first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find the first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}
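
/*
 * Note: when several IOMMUs share a PCIe bus they are linked through
 * the 'iommus' QLIST (see riscv_iommu_pci_setup_iommu() below). The
 * lookup above first rewinds to the head of that list, then returns
 * the address space of the first IOMMU claiming the requester's BDF,
 * falling back to the system memory address space.
 */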
static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
                                 Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
                   pci_bus_num(bus));
    }
}

static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass,
                                           const void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);