1 /* 2 * QEMU emulation of an Intel IOMMU (VT-d) 3 * (DMA Remapping device) 4 * 5 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> 6 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 18 * You should have received a copy of the GNU General Public License along 19 * with this program; if not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #include "qemu/osdep.h" 23 #include "qemu/error-report.h" 24 #include "qapi/error.h" 25 #include "hw/sysbus.h" 26 #include "exec/address-spaces.h" 27 #include "intel_iommu_internal.h" 28 #include "hw/pci/pci.h" 29 #include "hw/pci/pci_bus.h" 30 #include "hw/i386/pc.h" 31 #include "hw/i386/apic-msidef.h" 32 #include "hw/boards.h" 33 #include "hw/i386/x86-iommu.h" 34 #include "hw/pci-host/q35.h" 35 #include "sysemu/kvm.h" 36 #include "hw/i386/apic_internal.h" 37 #include "kvm_i386.h" 38 #include "trace.h" 39 40 /*#define DEBUG_INTEL_IOMMU*/ 41 #ifdef DEBUG_INTEL_IOMMU 42 enum { 43 DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, 44 DEBUG_CACHE, DEBUG_IR, 45 }; 46 #define VTD_DBGBIT(x) (1 << DEBUG_##x) 47 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); 48 49 #define VTD_DPRINTF(what, fmt, ...) do { \ 50 if (vtd_dbgflags & VTD_DBGBIT(what)) { \ 51 fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ 52 ## __VA_ARGS__); } \ 53 } while (0) 54 #else 55 #define VTD_DPRINTF(what, fmt, ...) 
do {} while (0) 56 #endif 57 58 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, 59 uint64_t wmask, uint64_t w1cmask) 60 { 61 stq_le_p(&s->csr[addr], val); 62 stq_le_p(&s->wmask[addr], wmask); 63 stq_le_p(&s->w1cmask[addr], w1cmask); 64 } 65 66 static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask) 67 { 68 stq_le_p(&s->womask[addr], mask); 69 } 70 71 static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, 72 uint32_t wmask, uint32_t w1cmask) 73 { 74 stl_le_p(&s->csr[addr], val); 75 stl_le_p(&s->wmask[addr], wmask); 76 stl_le_p(&s->w1cmask[addr], w1cmask); 77 } 78 79 static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask) 80 { 81 stl_le_p(&s->womask[addr], mask); 82 } 83 84 /* "External" get/set operations */ 85 static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) 86 { 87 uint64_t oldval = ldq_le_p(&s->csr[addr]); 88 uint64_t wmask = ldq_le_p(&s->wmask[addr]); 89 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); 90 stq_le_p(&s->csr[addr], 91 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); 92 } 93 94 static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) 95 { 96 uint32_t oldval = ldl_le_p(&s->csr[addr]); 97 uint32_t wmask = ldl_le_p(&s->wmask[addr]); 98 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); 99 stl_le_p(&s->csr[addr], 100 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); 101 } 102 103 static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr) 104 { 105 uint64_t val = ldq_le_p(&s->csr[addr]); 106 uint64_t womask = ldq_le_p(&s->womask[addr]); 107 return val & ~womask; 108 } 109 110 static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr) 111 { 112 uint32_t val = ldl_le_p(&s->csr[addr]); 113 uint32_t womask = ldl_le_p(&s->womask[addr]); 114 return val & ~womask; 115 } 116 117 /* "Internal" get/set operations */ 118 static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr) 119 { 120 return ldq_le_p(&s->csr[addr]); 121 } 122 123 static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr) 124 { 125 return ldl_le_p(&s->csr[addr]); 126 } 127 128 static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val) 129 { 130 stq_le_p(&s->csr[addr], val); 131 } 132 133 static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr, 134 uint32_t clear, uint32_t mask) 135 { 136 uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask; 137 stl_le_p(&s->csr[addr], new_val); 138 return new_val; 139 } 140 141 static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, 142 uint64_t clear, uint64_t mask) 143 { 144 uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask; 145 stq_le_p(&s->csr[addr], new_val); 146 return new_val; 147 } 148 149 /* GHashTable functions */ 150 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) 151 { 152 return *((const uint64_t *)v1) == *((const uint64_t *)v2); 153 } 154 155 static guint vtd_uint64_hash(gconstpointer v) 156 { 157 return (guint)*(const uint64_t *)v; 158 } 159 160 static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, 161 gpointer user_data) 162 { 163 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; 164 uint16_t domain_id = *(uint16_t *)user_data; 165 return entry->domain_id == domain_id; 166 } 167 168 /* The shift of an addr for a certain level of paging structure */ 169 static inline uint32_t vtd_slpt_level_shift(uint32_t level) 170 { 171 assert(level != 0); 172 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; 173 } 
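/*
 * Worked example (illustrative note, not part of the original source): with
 * VTD_PAGE_SHIFT_4K == 12 and VTD_SL_LEVEL_BITS == 9, vtd_slpt_level_shift()
 * returns 12 for level 1 (4KiB pages), 21 for level 2 (2MiB) and 30 for
 * level 3 (1GiB), matching the 9-bits-per-level second-level paging layout.
 */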
174 175 static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) 176 { 177 return ~((1ULL << vtd_slpt_level_shift(level)) - 1); 178 } 179 180 static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, 181 gpointer user_data) 182 { 183 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; 184 VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; 185 uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; 186 uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; 187 return (entry->domain_id == info->domain_id) && 188 (((entry->gfn & info->mask) == gfn) || 189 (entry->gfn == gfn_tlb)); 190 } 191 192 /* Reset all the gen of VTDAddressSpace to zero and set the gen of 193 * IntelIOMMUState to 1. 194 */ 195 static void vtd_reset_context_cache(IntelIOMMUState *s) 196 { 197 VTDAddressSpace *vtd_as; 198 VTDBus *vtd_bus; 199 GHashTableIter bus_it; 200 uint32_t devfn_it; 201 202 g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); 203 204 VTD_DPRINTF(CACHE, "global context_cache_gen=1"); 205 while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { 206 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { 207 vtd_as = vtd_bus->dev_as[devfn_it]; 208 if (!vtd_as) { 209 continue; 210 } 211 vtd_as->context_cache_entry.context_cache_gen = 0; 212 } 213 } 214 s->context_cache_gen = 1; 215 } 216 217 static void vtd_reset_iotlb(IntelIOMMUState *s) 218 { 219 assert(s->iotlb); 220 g_hash_table_remove_all(s->iotlb); 221 } 222 223 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id, 224 uint32_t level) 225 { 226 return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | 227 ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); 228 } 229 230 static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) 231 { 232 return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; 233 } 234 235 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, 236 hwaddr addr) 237 { 238 VTDIOTLBEntry *entry; 239 uint64_t key; 240 int level; 241 242 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { 243 key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), 244 source_id, level); 245 entry = g_hash_table_lookup(s->iotlb, &key); 246 if (entry) { 247 goto out; 248 } 249 } 250 251 out: 252 return entry; 253 } 254 255 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, 256 uint16_t domain_id, hwaddr addr, uint64_t slpte, 257 bool read_flags, bool write_flags, 258 uint32_t level) 259 { 260 VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); 261 uint64_t *key = g_malloc(sizeof(*key)); 262 uint64_t gfn = vtd_get_iotlb_gfn(addr, level); 263 264 trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); 265 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { 266 trace_vtd_iotlb_reset("iotlb exceeds size limit"); 267 vtd_reset_iotlb(s); 268 } 269 270 entry->gfn = gfn; 271 entry->domain_id = domain_id; 272 entry->slpte = slpte; 273 entry->read_flags = read_flags; 274 entry->write_flags = write_flags; 275 entry->mask = vtd_slpt_level_page_mask(level); 276 *key = vtd_get_iotlb_key(gfn, source_id, level); 277 g_hash_table_replace(s->iotlb, key, entry); 278 } 279 280 /* Given the reg addr of both the message data and address, generate an 281 * interrupt via MSI. 
 */
static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
                                   hwaddr mesg_data_reg)
{
    MSIMessage msi;

    assert(mesg_data_reg < DMAR_REG_SIZE);
    assert(mesg_addr_reg < DMAR_REG_SIZE);

    msi.address = vtd_get_long_raw(s, mesg_addr_reg);
    msi.data = vtd_get_long_raw(s, mesg_data_reg);

    VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
                msi.address, msi.data);
    apic_get_class()->send_msi(&msi);
}

/* Generate a fault event to software via MSI if conditions are met.
 * Notice that the value of FSTS_REG being passed to it should be the one
 * before any update.
 */
static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
{
    if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
        pre_fsts & VTD_FSTS_IQE) {
        VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
                    "to be serviced by software, fault event is not generated "
                    "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
    if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
        VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
    } else {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}

/* Check if the Fault (F) field of the Fault Recording Register referenced by
 * @index is Set.
 */
static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
}

/* Update the PPF field of the Fault Status Register.
 * Should be called whenever the F field of any fault recording register
 * is changed.
 */
static void vtd_update_fsts_ppf(IntelIOMMUState *s)
{
    uint32_t i;
    uint32_t ppf_mask = 0;

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        if (vtd_is_frcd_set(s, i)) {
            ppf_mask = VTD_FSTS_PPF;
            break;
        }
    }
    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
    VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
}

static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
    vtd_update_fsts_ppf(s);
}

/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
                            uint16_t source_id, hwaddr addr,
                            VTDFaultReason fault, bool is_write)
{
    uint64_t hi = 0, lo;
    hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);

    assert(index < DMAR_FRCD_REG_NR);

    lo = VTD_FRCD_FI(addr);
    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
    if (!is_write) {
        hi |= VTD_FRCD_T;
    }
    vtd_set_quad_raw(s, frcd_reg_addr, lo);
    vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
    VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
                ", lo 0x%"PRIx64, index, hi, lo);
}

/* Try to collapse multiple pending faults from the same requester */
static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
{
    uint32_t i;
    uint64_t frcd_reg;
    hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        frcd_reg = vtd_get_quad_raw(s, addr);
        VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
        if ((frcd_reg & VTD_FRCD_F) &&
            ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
            return true;
        }
        addr += 16; /* 128-bit for each */
    }
    return false;
}

/* Log and report a DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
                                  hwaddr addr, VTDFaultReason fault,
                                  bool is_write)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    assert(fault < VTD_FR_MAX);

    if (fault == VTD_FR_RESERVED_ERR) {
        /* This is not a normal fault reason case. Drop it. */
        return;
    }
    VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
                ", is_write %d", source_id, fault, addr, is_write);
    if (fsts_reg & VTD_FSTS_PFO) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "Primary Fault Overflow");
        return;
    }
    if (vtd_try_collapse_fault(s, source_id)) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "compression of faults");
        return;
    }
    if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
        VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
                    "new fault is not recorded, set PFO field");
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
        return;
    }

    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);

    if (fsts_reg & VTD_FSTS_PPF) {
        VTD_DPRINTF(FLOG, "there are pending faults already, "
                    "fault event is not generated");
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
    } else {
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
                                VTD_FSTS_FRI(s->next_frcd_reg));
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
        /* This case actually causes the PPF to be Set.
         * So generate the fault event (interrupt).
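         * (Illustrative note, not part of the original comment: the fault
         * event interrupt is only raised when PPF goes from clear to set
         * and the Interrupt Mask (IM) bit in FECTL_REG is clear; see
         * vtd_generate_fault_event() above.)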
458 */ 459 vtd_generate_fault_event(s, fsts_reg); 460 } 461 } 462 463 /* Handle Invalidation Queue Errors of queued invalidation interface error 464 * conditions. 465 */ 466 static void vtd_handle_inv_queue_error(IntelIOMMUState *s) 467 { 468 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); 469 470 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE); 471 vtd_generate_fault_event(s, fsts_reg); 472 } 473 474 /* Set the IWC field and try to generate an invalidation completion interrupt */ 475 static void vtd_generate_completion_event(IntelIOMMUState *s) 476 { 477 if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) { 478 trace_vtd_inv_desc_wait_irq("One pending, skip current"); 479 return; 480 } 481 vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC); 482 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP); 483 if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) { 484 trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, " 485 "new event not generated"); 486 return; 487 } else { 488 /* Generate the interrupt event */ 489 trace_vtd_inv_desc_wait_irq("Generating complete event"); 490 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); 491 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); 492 } 493 } 494 495 static inline bool vtd_root_entry_present(VTDRootEntry *root) 496 { 497 return root->val & VTD_ROOT_ENTRY_P; 498 } 499 500 static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, 501 VTDRootEntry *re) 502 { 503 dma_addr_t addr; 504 505 addr = s->root + index * sizeof(*re); 506 if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { 507 trace_vtd_re_invalid(re->rsvd, re->val); 508 re->val = 0; 509 return -VTD_FR_ROOT_TABLE_INV; 510 } 511 re->val = le64_to_cpu(re->val); 512 return 0; 513 } 514 515 static inline bool vtd_context_entry_present(VTDContextEntry *context) 516 { 517 return context->lo & VTD_CONTEXT_ENTRY_P; 518 } 519 520 static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, 521 VTDContextEntry *ce) 522 { 523 dma_addr_t addr; 524 525 /* we have checked that root entry is present */ 526 addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); 527 if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { 528 trace_vtd_re_invalid(root->rsvd, root->val); 529 return -VTD_FR_CONTEXT_TABLE_INV; 530 } 531 ce->lo = le64_to_cpu(ce->lo); 532 ce->hi = le64_to_cpu(ce->hi); 533 return 0; 534 } 535 536 static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) 537 { 538 return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; 539 } 540 541 static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) 542 { 543 return slpte & VTD_SL_PT_BASE_ADDR_MASK; 544 } 545 546 /* Whether the pte indicates the address of the page frame */ 547 static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) 548 { 549 return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); 550 } 551 552 /* Get the content of a spte located in @base_addr[@index] */ 553 static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) 554 { 555 uint64_t slpte; 556 557 assert(index < VTD_SL_PT_ENTRY_NR); 558 559 if (dma_memory_read(&address_space_memory, 560 base_addr + index * sizeof(slpte), &slpte, 561 sizeof(slpte))) { 562 slpte = (uint64_t)-1; 563 return slpte; 564 } 565 slpte = le64_to_cpu(slpte); 566 return slpte; 567 } 568 569 /* Given an iova and the level of paging structure, return the offset 570 * of current level. 
571 */ 572 static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level) 573 { 574 return (iova >> vtd_slpt_level_shift(level)) & 575 ((1ULL << VTD_SL_LEVEL_BITS) - 1); 576 } 577 578 /* Check Capability Register to see if the @level of page-table is supported */ 579 static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) 580 { 581 return VTD_CAP_SAGAW_MASK & s->cap & 582 (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); 583 } 584 585 /* Get the page-table level that hardware should use for the second-level 586 * page-table walk from the Address Width field of context-entry. 587 */ 588 static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) 589 { 590 return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); 591 } 592 593 static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) 594 { 595 return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; 596 } 597 598 static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) 599 { 600 uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); 601 return 1ULL << MIN(ce_agaw, VTD_MGAW); 602 } 603 604 /* Return true if IOVA passes range check, otherwise false. */ 605 static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) 606 { 607 /* 608 * Check if @iova is above 2^X-1, where X is the minimum of MGAW 609 * in CAP_REG and AW in context-entry. 610 */ 611 return !(iova & ~(vtd_iova_limit(ce) - 1)); 612 } 613 614 static const uint64_t vtd_paging_entry_rsvd_field[] = { 615 [0] = ~0ULL, 616 /* For not large page */ 617 [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 618 [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 619 [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 620 [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 621 /* For large page */ 622 [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 623 [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 624 [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 625 [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 626 }; 627 628 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) 629 { 630 if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) { 631 /* Maybe large page */ 632 return slpte & vtd_paging_entry_rsvd_field[level + 4]; 633 } else { 634 return slpte & vtd_paging_entry_rsvd_field[level]; 635 } 636 } 637 638 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level 639 * of the translation, can be used for deciding the size of large page. 640 */ 641 static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, 642 uint64_t *slptep, uint32_t *slpte_level, 643 bool *reads, bool *writes) 644 { 645 dma_addr_t addr = vtd_get_slpt_base_from_context(ce); 646 uint32_t level = vtd_get_level_from_context_entry(ce); 647 uint32_t offset; 648 uint64_t slpte; 649 uint64_t access_right_check; 650 651 if (!vtd_iova_range_check(iova, ce)) { 652 VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova); 653 return -VTD_FR_ADDR_BEYOND_MGAW; 654 } 655 656 /* FIXME: what is the Atomics request here? */ 657 access_right_check = is_write ? 
VTD_SL_W : VTD_SL_R; 658 659 while (true) { 660 offset = vtd_iova_level_offset(iova, level); 661 slpte = vtd_get_slpte(addr, offset); 662 663 if (slpte == (uint64_t)-1) { 664 VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " 665 "entry at level %"PRIu32 " for iova 0x%"PRIx64, 666 level, iova); 667 if (level == vtd_get_level_from_context_entry(ce)) { 668 /* Invalid programming of context-entry */ 669 return -VTD_FR_CONTEXT_ENTRY_INV; 670 } else { 671 return -VTD_FR_PAGING_ENTRY_INV; 672 } 673 } 674 *reads = (*reads) && (slpte & VTD_SL_R); 675 *writes = (*writes) && (slpte & VTD_SL_W); 676 if (!(slpte & access_right_check)) { 677 VTD_DPRINTF(GENERAL, "error: lack of %s permission for " 678 "iova 0x%"PRIx64 " slpte 0x%"PRIx64, 679 (is_write ? "write" : "read"), iova, slpte); 680 return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; 681 } 682 if (vtd_slpte_nonzero_rsvd(slpte, level)) { 683 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " 684 "level paging entry level %"PRIu32 " slpte 0x%"PRIx64, 685 level, slpte); 686 return -VTD_FR_PAGING_ENTRY_RSVD; 687 } 688 689 if (vtd_is_last_slpte(slpte, level)) { 690 *slptep = slpte; 691 *slpte_level = level; 692 return 0; 693 } 694 addr = vtd_get_slpte_addr(slpte); 695 level--; 696 } 697 } 698 699 typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); 700 701 /** 702 * vtd_page_walk_level - walk over specific level for IOVA range 703 * 704 * @addr: base GPA addr to start the walk 705 * @start: IOVA range start address 706 * @end: IOVA range end address (start <= addr < end) 707 * @hook_fn: hook func to be called when detected page 708 * @private: private data to be passed into hook func 709 * @read: whether parent level has read permission 710 * @write: whether parent level has write permission 711 * @notify_unmap: whether we should notify invalid entries 712 */ 713 static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, 714 uint64_t end, vtd_page_walk_hook hook_fn, 715 void *private, uint32_t level, 716 bool read, bool write, bool notify_unmap) 717 { 718 bool read_cur, write_cur, entry_valid; 719 uint32_t offset; 720 uint64_t slpte; 721 uint64_t subpage_size, subpage_mask; 722 IOMMUTLBEntry entry; 723 uint64_t iova = start; 724 uint64_t iova_next; 725 int ret = 0; 726 727 trace_vtd_page_walk_level(addr, level, start, end); 728 729 subpage_size = 1ULL << vtd_slpt_level_shift(level); 730 subpage_mask = vtd_slpt_level_page_mask(level); 731 732 while (iova < end) { 733 iova_next = (iova & subpage_mask) + subpage_size; 734 735 offset = vtd_iova_level_offset(iova, level); 736 slpte = vtd_get_slpte(addr, offset); 737 738 if (slpte == (uint64_t)-1) { 739 trace_vtd_page_walk_skip_read(iova, iova_next); 740 goto next; 741 } 742 743 if (vtd_slpte_nonzero_rsvd(slpte, level)) { 744 trace_vtd_page_walk_skip_reserve(iova, iova_next); 745 goto next; 746 } 747 748 /* Permissions are stacked with parents' */ 749 read_cur = read && (slpte & VTD_SL_R); 750 write_cur = write && (slpte & VTD_SL_W); 751 752 /* 753 * As long as we have either read/write permission, this is a 754 * valid entry. The rule works for both page entries and page 755 * table entries. 
756 */ 757 entry_valid = read_cur | write_cur; 758 759 if (vtd_is_last_slpte(slpte, level)) { 760 entry.target_as = &address_space_memory; 761 entry.iova = iova & subpage_mask; 762 /* NOTE: this is only meaningful if entry_valid == true */ 763 entry.translated_addr = vtd_get_slpte_addr(slpte); 764 entry.addr_mask = ~subpage_mask; 765 entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); 766 if (!entry_valid && !notify_unmap) { 767 trace_vtd_page_walk_skip_perm(iova, iova_next); 768 goto next; 769 } 770 trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr, 771 entry.addr_mask, entry.perm); 772 if (hook_fn) { 773 ret = hook_fn(&entry, private); 774 if (ret < 0) { 775 return ret; 776 } 777 } 778 } else { 779 if (!entry_valid) { 780 trace_vtd_page_walk_skip_perm(iova, iova_next); 781 goto next; 782 } 783 ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, 784 MIN(iova_next, end), hook_fn, private, 785 level - 1, read_cur, write_cur, 786 notify_unmap); 787 if (ret < 0) { 788 return ret; 789 } 790 } 791 792 next: 793 iova = iova_next; 794 } 795 796 return 0; 797 } 798 799 /** 800 * vtd_page_walk - walk specific IOVA range, and call the hook 801 * 802 * @ce: context entry to walk upon 803 * @start: IOVA address to start the walk 804 * @end: IOVA range end address (start <= addr < end) 805 * @hook_fn: the hook that to be called for each detected area 806 * @private: private data for the hook function 807 */ 808 static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, 809 vtd_page_walk_hook hook_fn, void *private, 810 bool notify_unmap) 811 { 812 dma_addr_t addr = vtd_get_slpt_base_from_context(ce); 813 uint32_t level = vtd_get_level_from_context_entry(ce); 814 815 if (!vtd_iova_range_check(start, ce)) { 816 return -VTD_FR_ADDR_BEYOND_MGAW; 817 } 818 819 if (!vtd_iova_range_check(end, ce)) { 820 /* Fix end so that it reaches the maximum */ 821 end = vtd_iova_limit(ce); 822 } 823 824 return vtd_page_walk_level(addr, start, end, hook_fn, private, 825 level, true, true, notify_unmap); 826 } 827 828 /* Map a device to its corresponding domain (context-entry) */ 829 static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, 830 uint8_t devfn, VTDContextEntry *ce) 831 { 832 VTDRootEntry re; 833 int ret_fr; 834 835 ret_fr = vtd_get_root_entry(s, bus_num, &re); 836 if (ret_fr) { 837 return ret_fr; 838 } 839 840 if (!vtd_root_entry_present(&re)) { 841 /* Not error - it's okay we don't have root entry. */ 842 trace_vtd_re_not_present(bus_num); 843 return -VTD_FR_ROOT_ENTRY_P; 844 } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { 845 trace_vtd_re_invalid(re.rsvd, re.val); 846 return -VTD_FR_ROOT_ENTRY_RSVD; 847 } 848 849 ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce); 850 if (ret_fr) { 851 return ret_fr; 852 } 853 854 if (!vtd_context_entry_present(ce)) { 855 /* Not error - it's okay we don't have context entry. 
         */
        trace_vtd_ce_not_present(bus_num, devfn);
        return -VTD_FR_CONTEXT_ENTRY_P;
    } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
               (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_RSVD;
    }
    /* Check if the programming of context-entry is valid */
    if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    } else {
        switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
        case VTD_CONTEXT_TT_MULTI_LEVEL:
            /* fall through */
        case VTD_CONTEXT_TT_DEV_IOTLB:
            break;
        default:
            trace_vtd_ce_invalid(ce->hi, ce->lo);
            return -VTD_FR_CONTEXT_ENTRY_INV;
        }
    }
    return 0;
}

static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
{
    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
}

static const bool vtd_qualified_faults[] = {
    [VTD_FR_RESERVED] = false,
    [VTD_FR_ROOT_ENTRY_P] = false,
    [VTD_FR_CONTEXT_ENTRY_P] = true,
    [VTD_FR_CONTEXT_ENTRY_INV] = true,
    [VTD_FR_ADDR_BEYOND_MGAW] = true,
    [VTD_FR_WRITE] = true,
    [VTD_FR_READ] = true,
    [VTD_FR_PAGING_ENTRY_INV] = true,
    [VTD_FR_ROOT_TABLE_INV] = false,
    [VTD_FR_CONTEXT_TABLE_INV] = false,
    [VTD_FR_ROOT_ENTRY_RSVD] = false,
    [VTD_FR_PAGING_ENTRY_RSVD] = true,
    [VTD_FR_CONTEXT_ENTRY_TT] = true,
    [VTD_FR_RESERVED_ERR] = false,
    [VTD_FR_MAX] = false,
};

/* To see if a fault condition is "qualified", which is reported to software
 * only if the FPD field in the context-entry used to process the faulting
 * request is 0.
 */
static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
{
    return vtd_qualified_faults[fault];
}

static inline bool vtd_is_interrupt_addr(hwaddr addr)
{
    return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}

/* Map dev to context-entry then do a paging-structures walk to do an iommu
 * translation.
 *
 * Called from RCU critical section.
 *
 * @bus_num: The bus number
 * @devfn: The devfn, which is the combination of device and function number
 * @is_write: The access is a write operation
 * @entry: IOMMUTLBEntry that contains the addr to be translated and result
 */
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                   uint8_t devfn, hwaddr addr, bool is_write,
                                   IOMMUTLBEntry *entry)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    VTDContextEntry ce;
    uint8_t bus_num = pci_bus_num(bus);
    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
    uint64_t slpte, page_mask;
    uint32_t level;
    uint16_t source_id = vtd_make_source_id(bus_num, devfn);
    int ret_fr;
    bool is_fpd_set = false;
    bool reads = true;
    bool writes = true;
    VTDIOTLBEntry *iotlb_entry;

    /*
     * We have a standalone memory region for interrupt addresses; we
     * should never receive translation requests in this region.
     */
    assert(!vtd_is_interrupt_addr(addr));

    /* Try to fetch slpte from the IOTLB */
    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
    if (iotlb_entry) {
        trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
                                 iotlb_entry->domain_id);
        slpte = iotlb_entry->slpte;
        reads = iotlb_entry->read_flags;
        writes = iotlb_entry->write_flags;
        page_mask = iotlb_entry->mask;
        goto out;
    }
    /* Try to fetch context-entry from cache first */
    if (cc_entry->context_cache_gen == s->context_cache_gen) {
        trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
                               cc_entry->context_entry.lo,
                               cc_entry->context_cache_gen);
        ce = cc_entry->context_entry;
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
    } else {
        ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
        if (ret_fr) {
            ret_fr = -ret_fr;
            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
                trace_vtd_fault_disabled();
            } else {
                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
            }
            return;
        }
        /* Update context-cache */
        trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
                                  cc_entry->context_cache_gen,
                                  s->context_cache_gen);
        cc_entry->context_entry = ce;
        cc_entry->context_cache_gen = s->context_cache_gen;
    }

    ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
                               &reads, &writes);
    if (ret_fr) {
        ret_fr = -ret_fr;
        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
            trace_vtd_fault_disabled();
        } else {
            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
        }
        return;
    }

    page_mask = vtd_slpt_level_page_mask(level);
    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                     reads, writes, level);
out:
    entry->iova = addr & page_mask;
    entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
    entry->addr_mask = ~page_mask;
    entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
}

static void vtd_root_table_setup(IntelIOMMUState *s)
{
    s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
    s->root_extended = s->root & VTD_RTADDR_RTT;
    s->root &= VTD_RTADDR_ADDR_MASK;

    VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
                (s->root_extended ? "(extended)" : ""));
}

static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
                               uint32_t index, uint32_t mask)
{
    x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
}

static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
{
    uint64_t value = 0;
    value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
    s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
    s->intr_root = value & VTD_IRTA_ADDR_MASK;
    s->intr_eime = value & VTD_IRTA_EIME;

    /* Notify global invalidation */
    vtd_iec_notify_all(s, true, 0, 0);

    VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
                s->intr_root, s->intr_size);
}

static void vtd_iommu_replay_all(IntelIOMMUState *s)
{
    IntelIOMMUNotifierNode *node;

    QLIST_FOREACH(node, &s->notifiers_list, next) {
        memory_region_iommu_replay_all(&node->vtd_as->iommu);
    }
}

static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_inv_desc_cc_global();
    s->context_cache_gen++;
    if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
        vtd_reset_context_cache(s);
    }
    /*
     * From VT-d spec 6.5.2.1, a global context entry invalidation
     * should be followed by an IOTLB global invalidation, so we should
     * be safe even without this. However, let's replay the region as
     * well to be safer, and go back here when we need finer tuning of
     * the VT-d emulation code.
     */
    vtd_iommu_replay_all(s);
}


/* Find the VTD address space currently associated with a given bus number */
static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
{
    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
    if (!vtd_bus) {
        /* Iterate over the registered buses to find the one which
         * currently holds this bus number, and update the bus_num
         * lookup table.
         */
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
            if (pci_bus_num(vtd_bus->bus) == bus_num) {
                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
                return vtd_bus;
            }
        }
    }
    return vtd_bus;
}

/* Do a context-cache device-selective invalidation.
 * @func_mask: FM field after shifting
 */
static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                          uint16_t source_id,
                                          uint16_t func_mask)
{
    uint16_t mask;
    VTDBus *vtd_bus;
    VTDAddressSpace *vtd_as;
    uint8_t bus_n, devfn;
    uint16_t devfn_it;

    trace_vtd_inv_desc_cc_devices(source_id, func_mask);

    switch (func_mask & 3) {
    case 0:
        mask = 0;   /* No bits in the SID field masked */
        break;
    case 1:
        mask = 4;   /* Mask bit 2 in the SID field */
        break;
    case 2:
        mask = 6;   /* Mask bit 2:1 in the SID field */
        break;
    case 3:
        mask = 7;   /* Mask bit 2:0 in the SID field */
        break;
    }
    mask = ~mask;

    bus_n = VTD_SID_TO_BUS(source_id);
    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
    if (vtd_bus) {
        devfn = VTD_SID_TO_DEVFN(source_id);
        for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
            vtd_as = vtd_bus->dev_as[devfn_it];
            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
                                             VTD_PCI_FUNC(devfn_it));
                vtd_as->context_cache_entry.context_cache_gen = 0;
                /*
                 * A device is moving out of (or into) a domain, so a
                 * replay() suits here to notify all the registered
                 * IOMMU_NOTIFIER_MAP notifiers about this change.
                 * This does no harm even if we have no such notifier
                 * registered - the IOMMU notification framework will
                 * skip MAP notifications if that happened.
                 */
                memory_region_iommu_replay_all(&vtd_as->iommu);
            }
        }
    }
}

/* Context-cache invalidation
 * Returns the Context Actual Invalidation Granularity.
 * @val: the content of the CCMD_REG
 */
static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
{
    uint64_t caig;
    uint64_t type = val & VTD_CCMD_CIRG_MASK;

    switch (type) {
    case VTD_CCMD_DOMAIN_INVL:
        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
                    (uint16_t)VTD_CCMD_DID(val));
        /* Fall through */
    case VTD_CCMD_GLOBAL_INVL:
        VTD_DPRINTF(INV, "global invalidation");
        caig = VTD_CCMD_GLOBAL_INVL_A;
        vtd_context_global_invalidate(s);
        break;

    case VTD_CCMD_DEVICE_INVL:
        caig = VTD_CCMD_DEVICE_INVL_A;
        vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
        break;

    default:
        VTD_DPRINTF(GENERAL, "error: invalid granularity");
        caig = 0;
    }
    return caig;
}

static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_iotlb_reset("global invalidation recved");
    vtd_reset_iotlb(s);
    vtd_iommu_replay_all(s);
}

static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
    IntelIOMMUNotifierNode *node;
    VTDContextEntry ce;
    VTDAddressSpace *vtd_as;

    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);

    QLIST_FOREACH(node, &s->notifiers_list, next) {
        vtd_as = node->vtd_as;
        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                      vtd_as->devfn, &ce) &&
            domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
            memory_region_iommu_replay_all(&vtd_as->iommu);
        }
    }
}

static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
                                           void *private)
{
    memory_region_notify_iommu((MemoryRegion *)private, *entry);
    return 0;
1210 } 1211 1212 static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, 1213 uint16_t domain_id, hwaddr addr, 1214 uint8_t am) 1215 { 1216 IntelIOMMUNotifierNode *node; 1217 VTDContextEntry ce; 1218 int ret; 1219 1220 QLIST_FOREACH(node, &(s->notifiers_list), next) { 1221 VTDAddressSpace *vtd_as = node->vtd_as; 1222 ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), 1223 vtd_as->devfn, &ce); 1224 if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { 1225 vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, 1226 vtd_page_invalidate_notify_hook, 1227 (void *)&vtd_as->iommu, true); 1228 } 1229 } 1230 } 1231 1232 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, 1233 hwaddr addr, uint8_t am) 1234 { 1235 VTDIOTLBPageInvInfo info; 1236 1237 assert(am <= VTD_MAMV); 1238 info.domain_id = domain_id; 1239 info.addr = addr; 1240 info.mask = ~((1 << am) - 1); 1241 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); 1242 vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); 1243 } 1244 1245 /* Flush IOTLB 1246 * Returns the IOTLB Actual Invalidation Granularity. 1247 * @val: the content of the IOTLB_REG 1248 */ 1249 static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) 1250 { 1251 uint64_t iaig; 1252 uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK; 1253 uint16_t domain_id; 1254 hwaddr addr; 1255 uint8_t am; 1256 1257 switch (type) { 1258 case VTD_TLB_GLOBAL_FLUSH: 1259 VTD_DPRINTF(INV, "global invalidation"); 1260 iaig = VTD_TLB_GLOBAL_FLUSH_A; 1261 vtd_iotlb_global_invalidate(s); 1262 break; 1263 1264 case VTD_TLB_DSI_FLUSH: 1265 domain_id = VTD_TLB_DID(val); 1266 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, 1267 domain_id); 1268 iaig = VTD_TLB_DSI_FLUSH_A; 1269 vtd_iotlb_domain_invalidate(s, domain_id); 1270 break; 1271 1272 case VTD_TLB_PSI_FLUSH: 1273 domain_id = VTD_TLB_DID(val); 1274 addr = vtd_get_quad_raw(s, DMAR_IVA_REG); 1275 am = VTD_IVA_AM(addr); 1276 addr = VTD_IVA_ADDR(addr); 1277 VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 1278 " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); 1279 if (am > VTD_MAMV) { 1280 VTD_DPRINTF(GENERAL, "error: supported max address mask value is " 1281 "%"PRIu8, (uint8_t)VTD_MAMV); 1282 iaig = 0; 1283 break; 1284 } 1285 iaig = VTD_TLB_PSI_FLUSH_A; 1286 vtd_iotlb_page_invalidate(s, domain_id, addr, am); 1287 break; 1288 1289 default: 1290 VTD_DPRINTF(GENERAL, "error: invalid granularity"); 1291 iaig = 0; 1292 } 1293 return iaig; 1294 } 1295 1296 static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s) 1297 { 1298 return s->iq_tail == 0; 1299 } 1300 1301 static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s) 1302 { 1303 return s->qi_enabled && (s->iq_tail == s->iq_head) && 1304 (s->iq_last_desc_type == VTD_INV_DESC_WAIT); 1305 } 1306 1307 static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) 1308 { 1309 uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); 1310 1311 VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? 
"on" : "off")); 1312 if (en) { 1313 if (vtd_queued_inv_enable_check(s)) { 1314 s->iq = iqa_val & VTD_IQA_IQA_MASK; 1315 /* 2^(x+8) entries */ 1316 s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); 1317 s->qi_enabled = true; 1318 VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); 1319 VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", 1320 s->iq, s->iq_size); 1321 /* Ok - report back to driver */ 1322 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); 1323 } else { 1324 VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " 1325 "tail %"PRIu16, s->iq_tail); 1326 } 1327 } else { 1328 if (vtd_queued_inv_disable_check(s)) { 1329 /* disable Queued Invalidation */ 1330 vtd_set_quad_raw(s, DMAR_IQH_REG, 0); 1331 s->iq_head = 0; 1332 s->qi_enabled = false; 1333 /* Ok - report back to driver */ 1334 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); 1335 } else { 1336 VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " 1337 "head %"PRIu16 ", tail %"PRIu16 1338 ", last_descriptor %"PRIu8, 1339 s->iq_head, s->iq_tail, s->iq_last_desc_type); 1340 } 1341 } 1342 } 1343 1344 /* Set Root Table Pointer */ 1345 static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) 1346 { 1347 VTD_DPRINTF(CSR, "set Root Table Pointer"); 1348 1349 vtd_root_table_setup(s); 1350 /* Ok - report back to driver */ 1351 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); 1352 } 1353 1354 /* Set Interrupt Remap Table Pointer */ 1355 static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) 1356 { 1357 VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); 1358 1359 vtd_interrupt_remap_table_setup(s); 1360 /* Ok - report back to driver */ 1361 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); 1362 } 1363 1364 static void vtd_switch_address_space(VTDAddressSpace *as) 1365 { 1366 assert(as); 1367 1368 trace_vtd_switch_address_space(pci_bus_num(as->bus), 1369 VTD_PCI_SLOT(as->devfn), 1370 VTD_PCI_FUNC(as->devfn), 1371 as->iommu_state->dmar_enabled); 1372 1373 /* Turn off first then on the other */ 1374 if (as->iommu_state->dmar_enabled) { 1375 memory_region_set_enabled(&as->sys_alias, false); 1376 memory_region_set_enabled(&as->iommu, true); 1377 } else { 1378 memory_region_set_enabled(&as->iommu, false); 1379 memory_region_set_enabled(&as->sys_alias, true); 1380 } 1381 } 1382 1383 static void vtd_switch_address_space_all(IntelIOMMUState *s) 1384 { 1385 GHashTableIter iter; 1386 VTDBus *vtd_bus; 1387 int i; 1388 1389 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); 1390 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { 1391 for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { 1392 if (!vtd_bus->dev_as[i]) { 1393 continue; 1394 } 1395 vtd_switch_address_space(vtd_bus->dev_as[i]); 1396 } 1397 } 1398 } 1399 1400 /* Handle Translation Enable/Disable */ 1401 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) 1402 { 1403 if (s->dmar_enabled == en) { 1404 return; 1405 } 1406 1407 VTD_DPRINTF(CSR, "Translation Enable %s", (en ? 
"on" : "off")); 1408 1409 if (en) { 1410 s->dmar_enabled = true; 1411 /* Ok - report back to driver */ 1412 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES); 1413 } else { 1414 s->dmar_enabled = false; 1415 1416 /* Clear the index of Fault Recording Register */ 1417 s->next_frcd_reg = 0; 1418 /* Ok - report back to driver */ 1419 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); 1420 } 1421 1422 vtd_switch_address_space_all(s); 1423 } 1424 1425 /* Handle Interrupt Remap Enable/Disable */ 1426 static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) 1427 { 1428 VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off")); 1429 1430 if (en) { 1431 s->intr_enabled = true; 1432 /* Ok - report back to driver */ 1433 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); 1434 } else { 1435 s->intr_enabled = false; 1436 /* Ok - report back to driver */ 1437 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); 1438 } 1439 } 1440 1441 /* Handle write to Global Command Register */ 1442 static void vtd_handle_gcmd_write(IntelIOMMUState *s) 1443 { 1444 uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); 1445 uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); 1446 uint32_t changed = status ^ val; 1447 1448 VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); 1449 if (changed & VTD_GCMD_TE) { 1450 /* Translation enable/disable */ 1451 vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); 1452 } 1453 if (val & VTD_GCMD_SRTP) { 1454 /* Set/update the root-table pointer */ 1455 vtd_handle_gcmd_srtp(s); 1456 } 1457 if (changed & VTD_GCMD_QIE) { 1458 /* Queued Invalidation Enable */ 1459 vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); 1460 } 1461 if (val & VTD_GCMD_SIRTP) { 1462 /* Set/update the interrupt remapping root-table pointer */ 1463 vtd_handle_gcmd_sirtp(s); 1464 } 1465 if (changed & VTD_GCMD_IRE) { 1466 /* Interrupt remap enable/disable */ 1467 vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); 1468 } 1469 } 1470 1471 /* Handle write to Context Command Register */ 1472 static void vtd_handle_ccmd_write(IntelIOMMUState *s) 1473 { 1474 uint64_t ret; 1475 uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG); 1476 1477 /* Context-cache invalidation request */ 1478 if (val & VTD_CCMD_ICC) { 1479 if (s->qi_enabled) { 1480 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " 1481 "should not use register-based invalidation"); 1482 return; 1483 } 1484 ret = vtd_context_cache_invalidate(s, val); 1485 /* Invalidation completed. Change something to show */ 1486 vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); 1487 ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, 1488 ret); 1489 VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); 1490 } 1491 } 1492 1493 /* Handle write to IOTLB Invalidation Register */ 1494 static void vtd_handle_iotlb_write(IntelIOMMUState *s) 1495 { 1496 uint64_t ret; 1497 uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG); 1498 1499 /* IOTLB invalidation request */ 1500 if (val & VTD_TLB_IVT) { 1501 if (s->qi_enabled) { 1502 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " 1503 "should not use register-based invalidation"); 1504 return; 1505 } 1506 ret = vtd_iotlb_flush(s, val); 1507 /* Invalidation completed. 
Change something to show */ 1508 vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); 1509 ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, 1510 VTD_TLB_FLUSH_GRANU_MASK_A, ret); 1511 VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); 1512 } 1513 } 1514 1515 /* Fetch an Invalidation Descriptor from the Invalidation Queue */ 1516 static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset, 1517 VTDInvDesc *inv_desc) 1518 { 1519 dma_addr_t addr = base_addr + offset * sizeof(*inv_desc); 1520 if (dma_memory_read(&address_space_memory, addr, inv_desc, 1521 sizeof(*inv_desc))) { 1522 VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor " 1523 "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset); 1524 inv_desc->lo = 0; 1525 inv_desc->hi = 0; 1526 1527 return false; 1528 } 1529 inv_desc->lo = le64_to_cpu(inv_desc->lo); 1530 inv_desc->hi = le64_to_cpu(inv_desc->hi); 1531 return true; 1532 } 1533 1534 static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) 1535 { 1536 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || 1537 (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { 1538 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); 1539 return false; 1540 } 1541 if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { 1542 /* Status Write */ 1543 uint32_t status_data = (uint32_t)(inv_desc->lo >> 1544 VTD_INV_DESC_WAIT_DATA_SHIFT); 1545 1546 assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); 1547 1548 /* FIXME: need to be masked with HAW? */ 1549 dma_addr_t status_addr = inv_desc->hi; 1550 trace_vtd_inv_desc_wait_sw(status_addr, status_data); 1551 status_data = cpu_to_le32(status_data); 1552 if (dma_memory_write(&address_space_memory, status_addr, &status_data, 1553 sizeof(status_data))) { 1554 trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo); 1555 return false; 1556 } 1557 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { 1558 /* Interrupt flag */ 1559 vtd_generate_completion_event(s); 1560 } else { 1561 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); 1562 return false; 1563 } 1564 return true; 1565 } 1566 1567 static bool vtd_process_context_cache_desc(IntelIOMMUState *s, 1568 VTDInvDesc *inv_desc) 1569 { 1570 uint16_t sid, fmask; 1571 1572 if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { 1573 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); 1574 return false; 1575 } 1576 switch (inv_desc->lo & VTD_INV_DESC_CC_G) { 1577 case VTD_INV_DESC_CC_DOMAIN: 1578 trace_vtd_inv_desc_cc_domain( 1579 (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo)); 1580 /* Fall through */ 1581 case VTD_INV_DESC_CC_GLOBAL: 1582 vtd_context_global_invalidate(s); 1583 break; 1584 1585 case VTD_INV_DESC_CC_DEVICE: 1586 sid = VTD_INV_DESC_CC_SID(inv_desc->lo); 1587 fmask = VTD_INV_DESC_CC_FM(inv_desc->lo); 1588 vtd_context_device_invalidate(s, sid, fmask); 1589 break; 1590 1591 default: 1592 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); 1593 return false; 1594 } 1595 return true; 1596 } 1597 1598 static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) 1599 { 1600 uint16_t domain_id; 1601 uint8_t am; 1602 hwaddr addr; 1603 1604 if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || 1605 (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { 1606 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); 1607 return false; 1608 } 1609 1610 switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) { 1611 case VTD_INV_DESC_IOTLB_GLOBAL: 1612 trace_vtd_inv_desc_iotlb_global(); 1613 vtd_iotlb_global_invalidate(s); 1614 break; 1615 
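    /* (Illustrative note, not part of the original source: IOTLB invalidation
     * descriptors come in three granularities - global, domain-selective and
     * page-selective - mirroring the register-based flush handled in
     * vtd_iotlb_flush() above.)
     */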
1616 case VTD_INV_DESC_IOTLB_DOMAIN: 1617 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); 1618 trace_vtd_inv_desc_iotlb_domain(domain_id); 1619 vtd_iotlb_domain_invalidate(s, domain_id); 1620 break; 1621 1622 case VTD_INV_DESC_IOTLB_PAGE: 1623 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); 1624 addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); 1625 am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); 1626 trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am); 1627 if (am > VTD_MAMV) { 1628 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); 1629 return false; 1630 } 1631 vtd_iotlb_page_invalidate(s, domain_id, addr, am); 1632 break; 1633 1634 default: 1635 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); 1636 return false; 1637 } 1638 return true; 1639 } 1640 1641 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, 1642 VTDInvDesc *inv_desc) 1643 { 1644 VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d", 1645 inv_desc->iec.granularity, 1646 inv_desc->iec.index, 1647 inv_desc->iec.index_mask); 1648 1649 vtd_iec_notify_all(s, !inv_desc->iec.granularity, 1650 inv_desc->iec.index, 1651 inv_desc->iec.index_mask); 1652 return true; 1653 } 1654 1655 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, 1656 VTDInvDesc *inv_desc) 1657 { 1658 VTDAddressSpace *vtd_dev_as; 1659 IOMMUTLBEntry entry; 1660 struct VTDBus *vtd_bus; 1661 hwaddr addr; 1662 uint64_t sz; 1663 uint16_t sid; 1664 uint8_t devfn; 1665 bool size; 1666 uint8_t bus_num; 1667 1668 addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi); 1669 sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo); 1670 devfn = sid & 0xff; 1671 bus_num = sid >> 8; 1672 size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi); 1673 1674 if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || 1675 (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) { 1676 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device " 1677 "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1678 inv_desc->hi, inv_desc->lo); 1679 return false; 1680 } 1681 1682 vtd_bus = vtd_find_as_from_bus_num(s, bus_num); 1683 if (!vtd_bus) { 1684 goto done; 1685 } 1686 1687 vtd_dev_as = vtd_bus->dev_as[devfn]; 1688 if (!vtd_dev_as) { 1689 goto done; 1690 } 1691 1692 /* According to ATS spec table 2.4: 1693 * S = 0, bits 15:12 = xxxx range size: 4K 1694 * S = 1, bits 15:12 = xxx0 range size: 8K 1695 * S = 1, bits 15:12 = xx01 range size: 16K 1696 * S = 1, bits 15:12 = x011 range size: 32K 1697 * S = 1, bits 15:12 = 0111 range size: 64K 1698 * ... 1699 */ 1700 if (size) { 1701 sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT); 1702 addr &= ~(sz - 1); 1703 } else { 1704 sz = VTD_PAGE_SIZE; 1705 } 1706 1707 entry.target_as = &vtd_dev_as->as; 1708 entry.addr_mask = sz - 1; 1709 entry.iova = addr; 1710 entry.perm = IOMMU_NONE; 1711 entry.translated_addr = 0; 1712 memory_region_notify_iommu(&vtd_dev_as->iommu, entry); 1713 1714 done: 1715 return true; 1716 } 1717 1718 static bool vtd_process_inv_desc(IntelIOMMUState *s) 1719 { 1720 VTDInvDesc inv_desc; 1721 uint8_t desc_type; 1722 1723 VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head); 1724 if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) { 1725 s->iq_last_desc_type = VTD_INV_DESC_NONE; 1726 return false; 1727 } 1728 desc_type = inv_desc.lo & VTD_INV_DESC_TYPE; 1729 /* FIXME: should update at first or at last? 
*/ 1730 s->iq_last_desc_type = desc_type; 1731 1732 switch (desc_type) { 1733 case VTD_INV_DESC_CC: 1734 trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo); 1735 if (!vtd_process_context_cache_desc(s, &inv_desc)) { 1736 return false; 1737 } 1738 break; 1739 1740 case VTD_INV_DESC_IOTLB: 1741 trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo); 1742 if (!vtd_process_iotlb_desc(s, &inv_desc)) { 1743 return false; 1744 } 1745 break; 1746 1747 case VTD_INV_DESC_WAIT: 1748 trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo); 1749 if (!vtd_process_wait_desc(s, &inv_desc)) { 1750 return false; 1751 } 1752 break; 1753 1754 case VTD_INV_DESC_IEC: 1755 trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo); 1756 if (!vtd_process_inv_iec_desc(s, &inv_desc)) { 1757 return false; 1758 } 1759 break; 1760 1761 case VTD_INV_DESC_DEVICE: 1762 VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64 1763 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); 1764 if (!vtd_process_device_iotlb_desc(s, &inv_desc)) { 1765 return false; 1766 } 1767 break; 1768 1769 default: 1770 trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo); 1771 return false; 1772 } 1773 s->iq_head++; 1774 if (s->iq_head == s->iq_size) { 1775 s->iq_head = 0; 1776 } 1777 return true; 1778 } 1779 1780 /* Try to fetch and process more Invalidation Descriptors */ 1781 static void vtd_fetch_inv_desc(IntelIOMMUState *s) 1782 { 1783 VTD_DPRINTF(INV, "fetch Invalidation Descriptors"); 1784 if (s->iq_tail >= s->iq_size) { 1785 /* Detects an invalid Tail pointer */ 1786 VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16 1787 " while iq_size is %"PRIu16, s->iq_tail, s->iq_size); 1788 vtd_handle_inv_queue_error(s); 1789 return; 1790 } 1791 while (s->iq_head != s->iq_tail) { 1792 if (!vtd_process_inv_desc(s)) { 1793 /* Invalidation Queue Errors */ 1794 vtd_handle_inv_queue_error(s); 1795 break; 1796 } 1797 /* Must update the IQH_REG in time */ 1798 vtd_set_quad_raw(s, DMAR_IQH_REG, 1799 (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) & 1800 VTD_IQH_QH_MASK); 1801 } 1802 } 1803 1804 /* Handle write to Invalidation Queue Tail Register */ 1805 static void vtd_handle_iqt_write(IntelIOMMUState *s) 1806 { 1807 uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG); 1808 1809 s->iq_tail = VTD_IQT_QT(val); 1810 VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail); 1811 if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) { 1812 /* Process Invalidation Queue here */ 1813 vtd_fetch_inv_desc(s); 1814 } 1815 } 1816 1817 static void vtd_handle_fsts_write(IntelIOMMUState *s) 1818 { 1819 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); 1820 uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); 1821 uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE; 1822 1823 if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { 1824 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); 1825 VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " 1826 "IP field of FECTL_REG"); 1827 } 1828 /* FIXME: when IQE is Clear, should we try to fetch some Invalidation 1829 * Descriptors if there are any when Queued Invalidation is enabled? 1830 */ 1831 } 1832 1833 static void vtd_handle_fectl_write(IntelIOMMUState *s) 1834 { 1835 uint32_t fectl_reg; 1836 /* FIXME: when software clears the IM field, check the IP field. But do we 1837 * need to compare the old value and the new value to conclude that 1838 * software clears the IM field? Or just check if the IM field is zero? 
1839 */ 1840 fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); 1841 if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { 1842 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); 1843 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); 1844 VTD_DPRINTF(FLOG, "IM field is cleared, generate " 1845 "fault event interrupt"); 1846 } 1847 } 1848 1849 static void vtd_handle_ics_write(IntelIOMMUState *s) 1850 { 1851 uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG); 1852 uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); 1853 1854 if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) { 1855 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); 1856 VTD_DPRINTF(INV, "pending completion interrupt condition serviced, " 1857 "clear IP field of IECTL_REG"); 1858 } 1859 } 1860 1861 static void vtd_handle_iectl_write(IntelIOMMUState *s) 1862 { 1863 uint32_t iectl_reg; 1864 /* FIXME: when software clears the IM field, check the IP field. But do we 1865 * need to compare the old value and the new value to conclude that 1866 * software clears the IM field? Or just check if the IM field is zero? 1867 */ 1868 iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); 1869 if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) { 1870 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); 1871 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); 1872 VTD_DPRINTF(INV, "IM field is cleared, generate " 1873 "invalidation event interrupt"); 1874 } 1875 } 1876 1877 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) 1878 { 1879 IntelIOMMUState *s = opaque; 1880 uint64_t val; 1881 1882 if (addr + size > DMAR_REG_SIZE) { 1883 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 1884 ", got 0x%"PRIx64 " %d", 1885 (uint64_t)DMAR_REG_SIZE, addr, size); 1886 return (uint64_t)-1; 1887 } 1888 1889 switch (addr) { 1890 /* Root Table Address Register, 64-bit */ 1891 case DMAR_RTADDR_REG: 1892 if (size == 4) { 1893 val = s->root & ((1ULL << 32) - 1); 1894 } else { 1895 val = s->root; 1896 } 1897 break; 1898 1899 case DMAR_RTADDR_REG_HI: 1900 assert(size == 4); 1901 val = s->root >> 32; 1902 break; 1903 1904 /* Invalidation Queue Address Register, 64-bit */ 1905 case DMAR_IQA_REG: 1906 val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS); 1907 if (size == 4) { 1908 val = val & ((1ULL << 32) - 1); 1909 } 1910 break; 1911 1912 case DMAR_IQA_REG_HI: 1913 assert(size == 4); 1914 val = s->iq >> 32; 1915 break; 1916 1917 default: 1918 if (size == 4) { 1919 val = vtd_get_long(s, addr); 1920 } else { 1921 val = vtd_get_quad(s, addr); 1922 } 1923 } 1924 VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, 1925 addr, size, val); 1926 return val; 1927 } 1928 1929 static void vtd_mem_write(void *opaque, hwaddr addr, 1930 uint64_t val, unsigned size) 1931 { 1932 IntelIOMMUState *s = opaque; 1933 1934 if (addr + size > DMAR_REG_SIZE) { 1935 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 1936 ", got 0x%"PRIx64 " %d", 1937 (uint64_t)DMAR_REG_SIZE, addr, size); 1938 return; 1939 } 1940 1941 switch (addr) { 1942 /* Global Command Register, 32-bit */ 1943 case DMAR_GCMD_REG: 1944 VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64 1945 ", size %d, val 0x%"PRIx64, addr, size, val); 1946 vtd_set_long(s, addr, val); 1947 vtd_handle_gcmd_write(s); 1948 break; 1949 1950 /* Context Command Register, 64-bit */ 1951 case DMAR_CCMD_REG: 1952 VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 1953 ", size %d, val 0x%"PRIx64, 
addr, size, val); 1954 if (size == 4) { 1955 vtd_set_long(s, addr, val); 1956 } else { 1957 vtd_set_quad(s, addr, val); 1958 vtd_handle_ccmd_write(s); 1959 } 1960 break; 1961 1962 case DMAR_CCMD_REG_HI: 1963 VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 1964 ", size %d, val 0x%"PRIx64, addr, size, val); 1965 assert(size == 4); 1966 vtd_set_long(s, addr, val); 1967 vtd_handle_ccmd_write(s); 1968 break; 1969 1970 /* IOTLB Invalidation Register, 64-bit */ 1971 case DMAR_IOTLB_REG: 1972 VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 1973 ", size %d, val 0x%"PRIx64, addr, size, val); 1974 if (size == 4) { 1975 vtd_set_long(s, addr, val); 1976 } else { 1977 vtd_set_quad(s, addr, val); 1978 vtd_handle_iotlb_write(s); 1979 } 1980 break; 1981 1982 case DMAR_IOTLB_REG_HI: 1983 VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 1984 ", size %d, val 0x%"PRIx64, addr, size, val); 1985 assert(size == 4); 1986 vtd_set_long(s, addr, val); 1987 vtd_handle_iotlb_write(s); 1988 break; 1989 1990 /* Invalidate Address Register, 64-bit */ 1991 case DMAR_IVA_REG: 1992 VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64 1993 ", size %d, val 0x%"PRIx64, addr, size, val); 1994 if (size == 4) { 1995 vtd_set_long(s, addr, val); 1996 } else { 1997 vtd_set_quad(s, addr, val); 1998 } 1999 break; 2000 2001 case DMAR_IVA_REG_HI: 2002 VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64 2003 ", size %d, val 0x%"PRIx64, addr, size, val); 2004 assert(size == 4); 2005 vtd_set_long(s, addr, val); 2006 break; 2007 2008 /* Fault Status Register, 32-bit */ 2009 case DMAR_FSTS_REG: 2010 VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 2011 ", size %d, val 0x%"PRIx64, addr, size, val); 2012 assert(size == 4); 2013 vtd_set_long(s, addr, val); 2014 vtd_handle_fsts_write(s); 2015 break; 2016 2017 /* Fault Event Control Register, 32-bit */ 2018 case DMAR_FECTL_REG: 2019 VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 2020 ", size %d, val 0x%"PRIx64, addr, size, val); 2021 assert(size == 4); 2022 vtd_set_long(s, addr, val); 2023 vtd_handle_fectl_write(s); 2024 break; 2025 2026 /* Fault Event Data Register, 32-bit */ 2027 case DMAR_FEDATA_REG: 2028 VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 2029 ", size %d, val 0x%"PRIx64, addr, size, val); 2030 assert(size == 4); 2031 vtd_set_long(s, addr, val); 2032 break; 2033 2034 /* Fault Event Address Register, 32-bit */ 2035 case DMAR_FEADDR_REG: 2036 VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 2037 ", size %d, val 0x%"PRIx64, addr, size, val); 2038 assert(size == 4); 2039 vtd_set_long(s, addr, val); 2040 break; 2041 2042 /* Fault Event Upper Address Register, 32-bit */ 2043 case DMAR_FEUADDR_REG: 2044 VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 2045 ", size %d, val 0x%"PRIx64, addr, size, val); 2046 assert(size == 4); 2047 vtd_set_long(s, addr, val); 2048 break; 2049 2050 /* Protected Memory Enable Register, 32-bit */ 2051 case DMAR_PMEN_REG: 2052 VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 2053 ", size %d, val 0x%"PRIx64, addr, size, val); 2054 assert(size == 4); 2055 vtd_set_long(s, addr, val); 2056 break; 2057 2058 /* Root Table Address Register, 64-bit */ 2059 case DMAR_RTADDR_REG: 2060 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 2061 ", size %d, val 0x%"PRIx64, addr, size, val); 2062 if (size == 4) { 2063 vtd_set_long(s, addr, val); 2064 } else { 2065 vtd_set_quad(s, addr, val); 2066 } 2067 break; 2068 2069 case DMAR_RTADDR_REG_HI: 2070 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 
0x%"PRIx64 2071 ", size %d, val 0x%"PRIx64, addr, size, val); 2072 assert(size == 4); 2073 vtd_set_long(s, addr, val); 2074 break; 2075 2076 /* Invalidation Queue Tail Register, 64-bit */ 2077 case DMAR_IQT_REG: 2078 VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64 2079 ", size %d, val 0x%"PRIx64, addr, size, val); 2080 if (size == 4) { 2081 vtd_set_long(s, addr, val); 2082 } else { 2083 vtd_set_quad(s, addr, val); 2084 } 2085 vtd_handle_iqt_write(s); 2086 break; 2087 2088 case DMAR_IQT_REG_HI: 2089 VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64 2090 ", size %d, val 0x%"PRIx64, addr, size, val); 2091 assert(size == 4); 2092 vtd_set_long(s, addr, val); 2093 /* 19:63 of IQT_REG is RsvdZ, do nothing here */ 2094 break; 2095 2096 /* Invalidation Queue Address Register, 64-bit */ 2097 case DMAR_IQA_REG: 2098 VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64 2099 ", size %d, val 0x%"PRIx64, addr, size, val); 2100 if (size == 4) { 2101 vtd_set_long(s, addr, val); 2102 } else { 2103 vtd_set_quad(s, addr, val); 2104 } 2105 break; 2106 2107 case DMAR_IQA_REG_HI: 2108 VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64 2109 ", size %d, val 0x%"PRIx64, addr, size, val); 2110 assert(size == 4); 2111 vtd_set_long(s, addr, val); 2112 break; 2113 2114 /* Invalidation Completion Status Register, 32-bit */ 2115 case DMAR_ICS_REG: 2116 VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64 2117 ", size %d, val 0x%"PRIx64, addr, size, val); 2118 assert(size == 4); 2119 vtd_set_long(s, addr, val); 2120 vtd_handle_ics_write(s); 2121 break; 2122 2123 /* Invalidation Event Control Register, 32-bit */ 2124 case DMAR_IECTL_REG: 2125 VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 2126 ", size %d, val 0x%"PRIx64, addr, size, val); 2127 assert(size == 4); 2128 vtd_set_long(s, addr, val); 2129 vtd_handle_iectl_write(s); 2130 break; 2131 2132 /* Invalidation Event Data Register, 32-bit */ 2133 case DMAR_IEDATA_REG: 2134 VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64 2135 ", size %d, val 0x%"PRIx64, addr, size, val); 2136 assert(size == 4); 2137 vtd_set_long(s, addr, val); 2138 break; 2139 2140 /* Invalidation Event Address Register, 32-bit */ 2141 case DMAR_IEADDR_REG: 2142 VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64 2143 ", size %d, val 0x%"PRIx64, addr, size, val); 2144 assert(size == 4); 2145 vtd_set_long(s, addr, val); 2146 break; 2147 2148 /* Invalidation Event Upper Address Register, 32-bit */ 2149 case DMAR_IEUADDR_REG: 2150 VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64 2151 ", size %d, val 0x%"PRIx64, addr, size, val); 2152 assert(size == 4); 2153 vtd_set_long(s, addr, val); 2154 break; 2155 2156 /* Fault Recording Registers, 128-bit */ 2157 case DMAR_FRCD_REG_0_0: 2158 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 2159 ", size %d, val 0x%"PRIx64, addr, size, val); 2160 if (size == 4) { 2161 vtd_set_long(s, addr, val); 2162 } else { 2163 vtd_set_quad(s, addr, val); 2164 } 2165 break; 2166 2167 case DMAR_FRCD_REG_0_1: 2168 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 2169 ", size %d, val 0x%"PRIx64, addr, size, val); 2170 assert(size == 4); 2171 vtd_set_long(s, addr, val); 2172 break; 2173 2174 case DMAR_FRCD_REG_0_2: 2175 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 2176 ", size %d, val 0x%"PRIx64, addr, size, val); 2177 if (size == 4) { 2178 vtd_set_long(s, addr, val); 2179 } else { 2180 vtd_set_quad(s, addr, val); 2181 /* May clear bit 127 (Fault), update PPF */ 2182 vtd_update_fsts_ppf(s); 2183 } 2184 break; 
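/* DMAR_FRCD_REG_0_3 holds bits 127:96 of the 128-bit Fault Recording Register; bit 127 is the RW1C Fault bit, so, as with the 64-bit access above, a write here may clear it and PPF has to be recomputed. */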
2185 2186 case DMAR_FRCD_REG_0_3: 2187 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 2188 ", size %d, val 0x%"PRIx64, addr, size, val); 2189 assert(size == 4); 2190 vtd_set_long(s, addr, val); 2191 /* May clear bit 127 (Fault), update PPF */ 2192 vtd_update_fsts_ppf(s); 2193 break; 2194 2195 case DMAR_IRTA_REG: 2196 VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 2197 ", size %d, val 0x%"PRIx64, addr, size, val); 2198 if (size == 4) { 2199 vtd_set_long(s, addr, val); 2200 } else { 2201 vtd_set_quad(s, addr, val); 2202 } 2203 break; 2204 2205 case DMAR_IRTA_REG_HI: 2206 VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 2207 ", size %d, val 0x%"PRIx64, addr, size, val); 2208 assert(size == 4); 2209 vtd_set_long(s, addr, val); 2210 break; 2211 2212 default: 2213 VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 2214 ", size %d, val 0x%"PRIx64, addr, size, val); 2215 if (size == 4) { 2216 vtd_set_long(s, addr, val); 2217 } else { 2218 vtd_set_quad(s, addr, val); 2219 } 2220 } 2221 } 2222 2223 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, 2224 bool is_write) 2225 { 2226 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); 2227 IntelIOMMUState *s = vtd_as->iommu_state; 2228 IOMMUTLBEntry ret = { 2229 .target_as = &address_space_memory, 2230 .iova = addr, 2231 .translated_addr = 0, 2232 .addr_mask = ~(hwaddr)0, 2233 .perm = IOMMU_NONE, 2234 }; 2235 2236 if (!s->dmar_enabled) { 2237 /* DMAR disabled, passthrough, use 4k-page*/ 2238 ret.iova = addr & VTD_PAGE_MASK_4K; 2239 ret.translated_addr = addr & VTD_PAGE_MASK_4K; 2240 ret.addr_mask = ~VTD_PAGE_MASK_4K; 2241 ret.perm = IOMMU_RW; 2242 return ret; 2243 } 2244 2245 vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, 2246 is_write, &ret); 2247 VTD_DPRINTF(MMU, 2248 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 2249 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), 2250 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), 2251 vtd_as->devfn, addr, ret.translated_addr); 2252 return ret; 2253 } 2254 2255 static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, 2256 IOMMUNotifierFlag old, 2257 IOMMUNotifierFlag new) 2258 { 2259 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); 2260 IntelIOMMUState *s = vtd_as->iommu_state; 2261 IntelIOMMUNotifierNode *node = NULL; 2262 IntelIOMMUNotifierNode *next_node = NULL; 2263 2264 if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) { 2265 error_report("We need to set cache_mode=1 for intel-iommu to enable " 2266 "device assignment with IOMMU protection."); 2267 exit(1); 2268 } 2269 2270 if (old == IOMMU_NOTIFIER_NONE) { 2271 node = g_malloc0(sizeof(*node)); 2272 node->vtd_as = vtd_as; 2273 QLIST_INSERT_HEAD(&s->notifiers_list, node, next); 2274 return; 2275 } 2276 2277 /* update notifier node with new flags */ 2278 QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) { 2279 if (node->vtd_as == vtd_as) { 2280 if (new == IOMMU_NOTIFIER_NONE) { 2281 QLIST_REMOVE(node, next); 2282 g_free(node); 2283 } 2284 return; 2285 } 2286 } 2287 } 2288 2289 static const VMStateDescription vtd_vmstate = { 2290 .name = "iommu-intel", 2291 .version_id = 1, 2292 .minimum_version_id = 1, 2293 .priority = MIG_PRI_IOMMU, 2294 .fields = (VMStateField[]) { 2295 VMSTATE_UINT64(root, IntelIOMMUState), 2296 VMSTATE_UINT64(intr_root, IntelIOMMUState), 2297 VMSTATE_UINT64(iq, IntelIOMMUState), 2298 VMSTATE_UINT32(intr_size, IntelIOMMUState), 2299 VMSTATE_UINT16(iq_head, 
IntelIOMMUState), 2300 VMSTATE_UINT16(iq_tail, IntelIOMMUState), 2301 VMSTATE_UINT16(iq_size, IntelIOMMUState), 2302 VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState), 2303 VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE), 2304 VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState), 2305 VMSTATE_BOOL(root_extended, IntelIOMMUState), 2306 VMSTATE_BOOL(dmar_enabled, IntelIOMMUState), 2307 VMSTATE_BOOL(qi_enabled, IntelIOMMUState), 2308 VMSTATE_BOOL(intr_enabled, IntelIOMMUState), 2309 VMSTATE_BOOL(intr_eime, IntelIOMMUState), 2310 VMSTATE_END_OF_LIST() 2311 } 2312 }; 2313 2314 static const MemoryRegionOps vtd_mem_ops = { 2315 .read = vtd_mem_read, 2316 .write = vtd_mem_write, 2317 .endianness = DEVICE_LITTLE_ENDIAN, 2318 .impl = { 2319 .min_access_size = 4, 2320 .max_access_size = 8, 2321 }, 2322 .valid = { 2323 .min_access_size = 4, 2324 .max_access_size = 8, 2325 }, 2326 }; 2327 2328 static Property vtd_properties[] = { 2329 DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), 2330 DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, 2331 ON_OFF_AUTO_AUTO), 2332 DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), 2333 DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), 2334 DEFINE_PROP_END_OF_LIST(), 2335 }; 2336 2337 /* Read IRTE entry with specific index */ 2338 static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, 2339 VTD_IR_TableEntry *entry, uint16_t sid) 2340 { 2341 static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \ 2342 {0xffff, 0xfffb, 0xfff9, 0xfff8}; 2343 dma_addr_t addr = 0x00; 2344 uint16_t mask, source_id; 2345 uint8_t bus, bus_max, bus_min; 2346 2347 addr = iommu->intr_root + index * sizeof(*entry); 2348 if (dma_memory_read(&address_space_memory, addr, entry, 2349 sizeof(*entry))) { 2350 VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 2351 " + %"PRIu16, iommu->intr_root, index); 2352 return -VTD_FR_IR_ROOT_INVAL; 2353 } 2354 2355 if (!entry->irte.present) { 2356 VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" 2357 " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, 2358 index, le64_to_cpu(entry->data[1]), 2359 le64_to_cpu(entry->data[0])); 2360 return -VTD_FR_IR_ENTRY_P; 2361 } 2362 2363 if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || 2364 entry->irte.__reserved_2) { 2365 VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 2366 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, 2367 index, le64_to_cpu(entry->data[1]), 2368 le64_to_cpu(entry->data[0])); 2369 return -VTD_FR_IR_IRTE_RSVD; 2370 } 2371 2372 if (sid != X86_IOMMU_SID_INVALID) { 2373 /* Validate IRTE SID */ 2374 source_id = le32_to_cpu(entry->irte.source_id); 2375 switch (entry->irte.sid_vtype) { 2376 case VTD_SVT_NONE: 2377 VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); 2378 break; 2379 2380 case VTD_SVT_ALL: 2381 mask = vtd_svt_mask[entry->irte.sid_q]; 2382 if ((source_id & mask) != (sid & mask)) { 2383 VTD_DPRINTF(GENERAL, "SID validation for IRTE index " 2384 "%d failed (reqid 0x%04x sid 0x%04x)", index, 2385 sid, source_id); 2386 return -VTD_FR_IR_SID_ERR; 2387 } 2388 break; 2389 2390 case VTD_SVT_BUS: 2391 bus_max = source_id >> 8; 2392 bus_min = source_id & 0xff; 2393 bus = sid >> 8; 2394 if (bus > bus_max || bus < bus_min) { 2395 VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d " 2396 "failed (bus %d outside %d-%d)", index, bus, 2397 bus_min, bus_max); 2398 return -VTD_FR_IR_SID_ERR; 2399 } 2400 break; 2401 2402 default: 2403 VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " 
2404 "%d", entry->irte.sid_vtype, index); 2405 /* Take this as verification failure. */ 2406 return -VTD_FR_IR_SID_ERR; 2407 break; 2408 } 2409 } 2410 2411 return 0; 2412 } 2413 2414 /* Fetch IRQ information of specific IR index */ 2415 static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, 2416 VTDIrq *irq, uint16_t sid) 2417 { 2418 VTD_IR_TableEntry irte = {}; 2419 int ret = 0; 2420 2421 ret = vtd_irte_get(iommu, index, &irte, sid); 2422 if (ret) { 2423 return ret; 2424 } 2425 2426 irq->trigger_mode = irte.irte.trigger_mode; 2427 irq->vector = irte.irte.vector; 2428 irq->delivery_mode = irte.irte.delivery_mode; 2429 irq->dest = le32_to_cpu(irte.irte.dest_id); 2430 if (!iommu->intr_eime) { 2431 #define VTD_IR_APIC_DEST_MASK (0xff00ULL) 2432 #define VTD_IR_APIC_DEST_SHIFT (8) 2433 irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >> 2434 VTD_IR_APIC_DEST_SHIFT; 2435 } 2436 irq->dest_mode = irte.irte.dest_mode; 2437 irq->redir_hint = irte.irte.redir_hint; 2438 2439 VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," 2440 "deliver:%u,dest:%u,dest_mode:%u", index, 2441 irq->trigger_mode, irq->vector, irq->delivery_mode, 2442 irq->dest, irq->dest_mode); 2443 2444 return 0; 2445 } 2446 2447 /* Generate one MSI message from VTDIrq info */ 2448 static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) 2449 { 2450 VTD_MSIMessage msg = {}; 2451 2452 /* Generate address bits */ 2453 msg.dest_mode = irq->dest_mode; 2454 msg.redir_hint = irq->redir_hint; 2455 msg.dest = irq->dest; 2456 msg.__addr_hi = irq->dest & 0xffffff00; 2457 msg.__addr_head = cpu_to_le32(0xfee); 2458 /* Keep this from original MSI address bits */ 2459 msg.__not_used = irq->msi_addr_last_bits; 2460 2461 /* Generate data bits */ 2462 msg.vector = irq->vector; 2463 msg.delivery_mode = irq->delivery_mode; 2464 msg.level = 1; 2465 msg.trigger_mode = irq->trigger_mode; 2466 2467 msg_out->address = msg.msi_addr; 2468 msg_out->data = msg.msi_data; 2469 } 2470 2471 /* Interrupt remapping for MSI/MSI-X entry */ 2472 static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, 2473 MSIMessage *origin, 2474 MSIMessage *translated, 2475 uint16_t sid) 2476 { 2477 int ret = 0; 2478 VTD_IR_MSIAddress addr; 2479 uint16_t index; 2480 VTDIrq irq = {}; 2481 2482 assert(origin && translated); 2483 2484 if (!iommu || !iommu->intr_enabled) { 2485 goto do_not_translate; 2486 } 2487 2488 if (origin->address & VTD_MSI_ADDR_HI_MASK) { 2489 VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" 2490 " during interrupt remapping: 0x%"PRIx32, 2491 (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \ 2492 VTD_MSI_ADDR_HI_SHIFT)); 2493 return -VTD_FR_IR_REQ_RSVD; 2494 } 2495 2496 addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; 2497 if (addr.addr.__head != 0xfee) { 2498 VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " 2499 "0x%"PRIx32, addr.data); 2500 return -VTD_FR_IR_REQ_RSVD; 2501 } 2502 2503 /* This is compatible mode. 
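Requests whose Interrupt Format field selects the compatibility (non-remappable) layout bypass the interrupt remapping table and are passed through unchanged.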
*/ 2504 if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) { 2505 goto do_not_translate; 2506 } 2507 2508 index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l); 2509 2510 #define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff) 2511 #define VTD_IR_MSI_DATA_RESERVED (0xffff0000) 2512 2513 if (addr.addr.sub_valid) { 2514 /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */ 2515 index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; 2516 } 2517 2518 ret = vtd_remap_irq_get(iommu, index, &irq, sid); 2519 if (ret) { 2520 return ret; 2521 } 2522 2523 if (addr.addr.sub_valid) { 2524 VTD_DPRINTF(IR, "received MSI interrupt"); 2525 if (origin->data & VTD_IR_MSI_DATA_RESERVED) { 2526 VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for " 2527 "interrupt remappable entry: 0x%"PRIx32, 2528 origin->data); 2529 return -VTD_FR_IR_REQ_RSVD; 2530 } 2531 } else { 2532 uint8_t vector = origin->data & 0xff; 2533 uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; 2534 2535 VTD_DPRINTF(IR, "received IOAPIC interrupt"); 2536 /* IOAPIC entry vector should be aligned with IRTE vector 2537 * (see vt-d spec 5.1.5.1). */ 2538 if (vector != irq.vector) { 2539 VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: " 2540 "entry: %d, IRTE: %d, index: %d", 2541 vector, irq.vector, index); 2542 } 2543 2544 /* The Trigger Mode field must match the Trigger Mode in the IRTE. 2545 * (see vt-d spec 5.1.5.1). */ 2546 if (trigger_mode != irq.trigger_mode) { 2547 VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: " 2548 "entry: %u, IRTE: %u, index: %d", 2549 trigger_mode, irq.trigger_mode, index); 2550 } 2551 2552 } 2553 2554 /* 2555 * We'd better keep the last two bits, assuming that guest OS 2556 * might modify it. Keep it does not hurt after all. 2557 */ 2558 irq.msi_addr_last_bits = addr.addr.__not_care; 2559 2560 /* Translate VTDIrq to MSI message */ 2561 vtd_generate_msi_message(&irq, translated); 2562 2563 VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> " 2564 "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data, 2565 translated->address, translated->data); 2566 return 0; 2567 2568 do_not_translate: 2569 memcpy(translated, origin, sizeof(*origin)); 2570 return 0; 2571 } 2572 2573 static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, 2574 MSIMessage *dst, uint16_t sid) 2575 { 2576 return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), 2577 src, dst, sid); 2578 } 2579 2580 static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, 2581 uint64_t *data, unsigned size, 2582 MemTxAttrs attrs) 2583 { 2584 return MEMTX_OK; 2585 } 2586 2587 static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, 2588 uint64_t value, unsigned size, 2589 MemTxAttrs attrs) 2590 { 2591 int ret = 0; 2592 MSIMessage from = {}, to = {}; 2593 uint16_t sid = X86_IOMMU_SID_INVALID; 2594 2595 from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST; 2596 from.data = (uint32_t) value; 2597 2598 if (!attrs.unspecified) { 2599 /* We have explicit Source ID */ 2600 sid = attrs.requester_id; 2601 } 2602 2603 ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid); 2604 if (ret) { 2605 /* TODO: report error */ 2606 VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64 2607 " data 0x%"PRIx32, from.address, from.data); 2608 /* Drop this interrupt */ 2609 return MEMTX_ERROR; 2610 } 2611 2612 VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32 2613 " for device sid 0x%04x", 2614 to.address, to.data, sid); 2615 2616 apic_get_class()->send_msi(&to); 2617 2618 return MEMTX_OK; 2619 } 2620 2621 static const 
MemoryRegionOps vtd_mem_ir_ops = { 2622 .read_with_attrs = vtd_mem_ir_read, 2623 .write_with_attrs = vtd_mem_ir_write, 2624 .endianness = DEVICE_LITTLE_ENDIAN, 2625 .impl = { 2626 .min_access_size = 4, 2627 .max_access_size = 4, 2628 }, 2629 .valid = { 2630 .min_access_size = 4, 2631 .max_access_size = 4, 2632 }, 2633 }; 2634 2635 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) 2636 { 2637 uintptr_t key = (uintptr_t)bus; 2638 VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key); 2639 VTDAddressSpace *vtd_dev_as; 2640 char name[128]; 2641 2642 if (!vtd_bus) { 2643 uintptr_t *new_key = g_malloc(sizeof(*new_key)); 2644 *new_key = (uintptr_t)bus; 2645 /* No corresponding free() */ 2646 vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ 2647 X86_IOMMU_PCI_DEVFN_MAX); 2648 vtd_bus->bus = bus; 2649 g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus); 2650 } 2651 2652 vtd_dev_as = vtd_bus->dev_as[devfn]; 2653 2654 if (!vtd_dev_as) { 2655 snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn); 2656 vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace)); 2657 2658 vtd_dev_as->bus = bus; 2659 vtd_dev_as->devfn = (uint8_t)devfn; 2660 vtd_dev_as->iommu_state = s; 2661 vtd_dev_as->context_cache_entry.context_cache_gen = 0; 2662 2663 /* 2664 * Memory region relationships look like this (address ranges show 2665 * only the lower 32 bits, for brevity): 2666 * 2667 * |-----------------+-------------------+----------| 2668 * | Name | Address range | Priority | 2669 * |-----------------+-------------------+----------+ 2670 * | vtd_root | 00000000-ffffffff | 0 | 2671 * | intel_iommu | 00000000-ffffffff | 1 | 2672 * | vtd_sys_alias | 00000000-ffffffff | 1 | 2673 * | intel_iommu_ir | fee00000-feefffff | 64 | 2674 * |-----------------+-------------------+----------| 2675 * 2676 * We enable/disable DMAR by switching enablement for 2677 * vtd_sys_alias and intel_iommu regions. IR region is always 2678 * enabled. 2679 */ 2680 memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), 2681 &s->iommu_ops, "intel_iommu_dmar", 2682 UINT64_MAX); 2683 memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s), 2684 "vtd_sys_alias", get_system_memory(), 2685 0, memory_region_size(get_system_memory())); 2686 memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), 2687 &vtd_mem_ir_ops, s, "intel_iommu_ir", 2688 VTD_INTERRUPT_ADDR_SIZE); 2689 memory_region_init(&vtd_dev_as->root, OBJECT(s), 2690 "vtd_root", UINT64_MAX); 2691 memory_region_add_subregion_overlap(&vtd_dev_as->root, 2692 VTD_INTERRUPT_ADDR_FIRST, 2693 &vtd_dev_as->iommu_ir, 64); 2694 address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name); 2695 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, 2696 &vtd_dev_as->sys_alias, 1); 2697 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, 2698 &vtd_dev_as->iommu, 1); 2699 vtd_switch_address_space(vtd_dev_as); 2700 } 2701 return vtd_dev_as; 2702 } 2703 2704 /* Unmap the whole range in the notifier's scope. */ 2705 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) 2706 { 2707 IOMMUTLBEntry entry; 2708 hwaddr size; 2709 hwaddr start = n->start; 2710 hwaddr end = n->end; 2711 2712 /* 2713 * Note: all the code in this function assumes that IOVA 2714 * bits are no more than VTD_MGAW bits (which is restricted by the 2715 * VT-d spec); otherwise we would need to consider 64-bit overflow.
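* Below, the range is also widened to a naturally aligned power of two so that it can be described by a single addr_mask; for example, an unmap window of [0x1000, 0x4000) (size 0x3000) becomes mask 0x3fff with the IOVA rounded down to 0, i.e. the whole [0, 0x4000) range is invalidated.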
2716 */ 2717 2718 if (end > VTD_ADDRESS_SIZE) { 2719 /* 2720 * No need to unmap regions bigger than the whole 2721 * VT-d supported address space 2722 */ 2723 end = VTD_ADDRESS_SIZE; 2724 } 2725 2726 assert(start <= end); 2727 size = end - start; 2728 2729 if (ctpop64(size) != 1) { 2730 /* 2731 * This size cannot form a correct mask. Enlarge it to 2732 * the smallest power-of-two mask that covers it. 2733 */ 2734 int n = 64 - clz64(size); 2735 if (n > VTD_MGAW) { 2736 /* should not happen, but in case it happens, limit it */ 2737 n = VTD_MGAW; 2738 } 2739 size = 1ULL << n; 2740 } 2741 2742 entry.target_as = &address_space_memory; 2743 /* Adjust iova for the size */ 2744 entry.iova = n->start & ~(size - 1); 2745 /* This field is meaningless for unmap */ 2746 entry.translated_addr = 0; 2747 entry.perm = IOMMU_NONE; 2748 entry.addr_mask = size - 1; 2749 2750 trace_vtd_as_unmap_whole(pci_bus_num(as->bus), 2751 VTD_PCI_SLOT(as->devfn), 2752 VTD_PCI_FUNC(as->devfn), 2753 entry.iova, size); 2754 2755 memory_region_notify_one(n, &entry); 2756 } 2757 2758 static void vtd_address_space_unmap_all(IntelIOMMUState *s) 2759 { 2760 IntelIOMMUNotifierNode *node; 2761 VTDAddressSpace *vtd_as; 2762 IOMMUNotifier *n; 2763 2764 QLIST_FOREACH(node, &s->notifiers_list, next) { 2765 vtd_as = node->vtd_as; 2766 IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { 2767 vtd_address_space_unmap(vtd_as, n); 2768 } 2769 } 2770 } 2771 2772 static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) 2773 { 2774 memory_region_notify_one((IOMMUNotifier *)private, entry); 2775 return 0; 2776 } 2777 2778 static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) 2779 { 2780 VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu); 2781 IntelIOMMUState *s = vtd_as->iommu_state; 2782 uint8_t bus_n = pci_bus_num(vtd_as->bus); 2783 VTDContextEntry ce; 2784 2785 /* 2786 * The replay can be triggered by either an invalidation or a newly 2787 * created entry. No matter what, we release existing mappings 2788 * (for UNMAP-only notifiers this amounts to flushing their caches). 2789 */ 2790 vtd_address_space_unmap(vtd_as, n); 2791 2792 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { 2793 trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn), 2794 PCI_FUNC(vtd_as->devfn), 2795 VTD_CONTEXT_ENTRY_DID(ce.hi), 2796 ce.hi, ce.lo); 2797 vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); 2798 } else { 2799 trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), 2800 PCI_FUNC(vtd_as->devfn)); 2801 } 2802 2803 return; 2804 } 2805 2806 /* Do the initialization. It will also be called on reset, so pay 2807 * attention when adding new initialization code.
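* In practice that means it should only reprogram register defaults and flush the context-cache and IOTLB; one-time allocations (the MMIO region, the hash tables) are done in vtd_realize() and must not be repeated here.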
2808 */ 2809 static void vtd_init(IntelIOMMUState *s) 2810 { 2811 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); 2812 2813 memset(s->csr, 0, DMAR_REG_SIZE); 2814 memset(s->wmask, 0, DMAR_REG_SIZE); 2815 memset(s->w1cmask, 0, DMAR_REG_SIZE); 2816 memset(s->womask, 0, DMAR_REG_SIZE); 2817 2818 s->iommu_ops.translate = vtd_iommu_translate; 2819 s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed; 2820 s->iommu_ops.replay = vtd_iommu_replay; 2821 s->root = 0; 2822 s->root_extended = false; 2823 s->dmar_enabled = false; 2824 s->iq_head = 0; 2825 s->iq_tail = 0; 2826 s->iq = 0; 2827 s->iq_size = 0; 2828 s->qi_enabled = false; 2829 s->iq_last_desc_type = VTD_INV_DESC_NONE; 2830 s->next_frcd_reg = 0; 2831 s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | 2832 VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; 2833 s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; 2834 2835 if (x86_iommu->intr_supported) { 2836 s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; 2837 if (s->intr_eim == ON_OFF_AUTO_ON) { 2838 s->ecap |= VTD_ECAP_EIM; 2839 } 2840 assert(s->intr_eim != ON_OFF_AUTO_AUTO); 2841 } 2842 2843 if (x86_iommu->dt_supported) { 2844 s->ecap |= VTD_ECAP_DT; 2845 } 2846 2847 if (s->caching_mode) { 2848 s->cap |= VTD_CAP_CM; 2849 } 2850 2851 vtd_reset_context_cache(s); 2852 vtd_reset_iotlb(s); 2853 2854 /* Define registers with default values and bit semantics */ 2855 vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); 2856 vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); 2857 vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); 2858 vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); 2859 vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); 2860 vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); 2861 vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0); 2862 vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0); 2863 vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL); 2864 2865 /* Advanced Fault Logging not supported */ 2866 vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL); 2867 vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0); 2868 vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0); 2869 vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0); 2870 2871 /* Treated as RsvdZ when EIM in ECAP_REG is not supported 2872 * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0); 2873 */ 2874 vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0); 2875 2876 /* Treated as RO for implementations that PLMR and PHMR fields reported 2877 * as Clear in the CAP_REG. 
2878 * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0); 2879 */ 2880 vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0); 2881 2882 vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0); 2883 vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0); 2884 vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0); 2885 vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL); 2886 vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0); 2887 vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0); 2888 vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0); 2889 /* Treated as RsvdZ when EIM in ECAP_REG is not supported */ 2890 vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0); 2891 2892 /* IOTLB registers */ 2893 vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0); 2894 vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0); 2895 vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL); 2896 2897 /* Fault Recording Registers, 128-bit */ 2898 vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); 2899 vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); 2900 2901 /* 2902 * Interrupt remapping registers. 2903 */ 2904 vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0); 2905 } 2906 2907 /* Should not reset the address spaces on reset, because devices will still use 2908 * the address space they got at first (they won't ask the bus again). 2909 */ 2910 static void vtd_reset(DeviceState *dev) 2911 { 2912 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); 2913 2914 VTD_DPRINTF(GENERAL, ""); 2915 vtd_init(s); 2916 2917 /* 2918 * On device reset, throw away all mappings and external caches 2919 */ 2920 vtd_address_space_unmap_all(s); 2921 } 2922 2923 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) 2924 { 2925 IntelIOMMUState *s = opaque; 2926 VTDAddressSpace *vtd_as; 2927 2928 assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX); 2929 2930 vtd_as = vtd_find_add_as(s, bus, devfn); 2931 return &vtd_as->as; 2932 } 2933 2934 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) 2935 { 2936 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); 2937 2938 /* Currently Intel IOMMU IR only supports "kernel-irqchip={off|split}" */ 2939 if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && 2940 !kvm_irqchip_is_split()) { 2941 error_setg(errp, "Intel Interrupt Remapping cannot work with " 2942 "kernel-irqchip=on, please use 'split|off'."); 2943 return false; 2944 } 2945 if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) { 2946 error_setg(errp, "eim=on cannot be selected without intremap=on"); 2947 return false; 2948 } 2949 2950 if (s->intr_eim == ON_OFF_AUTO_AUTO) { 2951 s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim) 2952 && x86_iommu->intr_supported ?
2953 ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 2954 } 2955 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { 2956 if (!kvm_irqchip_in_kernel()) { 2957 error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); 2958 return false; 2959 } 2960 if (!kvm_enable_x2apic()) { 2961 error_setg(errp, "eim=on requires support on the KVM side" 2962 "(X2APIC_API, first shipped in v4.7)"); 2963 return false; 2964 } 2965 } 2966 2967 return true; 2968 } 2969 2970 static void vtd_realize(DeviceState *dev, Error **errp) 2971 { 2972 PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); 2973 PCIBus *bus = pcms->bus; 2974 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); 2975 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); 2976 2977 VTD_DPRINTF(GENERAL, ""); 2978 x86_iommu->type = TYPE_INTEL; 2979 2980 if (!vtd_decide_config(s, errp)) { 2981 return; 2982 } 2983 2984 QLIST_INIT(&s->notifiers_list); 2985 memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); 2986 memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, 2987 "intel_iommu", DMAR_REG_SIZE); 2988 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); 2989 /* No corresponding destroy */ 2990 s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, 2991 g_free, g_free); 2992 s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, 2993 g_free, g_free); 2994 vtd_init(s); 2995 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); 2996 pci_setup_iommu(bus, vtd_host_dma_iommu, dev); 2997 /* Pseudo address space under root PCI bus. */ 2998 pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); 2999 } 3000 3001 static void vtd_class_init(ObjectClass *klass, void *data) 3002 { 3003 DeviceClass *dc = DEVICE_CLASS(klass); 3004 X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass); 3005 3006 dc->reset = vtd_reset; 3007 dc->vmsd = &vtd_vmstate; 3008 dc->props = vtd_properties; 3009 dc->hotpluggable = false; 3010 x86_class->realize = vtd_realize; 3011 x86_class->int_remap = vtd_int_remap; 3012 } 3013 3014 static const TypeInfo vtd_info = { 3015 .name = TYPE_INTEL_IOMMU_DEVICE, 3016 .parent = TYPE_X86_IOMMU_DEVICE, 3017 .instance_size = sizeof(IntelIOMMUState), 3018 .class_init = vtd_class_init, 3019 }; 3020 3021 static void vtd_register_types(void) 3022 { 3023 VTD_DPRINTF(GENERAL, ""); 3024 type_register_static(&vtd_info); 3025 } 3026 3027 type_init(vtd_register_types) 3028