/*
 * QEMU emulation of AMD IOMMU (AMD-Vi)
 *
 * Copyright (C) 2011 Eduard - Gabriel Munteanu
 * Copyright (C) 2015, 2016 David Kiarie Kahurani
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.

 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Cache implementation inspired by hw/i386/intel_iommu.c
 */

#include "qemu/osdep.h"
#include "hw/i386/pc.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "migration/vmstate.h"
#include "amd_iommu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/i386/apic_internal.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"
#include "hw/qdev-properties.h"

/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
    "AMDVI_MMIO_DEVTAB_BASE",
    "AMDVI_MMIO_CMDBUF_BASE",
    "AMDVI_MMIO_EVTLOG_BASE",
    "AMDVI_MMIO_CONTROL",
    "AMDVI_MMIO_EXCL_BASE",
    "AMDVI_MMIO_EXCL_LIMIT",
    "AMDVI_MMIO_EXT_FEATURES",
    "AMDVI_MMIO_PPR_BASE",
    "UNHANDLED"
};
const char *amdvi_mmio_high[] = {
    "AMDVI_MMIO_COMMAND_HEAD",
    "AMDVI_MMIO_COMMAND_TAIL",
    "AMDVI_MMIO_EVTLOG_HEAD",
    "AMDVI_MMIO_EVTLOG_TAIL",
    "AMDVI_MMIO_STATUS",
    "AMDVI_MMIO_PPR_HEAD",
    "AMDVI_MMIO_PPR_TAIL",
    "UNHANDLED"
};

struct AMDVIAddressSpace {
    uint8_t bus_num;            /* bus number */
    uint8_t devfn;              /* device function */
    AMDVIState *iommu_state;    /* AMDVI - one per machine */
    MemoryRegion root;          /* AMDVI Root memory map region */
    IOMMUMemoryRegion iommu;    /* Device's address translation region */
    MemoryRegion iommu_ir;      /* Device's interrupt remapping region */
    AddressSpace as;            /* device's corresponding address space */
};

/* AMDVI cache entry */
typedef struct AMDVIIOTLBEntry {
    uint16_t domid;             /* assigned domain id */
    uint16_t devid;             /* device owning entry */
    uint64_t perms;             /* access permissions */
    uint64_t translated_addr;   /* translated address */
    uint64_t page_mask;         /* physical page size */
} AMDVIIOTLBEntry;

uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
    uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
    if (s->xtsup) {
        feature |= AMDVI_FEATURE_XT;
    }

    return feature;
}

/* configure MMIO registers at startup/reset */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
                           uint64_t romask, uint64_t w1cmask)
{
    stq_le_p(&s->mmior[addr], val);
    stq_le_p(&s->romask[addr], romask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}

static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    return lduw_le_p(&s->mmior[addr]);
}

static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    return ldl_le_p(&s->mmior[addr]);
}

static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    return ldq_le_p(&s->mmior[addr]);
}

/* internal write */
static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->mmior[addr], val);
}
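
/*
 * Guest-visible MMIO writes below are filtered through two per-register
 * masks set up by amdvi_set_quad(): bits set in romask are read-only and
 * keep their current value, while bits set in w1cmask are cleared when the
 * guest writes 1 to them.
 */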

/* external write */
static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
{
    uint16_t romask = lduw_le_p(&s->romask[addr]);
    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
    uint16_t oldval = lduw_le_p(&s->mmior[addr]);
    stw_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t romask = ldl_le_p(&s->romask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    uint32_t oldval = ldl_le_p(&s->mmior[addr]);
    stl_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t romask = ldq_le_p(&s->romask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    uint64_t oldval = ldq_le_p(&s->mmior[addr]);
    stq_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

/* test whether any of the given mask bits are set in a 64-bit register */
static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
{
    return amdvi_readq(s, addr) & val;
}

/* OR a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
}

/* AND a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
}

static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        .requester_id = pci_requester_id(&s->pci.dev)
    };

    if (msi_enabled(&s->pci.dev)) {
        msg = msi_get_message(&s->pci.dev, 0);
        address_space_stl_le(&address_space_memory, msg.address, msg.data,
                             attrs, NULL);
    }
}

static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    /* event logging not enabled */
    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
                                              AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    /* event log buffer full */
    if (s->evtlog_tail >= s->evtlog_len) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
        return;
    }

    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
                         evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    s->evtlog_tail += AMDVI_EVENT_LEN;
    amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
    amdvi_generate_msi_interrupt(s);
}

static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    uint64_t mask = MAKE_64BIT_MASK(bitpos, length);
    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}
/*
 * AMDVi event structure
 *    0:15   -> DeviceID
 *    48:63  -> event type + miscellaneous info
 *    64:127 -> related address
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    evt[0] = 0;
    evt[1] = 0;

    amdvi_setevent_bits(evt, devid, 0, 16);
    amdvi_setevent_bits(evt, info, 48, 16);
    amdvi_setevent_bits(evt, addr, 64, 64);
}
/* log an error encountered during a page walk
 *
 * @addr: virtual address in translation request
 */
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
                             hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/*
 * log a master abort accessing device table
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
                                   hwaddr devtab, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;

    amdvi_encode_event(evt, devid, devtab, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an event trying to access command buffer
 *   @addr : address that couldn't be accessed
 */
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
    uint64_t evt[2];
    uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;

    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an illegal command event
 *   @addr : address of illegal command
 */
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
                                       hwaddr addr)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
}
/* log an error accessing device table
 *
 *  @devid : device owning the table entry
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
                                          hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
}
/* log an error accessing a PTE entry
 * @addr : address that couldn't be accessed
 */
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
                                    hwaddr addr, uint16_t info)
{
    uint64_t evt[2];

    info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}

static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint amdvi_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
                                           uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    return g_hash_table_lookup(s->iotlb, &key);
}

static void amdvi_iotlb_reset(AMDVIState *s)
{
    assert(s->iotlb);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(s->iotlb);
}

static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t devid = *(uint16_t *)user_data;
    return entry->devid == devid;
}

static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    g_hash_table_remove(s->iotlb, &key);
}

static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    /* don't cache erroneous translations */
    if (to_cache.perm != IOMMU_NONE) {
        AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
        uint64_t *key = g_new(uint64_t, 1);
        uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;

        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                                 PCI_FUNC(devid), gpa, to_cache.translated_addr);

        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
            amdvi_iotlb_reset(s);
        }

        entry->domid = domid;
        entry->perms = to_cache.perm;
        entry->translated_addr = to_cache.translated_addr;
        entry->page_mask = to_cache.addr_mask;
        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
        g_hash_table_replace(s->iotlb, key, entry);
    }
}

static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    /* pad the last 3 bits */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    if (extract64(cmd[0], 52, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    if (extract64(cmd[0], 0, 1)) {
        if (dma_memory_write(&address_space_memory, addr, &data,
                             AMDVI_COMPLETION_DATA_SIZE,
                             MEMTXATTRS_UNSPECIFIED)) {
            trace_amdvi_completion_wait_fail(addr);
        }
    }
    /* set completion interrupt */
    if (extract64(cmd[0], 1, 1)) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}

/* log error without aborting since Linux seems to be using reserved bits */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));

    /* This command should invalidate internal caches, of which there are none */
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}

static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
        || extract64(cmd[1], 48, 16)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}

static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 0, 60) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}

static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t domid = *(uint16_t *)user_data;
    return entry->domid == domid;
}

/* we don't have devid - we can't remove pages by address */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
        extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}

static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}

static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    trace_amdvi_intr_inval();
}

/* FIXME: Try to work with the specified size instead of all the pages
 * when the S bit is on
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = extract64(cmd[0], 0, 16);
    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 6, 6)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                cpu_to_le16(extract64(cmd[1], 0, 16)));
    }
    trace_amdvi_iotlb_inval();
}

/* not honouring reserved bits is regarded as an illegal command */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
                        cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
        return;
    }

    switch (extract64(cmd[0], 60, 4)) {
    case AMDVI_CMD_COMPLETION_WAIT:
        amdvi_completion_wait(s, cmd);
        break;
    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
        amdvi_inval_devtab_entry(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_PAGES:
        amdvi_inval_pages(s, cmd);
        break;
    case AMDVI_CMD_INVAL_IOTLB_PAGES:
        iommu_inval_iotlb(s, cmd);
        break;
    case AMDVI_CMD_INVAL_INTR_TABLE:
        amdvi_inval_inttable(s, cmd);
        break;
    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
        amdvi_prefetch_pages(s, cmd);
        break;
    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
        amdvi_complete_ppr(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_ALL:
        amdvi_inval_all(s, cmd);
        break;
    default:
        trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
        /* log illegal command */
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
}

static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }
    }
}

static void amdvi_mmio_trace(hwaddr addr, unsigned size)
{
    uint8_t index = (addr & ~0x2000) / 8;

    if ((addr & 0x2000)) {
        /* high table */
        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
    } else {
        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
    }
}

static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    AMDVIState *s = opaque;

    uint64_t val = -1;
    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
        return (uint64_t)-1;
    }

    if (size == 2) {
        val = amdvi_readw(s, addr);
    } else if (size == 4) {
        val = amdvi_readl(s, addr);
    } else if (size == 8) {
        val = amdvi_readq(s, addr);
    }
    amdvi_mmio_trace(addr, size);

    return val;
}

static void amdvi_handle_control_write(AMDVIState *s)
{
    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_devtab_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /* set device table length */
    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE);
}

static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
                & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                            & AMDVI_MMIO_CMDBUF_SIZE_MASK);
    s->cmdbuf_head = s->cmdbuf_tail = 0;
}

static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
                    AMDVI_MMIO_EXCL_LIMIT_LOW;
}

static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                            & AMDVI_MMIO_EVTLOG_SIZE_MASK);
}

static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}

static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                            & AMDVI_MMIO_PPRLOG_SIZE_MASK);
}

static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}

/* FIXME: something might go wrong if System Software writes in chunks
 * of one byte; Linux writes in chunks of 4 bytes, so this currently works
 * correctly with Linux but will definitely be broken if software
 * reads/writes 8 bytes
 */
static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
                                 hwaddr addr)
{
    if (size == 2) {
        amdvi_writew(s, addr, val);
    } else if (size == 4) {
        amdvi_writel(s, addr, val);
    } else if (size == 8) {
        amdvi_writeq(s, addr, val);
    }
}

static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    unsigned long offset = addr & 0x07;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                               (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace(addr, size);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;
    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /* set device table address
         * This also suffers from inability to tell whether software
         * is done writing
         */
        if (offset || (size == 8)) {
            amdvi_handle_devtab_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;
    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /* FIXME - make sure System Software has finished writing in case
         * it writes in chunks of less than 8 bytes in a robust way. For now,
         * this hack works for the Linux driver
         */
        if (offset || (size == 8)) {
            amdvi_handle_cmdbase_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;
    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;
    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;
    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;
    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;
    /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;
    /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;
    /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;
    }
}

static inline uint64_t amdvi_get_perms(uint64_t entry)
{
    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
           AMDVI_DEV_PERM_SHIFT;
}

/* validate that reserved bits are honoured */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
                               uint64_t *dte)
{
    if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
        || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
        || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
        amdvi_log_illegaldevtab_error(s, devid,
                                      s->devtab +
                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
        return false;
    }

    return true;
}

/* get a device table entry given the devid */
static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
{
    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;

    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
                        AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_dte_get_fail(s->devtab, offset);
        /* log error accessing dte */
        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
        return false;
    }

    *entry = le64_to_cpu(*entry);
    if (!amdvi_validate_dte(s, devid, entry)) {
        trace_amdvi_invalid_dte(entry[0]);
        return false;
    }

    return true;
}

/* get pte translation mode */
static inline uint8_t get_pte_translation_mode(uint64_t pte)
{
    return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
}

static inline uint64_t pte_override_page_mask(uint64_t pte)
{
    uint8_t page_mask = 13;
    uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
    /* find the first zero bit */
    while (addr & 1) {
        page_mask++;
        addr = addr >> 1;
    }

    return ~((1ULL << page_mask) - 1);
}

static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1UL << ((oldlevel * 9) + 3)) - 1);
}

static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                           uint16_t devid)
{
    uint64_t pte;

    if (dma_memory_read(&address_space_memory, pte_addr,
                        &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_get_pte_hwerror(pte_addr);
        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
        pte = 0;
        return pte;
    }

    pte = le64_to_cpu(pte);
    return pte;
}
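
/*
 * Walk the I/O page table for the given address: the walk starts from the
 * host page table root pointer in the DTE and follows next-level pointers
 * until it reaches mode/level 0 (a final translation) or level 7 (an entry
 * that overrides the default page size).
 */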
static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, present, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* make sure the DTE has TV = 1 */
    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
        level = get_pte_translation_mode(pte);
        if (level >= 7) {
            trace_amdvi_mode_invalid(level, addr);
            return;
        }
        if (level == 0) {
            goto no_remap;
        }

        /* we are at the leaf page table or page table encodes a huge page */
        do {
            pte_perms = amdvi_get_perms(pte);
            present = pte & 1;
            if (!present || perms != (perms & pte_perms)) {
                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
                trace_amdvi_page_fault(addr);
                return;
            }

            /* go to the next lower level */
            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
            /* add offset and load pte */
            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
            if (!pte) {
                return;
            }
            oldlevel = level;
            level = get_pte_translation_mode(pte);
        } while (level > 0 && level < 7);

        if (level == 0x7) {
            page_mask = pte_override_page_mask(pte);
        } else {
            page_mask = pte_get_page_mask(oldlevel);
        }

        /* get access permissions from pte */
        ret->iova = addr & page_mask;
        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
        ret->addr_mask = ~page_mask;
        ret->perm = amdvi_get_perms(pte);
        return;
    }
no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}

static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t entry[4];

    if (iotlb_entry) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                              PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
        ret->iova = addr & ~iotlb_entry->page_mask;
        ret->translated_addr = iotlb_entry->translated_addr;
        ret->addr_mask = iotlb_entry->page_mask;
        ret->perm = iotlb_entry->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, entry)) {
        return;
    }

    /* devices with V = 0 are not translated */
    if (!(entry[0] & AMDVI_DEV_VALID)) {
        goto out;
    }

    amdvi_page_walk(as, entry, ret,
                    is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);

    amdvi_update_iotlb(s, devid, addr, *ret,
                       entry[1] & AMDVI_DEV_DOMID_ID_MASK);
    return;

out:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}

static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
}

static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                     IOMMUAccessFlags flag, int iommu_idx)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
    AMDVIState *s = as->iommu_state;
    IOMMUTLBEntry ret = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = 0,
        .addr_mask = ~(hwaddr)0,
        .perm = IOMMU_NONE
    };

    if (!s->enabled) {
        /* AMDVI disabled - corresponds to iommu=off not
         * failure to provide any parameter
         */
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_RW;
        return ret;
    } else if (amdvi_is_interrupt_addr(addr)) {
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_WO;
        return ret;
    }

    amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
                                   PCI_FUNC(as->devfn), addr, ret.translated_addr);
    return ret;
}

static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);

    return 0;
}

static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    int ret;
    union irte irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.fields.int_type;
    irq->vector = irte.fields.vector;
    irq->dest_mode = irte.fields.dm;
    irq->redir_hint = irte.fields.rq_eoi;
    irq->dest = irte.fields.destination;

    return 0;
}

static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
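    /* a guest-APIC (128-bit) IRTE is 16 bytes wide, hence the shift by 4 */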
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;
    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}

static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    int ret;
    struct irte_ga irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.lo.fields_remap.int_type;
    irq->vector = irte.hi.fields.vector;
    irq->dest_mode = irte.lo.fields_remap.dm;
    irq->redir_hint = irte.lo.fields_remap.rq_eoi;
    if (iommu->xtsup) {
        irq->dest = irte.lo.fields_remap.destination |
                    (irte.hi.fields.destination_hi << 24);
    } else {
        irq->dest = irte.lo.fields_remap.destination & 0xff;
    }

    return 0;
}

static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    int ret;
    uint8_t int_ctl;

    int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_REMAP:
        break;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }

    if (iommu->ga_enabled) {
        ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
    } else {
        ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
    }

    return ret;
}

/* Interrupt remapping for MSI/MSI-X entry */
static int amdvi_int_remap_msi(AMDVIState *iommu,
                               MSIMessage *origin,
                               MSIMessage *translated,
                               uint16_t sid)
{
    int ret = 0;
    uint64_t pass = 0;
    uint64_t dte[4] = { 0 };
    X86IOMMUIrq irq = { 0 };
    uint8_t dest_mode, delivery_mode;

    assert(origin && translated);

    /*
     * When IOMMU is enabled, interrupt remap request will come either from
     * IO-APIC or PCI device. If interrupt is from PCI device then it will
     * have a valid requester id but if the interrupt is from IO-APIC
     * then requester id will be invalid.
     */
    if (sid == X86_IOMMU_SID_INVALID) {
        sid = AMDVI_IOAPIC_SB_DEVID;
    }

    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);

    /* check if device table entry is set before we go further. */
    if (!iommu || !iommu->devtab_len) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (!amdvi_get_dte(iommu, sid, dte)) {
        return -AMDVI_IR_ERR;
    }

    /* Check if IR is enabled in DTE */
    if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* validate that we are configured with intremap=on */
    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
        trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                        "not in the host. Use intremap=on to enable interrupt "
                        "remapping in amd-iommu.");
        return -AMDVI_IR_ERR;
    }

    if (origin->address < AMDVI_INT_ADDR_FIRST ||
        origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }

    /*
     * MSI data register bits [10:8] are used to get the upstream
     * interrupt type.
     *
     * See MSI/MSI-X format:
     * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
     * (page 5)
     */
    delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;

    switch (delivery_mode) {
    case AMDVI_IOAPIC_INT_TYPE_FIXED:
    case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
        trace_amdvi_ir_delivery_mode("fixed/arbitrated");
        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
        if (ret < 0) {
            goto remap_fail;
        } else {
            /* Translate IRQ to MSI messages */
            x86_iommu_irq_to_msi_message(&irq, translated);
            goto out;
        }
        break;
    case AMDVI_IOAPIC_INT_TYPE_SMI:
        error_report("SMI is not supported!");
        ret = -AMDVI_IR_ERR;
        break;
    case AMDVI_IOAPIC_INT_TYPE_NMI:
        pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
        trace_amdvi_ir_delivery_mode("nmi");
        break;
    case AMDVI_IOAPIC_INT_TYPE_INIT:
        pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("init");
        break;
    case AMDVI_IOAPIC_INT_TYPE_EINT:
        pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("eint");
        break;
    default:
        trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
        ret = -AMDVI_IR_ERR;
        break;
    }

    if (ret < 0) {
        goto remap_fail;
    }

    /*
     * The MSI address register bit[2] is used to get the destination
     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
     * only.
     */
    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
    if (dest_mode) {
        trace_amdvi_ir_err("invalid dest_mode");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

    if (pass) {
        memcpy(translated, origin, sizeof(*origin));
    } else {
        trace_amdvi_ir_err("passthrough is not enabled");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

out:
    trace_amdvi_ir_remap_msi(origin->address, origin->data,
                             translated->address, translated->data);
    return 0;

remap_fail:
    return ret;
}

static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
                               translated, sid);
}

static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
                                      uint64_t value, unsigned size,
                                      MemTxAttrs attrs)
{
    int ret;
    MSIMessage from = { 0, 0 }, to = { 0, 0 };
    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;

    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
    from.data = (uint32_t) value;

    trace_amdvi_mem_ir_write_req(addr, value, size);

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
    if (ret < 0) {
        /* TODO: log the event using IOMMU log event interface */
        error_report_once("failed to remap interrupt from devid 0x%x", sid);
        return MEMTX_ERROR;
    }

    apic_get_class(NULL)->send_msi(&to);

    trace_amdvi_mem_ir_write(to.address, to.data);
    return MEMTX_OK;
}

static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
                                     uint64_t *data, unsigned size,
                                     MemTxAttrs attrs)
{
    return MEMTX_OK;
}

static const MemoryRegionOps amdvi_ir_ops = {
    .read_with_attrs = amdvi_mem_ir_read,
    .write_with_attrs = amdvi_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    }
};

static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    char name[128];
    AMDVIState *s = opaque;
    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
    int bus_num = pci_bus_num(bus);

    iommu_as = s->address_spaces[bus_num];

    /* allocate memory during the first run */
    if (!iommu_as) {
        iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
        s->address_spaces[bus_num] = iommu_as;
    }

    /* set up AMD-Vi region */
    if (!iommu_as[devfn]) {
        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);

        iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
        iommu_as[devfn]->devfn = (uint8_t)devfn;
        iommu_as[devfn]->iommu_state = s;

        amdvi_dev_as = iommu_as[devfn];

        /*
         * Memory region relationships look like this (only the lower 32 bits
         * of each address range are shown, to keep things short):
         *
         * |-----------------+-------------------+----------|
         * | Name            | Address range     | Priority |
         * |-----------------+-------------------+----------|
         * | amdvi_root      | 00000000-ffffffff |        0 |
         * | amdvi_iommu     | 00000000-ffffffff |        1 |
         * | amdvi_iommu_ir  | fee00000-feefffff |       64 |
         * |-----------------+-------------------+----------|
         */
        memory_region_init_iommu(&amdvi_dev_as->iommu,
                                 sizeof(amdvi_dev_as->iommu),
                                 TYPE_AMD_IOMMU_MEMORY_REGION,
                                 OBJECT(s),
                                 "amd_iommu", UINT64_MAX);
        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
                           "amdvi_root", UINT64_MAX);
        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
        memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
                              &amdvi_ir_ops, s, "amd_iommu_ir",
                              AMDVI_INT_ADDR_SIZE);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root,
                                            AMDVI_INT_ADDR_FIRST,
                                            &amdvi_dev_as->iommu_ir,
                                            64);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            MEMORY_REGION(&amdvi_dev_as->iommu),
                                            1);
    }
    return &iommu_as[devfn]->as;
}

static const PCIIOMMUOps amdvi_iommu_ops = {
    .get_address_space = amdvi_host_dma_iommu,
};

static const MemoryRegionOps mmio_mem_ops = {
    .read = amdvi_mmio_read,
    .write = amdvi_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    }
};

static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                           IOMMUNotifierFlag old,
                                           IOMMUNotifierFlag new,
                                           Error **errp)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);

    if (new & IOMMU_NOTIFIER_MAP) {
        error_setg(errp,
                   "device %02x.%02x.%x requires iommu notifier which is not "
                   "currently supported", as->bus_num, PCI_SLOT(as->devfn),
                   PCI_FUNC(as->devfn));
        return -EINVAL;
    }
    return 0;
}

static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;
    s->excl_enabled = false;
    s->excl_allow = false;
    s->mmio_enabled = false;
    s->enabled = false;
    s->ats_enabled = false;
    s->cmdbuf_enabled = false;

    /* reset MMIO */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES,
                   amdvi_extended_feature_register(s),
                   0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);
}

static void amdvi_pci_realize(PCIDevice *pdev, Error **errp)
{
    AMDVIPCIState *s = AMD_IOMMU_PCI(pdev);
    int ret;

    ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0,
                             AMDVI_CAPAB_SIZE, errp);
    if (ret < 0) {
        return;
    }
    s->capab_offset = ret;

    ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }
    ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0,
                             AMDVI_CAPAB_REG_SIZE, errp);
    if (ret < 0) {
        return;
    }

    if (msi_init(pdev, 0, 1, true, false, errp) < 0) {
        return;
    }

    /* reset device ident */
    pci_config_set_prog_interface(pdev->config, 0);

    /* reset AMDVI specific capabilities, all r/o */
    pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
                 AMDVI_BASE_ADDR & ~(0xffff0000));
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
                 (AMDVI_BASE_ADDR & ~(0xffff)) >> 16);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE,
                 0xff000000);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
    pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC,
                 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
}

static void amdvi_sysbus_reset(DeviceState *dev)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);

    msi_reset(&s->pci.dev);
    amdvi_init(s);
}

static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    X86MachineState *x86ms = X86_MACHINE(ms);
    PCIBus *bus = pcms->pcibus;

    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                     amdvi_uint64_equal, g_free, g_free);

    /* This device should take care of IOMMU PCI properties */
    if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
        return;
    }

    /* Pseudo address space under root PCI bus. */
    x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);

    /* set up MMIO */
    memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                          AMDVI_MMIO_SIZE);
    memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
                                &s->mmio);
    pci_setup_iommu(bus, &amdvi_iommu_ops, s);
    amdvi_init(s);
}

static Property amdvi_properties[] = {
    DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
    DEFINE_PROP_END_OF_LIST(),
};

static const VMStateDescription vmstate_amdvi_sysbus = {
    .name = "amd-iommu",
    .unmigratable = 1
};

static void amdvi_sysbus_instance_init(Object *klass)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(klass);

    object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
}

static void amdvi_sysbus_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass);

    device_class_set_legacy_reset(dc, amdvi_sysbus_reset);
    dc->vmsd = &vmstate_amdvi_sysbus;
    dc->hotpluggable = false;
    dc_class->realize = amdvi_sysbus_realize;
    dc_class->int_remap = amdvi_int_remap;
    /* Supported by the pc-q35-* machine types */
    dc->user_creatable = true;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
    device_class_set_props(dc, amdvi_properties);
}

static const TypeInfo amdvi_sysbus = {
    .name = TYPE_AMD_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(AMDVIState),
    .instance_init = amdvi_sysbus_instance_init,
    .class_init = amdvi_sysbus_class_init
};

static void amdvi_pci_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->vendor_id = PCI_VENDOR_ID_AMD;
    k->class_id = 0x0806;
    k->realize = amdvi_pci_realize;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}

static const TypeInfo amdvi_pci = {
    .name = TYPE_AMD_IOMMU_PCI,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(AMDVIPCIState),
    .class_init = amdvi_pci_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = amdvi_translate;
    imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
}

static const TypeInfo amdvi_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_AMD_IOMMU_MEMORY_REGION,
    .class_init = amdvi_iommu_memory_region_class_init,
};

static void amdvi_register_types(void)
{
    type_register_static(&amdvi_pci);
    type_register_static(&amdvi_sysbus);
    type_register_static(&amdvi_iommu_memory_region_info);
}

type_init(amdvi_register_types);