1 /* 2 * QEMU emulation of AMD IOMMU (AMD-Vi) 3 * 4 * Copyright (C) 2011 Eduard - Gabriel Munteanu 5 * Copyright (C) 2015, 2016 David Kiarie Kahurani 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, see <http://www.gnu.org/licenses/>. 19 * 20 * Cache implementation inspired by hw/i386/intel_iommu.c 21 */ 22 23 #include "qemu/osdep.h" 24 #include "hw/i386/pc.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/pci_bus.h" 27 #include "migration/vmstate.h" 28 #include "amd_iommu.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "hw/i386/apic_internal.h" 32 #include "trace.h" 33 #include "hw/i386/apic-msidef.h" 34 35 /* used AMD-Vi MMIO registers */ 36 const char *amdvi_mmio_low[] = { 37 "AMDVI_MMIO_DEVTAB_BASE", 38 "AMDVI_MMIO_CMDBUF_BASE", 39 "AMDVI_MMIO_EVTLOG_BASE", 40 "AMDVI_MMIO_CONTROL", 41 "AMDVI_MMIO_EXCL_BASE", 42 "AMDVI_MMIO_EXCL_LIMIT", 43 "AMDVI_MMIO_EXT_FEATURES", 44 "AMDVI_MMIO_PPR_BASE", 45 "UNHANDLED" 46 }; 47 const char *amdvi_mmio_high[] = { 48 "AMDVI_MMIO_COMMAND_HEAD", 49 "AMDVI_MMIO_COMMAND_TAIL", 50 "AMDVI_MMIO_EVTLOG_HEAD", 51 "AMDVI_MMIO_EVTLOG_TAIL", 52 "AMDVI_MMIO_STATUS", 53 "AMDVI_MMIO_PPR_HEAD", 54 "AMDVI_MMIO_PPR_TAIL", 55 "UNHANDLED" 56 }; 57 58 struct AMDVIAddressSpace { 59 uint8_t bus_num; /* bus number */ 60 uint8_t devfn; /* device function */ 61 AMDVIState *iommu_state; /* AMDVI - one per machine */ 62 MemoryRegion root; /* AMDVI Root memory map region */ 63 IOMMUMemoryRegion iommu; /* Device's address translation region */ 64 MemoryRegion iommu_ir; /* Device's interrupt remapping region */ 65 AddressSpace as; /* device's corresponding address space */ 66 }; 67 68 /* AMDVI cache entry */ 69 typedef struct AMDVIIOTLBEntry { 70 uint16_t domid; /* assigned domain id */ 71 uint16_t devid; /* device owning entry */ 72 uint64_t perms; /* access permissions */ 73 uint64_t translated_addr; /* translated address */ 74 uint64_t page_mask; /* physical page size */ 75 } AMDVIIOTLBEntry; 76 77 /* configure MMIO registers at startup/reset */ 78 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val, 79 uint64_t romask, uint64_t w1cmask) 80 { 81 stq_le_p(&s->mmior[addr], val); 82 stq_le_p(&s->romask[addr], romask); 83 stq_le_p(&s->w1cmask[addr], w1cmask); 84 } 85 86 static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr) 87 { 88 return lduw_le_p(&s->mmior[addr]); 89 } 90 91 static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr) 92 { 93 return ldl_le_p(&s->mmior[addr]); 94 } 95 96 static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) 97 { 98 return ldq_le_p(&s->mmior[addr]); 99 } 100 101 /* internal write */ 102 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val) 103 { 104 stq_le_p(&s->mmior[addr], val); 105 } 106 107 /* external write */ 108 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) 109 { 110 uint16_t romask = lduw_le_p(&s->romask[addr]); 111 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); 112 uint16_t oldval = lduw_le_p(&s->mmior[addr]); 113 stw_le_p(&s->mmior[addr], 114 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 115 } 116 117 static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) 118 { 119 uint32_t romask = ldl_le_p(&s->romask[addr]); 120 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); 121 uint32_t oldval = ldl_le_p(&s->mmior[addr]); 122 stl_le_p(&s->mmior[addr], 123 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 124 } 125 126 static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) 127 { 128 uint64_t romask = ldq_le_p(&s->romask[addr]); 129 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); 130 uint32_t oldval = ldq_le_p(&s->mmior[addr]); 131 stq_le_p(&s->mmior[addr], 132 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 133 } 134 135 /* OR a 64-bit register with a 64-bit value */ 136 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) 137 { 138 return amdvi_readq(s, addr) | val; 139 } 140 141 /* OR a 64-bit register with a 64-bit value storing result in the register */ 142 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val) 143 { 144 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val); 145 } 146 147 /* AND a 64-bit register with a 64-bit value storing result in the register */ 148 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val) 149 { 150 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val); 151 } 152 153 static void amdvi_generate_msi_interrupt(AMDVIState *s) 154 { 155 MSIMessage msg = {}; 156 MemTxAttrs attrs = { 157 .requester_id = pci_requester_id(&s->pci.dev) 158 }; 159 160 if (msi_enabled(&s->pci.dev)) { 161 msg = msi_get_message(&s->pci.dev, 0); 162 address_space_stl_le(&address_space_memory, msg.address, msg.data, 163 attrs, NULL); 164 } 165 } 166 167 static void amdvi_log_event(AMDVIState *s, uint64_t *evt) 168 { 169 /* event logging not enabled */ 170 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, 171 AMDVI_MMIO_STATUS_EVT_OVF)) { 172 return; 173 } 174 175 /* event log buffer full */ 176 if (s->evtlog_tail >= s->evtlog_len) { 177 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); 178 /* generate interrupt */ 179 amdvi_generate_msi_interrupt(s); 180 return; 181 } 182 183 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail, 184 evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) { 185 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); 186 } 187 188 s->evtlog_tail += AMDVI_EVENT_LEN; 189 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 190 amdvi_generate_msi_interrupt(s); 191 } 192 193 static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, 194 int length) 195 { 196 int index = start / 64, bitpos = start % 64; 197 uint64_t mask = MAKE_64BIT_MASK(start, length); 198 buffer[index] &= ~mask; 199 buffer[index] |= (value << bitpos) & mask; 200 } 201 /* 202 * AMDVi event structure 203 * 0:15 -> DeviceID 204 * 55:63 -> event type + miscellaneous info 205 * 63:127 -> related address 206 */ 207 static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, 208 uint16_t info) 209 { 210 amdvi_setevent_bits(evt, devid, 0, 16); 211 amdvi_setevent_bits(evt, info, 55, 8); 212 amdvi_setevent_bits(evt, addr, 63, 64); 213 } 214 /* log an error encountered during a page walk 215 * 216 * @addr: virtual address in translation request 217 */ 218 static void amdvi_page_fault(AMDVIState *s, uint16_t devid, 219 hwaddr addr, uint16_t info) 220 { 221 uint64_t evt[4]; 222 223 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; 224 amdvi_encode_event(evt, devid, addr, info); 225 amdvi_log_event(s, evt); 226 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 227 PCI_STATUS_SIG_TARGET_ABORT); 228 } 229 /* 230 * log a master abort accessing device table 231 * @devtab : address of device table entry 232 * @info : error flags 233 */ 234 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, 235 hwaddr devtab, uint16_t info) 236 { 237 uint64_t evt[4]; 238 239 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; 240 241 amdvi_encode_event(evt, devid, devtab, info); 242 amdvi_log_event(s, evt); 243 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 244 PCI_STATUS_SIG_TARGET_ABORT); 245 } 246 /* log an event trying to access command buffer 247 * @addr : address that couldn't be accessed 248 */ 249 static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) 250 { 251 uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR; 252 253 amdvi_encode_event(evt, 0, addr, info); 254 amdvi_log_event(s, evt); 255 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 256 PCI_STATUS_SIG_TARGET_ABORT); 257 } 258 /* log an illegal comand event 259 * @addr : address of illegal command 260 */ 261 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, 262 hwaddr addr) 263 { 264 uint64_t evt[4]; 265 266 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; 267 amdvi_encode_event(evt, 0, addr, info); 268 amdvi_log_event(s, evt); 269 } 270 /* log an error accessing device table 271 * 272 * @devid : device owning the table entry 273 * @devtab : address of device table entry 274 * @info : error flags 275 */ 276 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, 277 hwaddr addr, uint16_t info) 278 { 279 uint64_t evt[4]; 280 281 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; 282 amdvi_encode_event(evt, devid, addr, info); 283 amdvi_log_event(s, evt); 284 } 285 /* log an error accessing a PTE entry 286 * @addr : address that couldn't be accessed 287 */ 288 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, 289 hwaddr addr, uint16_t info) 290 { 291 uint64_t evt[4]; 292 293 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; 294 amdvi_encode_event(evt, devid, addr, info); 295 amdvi_log_event(s, evt); 296 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 297 PCI_STATUS_SIG_TARGET_ABORT); 298 } 299 300 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2) 301 { 302 return *((const uint64_t *)v1) == *((const uint64_t *)v2); 303 } 304 305 static guint amdvi_uint64_hash(gconstpointer v) 306 { 307 return (guint)*(const uint64_t *)v; 308 } 309 310 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, 311 uint64_t devid) 312 { 313 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 314 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 315 return g_hash_table_lookup(s->iotlb, &key); 316 } 317 318 static void amdvi_iotlb_reset(AMDVIState *s) 319 { 320 assert(s->iotlb); 321 trace_amdvi_iotlb_reset(); 322 g_hash_table_remove_all(s->iotlb); 323 } 324 325 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value, 326 gpointer user_data) 327 { 328 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 329 uint16_t devid = *(uint16_t *)user_data; 330 return entry->devid == devid; 331 } 332 333 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr, 334 uint64_t devid) 335 { 336 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 337 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 338 g_hash_table_remove(s->iotlb, &key); 339 } 340 341 static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, 342 uint64_t gpa, IOMMUTLBEntry to_cache, 343 uint16_t domid) 344 { 345 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); 346 uint64_t *key = g_new(uint64_t, 1); 347 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; 348 349 /* don't cache erroneous translations */ 350 if (to_cache.perm != IOMMU_NONE) { 351 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), 352 PCI_FUNC(devid), gpa, to_cache.translated_addr); 353 354 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) { 355 amdvi_iotlb_reset(s); 356 } 357 358 entry->domid = domid; 359 entry->perms = to_cache.perm; 360 entry->translated_addr = to_cache.translated_addr; 361 entry->page_mask = to_cache.addr_mask; 362 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 363 g_hash_table_replace(s->iotlb, key, entry); 364 } 365 } 366 367 static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) 368 { 369 /* pad the last 3 bits */ 370 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3; 371 uint64_t data = cpu_to_le64(cmd[1]); 372 373 if (extract64(cmd[0], 52, 8)) { 374 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 375 s->cmdbuf + s->cmdbuf_head); 376 } 377 if (extract64(cmd[0], 0, 1)) { 378 if (dma_memory_write(&address_space_memory, addr, &data, 379 AMDVI_COMPLETION_DATA_SIZE, 380 MEMTXATTRS_UNSPECIFIED)) { 381 trace_amdvi_completion_wait_fail(addr); 382 } 383 } 384 /* set completion interrupt */ 385 if (extract64(cmd[0], 1, 1)) { 386 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 387 /* generate interrupt */ 388 amdvi_generate_msi_interrupt(s); 389 } 390 trace_amdvi_completion_wait(addr, data); 391 } 392 393 /* log error without aborting since linux seems to be using reserved bits */ 394 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd) 395 { 396 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16)); 397 398 /* This command should invalidate internal caches of which there isn't */ 399 if (extract64(cmd[0], 16, 44) || cmd[1]) { 400 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 401 s->cmdbuf + s->cmdbuf_head); 402 } 403 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid), 404 PCI_FUNC(devid)); 405 } 406 407 static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd) 408 { 409 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) || 410 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) 411 || extract64(cmd[1], 48, 16)) { 412 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 413 s->cmdbuf + s->cmdbuf_head); 414 } 415 trace_amdvi_ppr_exec(); 416 } 417 418 static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) 419 { 420 if (extract64(cmd[0], 0, 60) || cmd[1]) { 421 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 422 s->cmdbuf + s->cmdbuf_head); 423 } 424 425 amdvi_iotlb_reset(s); 426 trace_amdvi_all_inval(); 427 } 428 429 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value, 430 gpointer user_data) 431 { 432 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 433 uint16_t domid = *(uint16_t *)user_data; 434 return entry->domid == domid; 435 } 436 437 /* we don't have devid - we can't remove pages by address */ 438 static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd) 439 { 440 uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16)); 441 442 if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) || 443 extract64(cmd[1], 3, 9)) { 444 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 445 s->cmdbuf + s->cmdbuf_head); 446 } 447 448 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid, 449 &domid); 450 trace_amdvi_pages_inval(domid); 451 } 452 453 static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd) 454 { 455 if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) || 456 extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || 457 extract64(cmd[1], 5, 7)) { 458 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 459 s->cmdbuf + s->cmdbuf_head); 460 } 461 462 trace_amdvi_prefetch_pages(); 463 } 464 465 static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) 466 { 467 if (extract64(cmd[0], 16, 44) || cmd[1]) { 468 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 469 s->cmdbuf + s->cmdbuf_head); 470 return; 471 } 472 473 trace_amdvi_intr_inval(); 474 } 475 476 /* FIXME: Try to work with the specified size instead of all the pages 477 * when the S bit is on 478 */ 479 static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) 480 { 481 482 uint16_t devid = extract64(cmd[0], 0, 16); 483 if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || 484 extract64(cmd[1], 6, 6)) { 485 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 486 s->cmdbuf + s->cmdbuf_head); 487 return; 488 } 489 490 if (extract64(cmd[1], 0, 1)) { 491 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid, 492 &devid); 493 } else { 494 amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, 495 cpu_to_le16(extract64(cmd[1], 0, 16))); 496 } 497 trace_amdvi_iotlb_inval(); 498 } 499 500 /* not honouring reserved bits is regarded as an illegal command */ 501 static void amdvi_cmdbuf_exec(AMDVIState *s) 502 { 503 uint64_t cmd[2]; 504 505 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, 506 cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) { 507 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head); 508 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head); 509 return; 510 } 511 512 switch (extract64(cmd[0], 60, 4)) { 513 case AMDVI_CMD_COMPLETION_WAIT: 514 amdvi_completion_wait(s, cmd); 515 break; 516 case AMDVI_CMD_INVAL_DEVTAB_ENTRY: 517 amdvi_inval_devtab_entry(s, cmd); 518 break; 519 case AMDVI_CMD_INVAL_AMDVI_PAGES: 520 amdvi_inval_pages(s, cmd); 521 break; 522 case AMDVI_CMD_INVAL_IOTLB_PAGES: 523 iommu_inval_iotlb(s, cmd); 524 break; 525 case AMDVI_CMD_INVAL_INTR_TABLE: 526 amdvi_inval_inttable(s, cmd); 527 break; 528 case AMDVI_CMD_PREFETCH_AMDVI_PAGES: 529 amdvi_prefetch_pages(s, cmd); 530 break; 531 case AMDVI_CMD_COMPLETE_PPR_REQUEST: 532 amdvi_complete_ppr(s, cmd); 533 break; 534 case AMDVI_CMD_INVAL_AMDVI_ALL: 535 amdvi_inval_all(s, cmd); 536 break; 537 default: 538 trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4)); 539 /* log illegal command */ 540 amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4), 541 s->cmdbuf + s->cmdbuf_head); 542 } 543 } 544 545 static void amdvi_cmdbuf_run(AMDVIState *s) 546 { 547 if (!s->cmdbuf_enabled) { 548 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL)); 549 return; 550 } 551 552 /* check if there is work to do. */ 553 while (s->cmdbuf_head != s->cmdbuf_tail) { 554 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); 555 amdvi_cmdbuf_exec(s); 556 s->cmdbuf_head += AMDVI_COMMAND_SIZE; 557 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head); 558 559 /* wrap head pointer */ 560 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { 561 s->cmdbuf_head = 0; 562 } 563 } 564 } 565 566 static void amdvi_mmio_trace(hwaddr addr, unsigned size) 567 { 568 uint8_t index = (addr & ~0x2000) / 8; 569 570 if ((addr & 0x2000)) { 571 /* high table */ 572 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index; 573 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); 574 } else { 575 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index; 576 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); 577 } 578 } 579 580 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) 581 { 582 AMDVIState *s = opaque; 583 584 uint64_t val = -1; 585 if (addr + size > AMDVI_MMIO_SIZE) { 586 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size); 587 return (uint64_t)-1; 588 } 589 590 if (size == 2) { 591 val = amdvi_readw(s, addr); 592 } else if (size == 4) { 593 val = amdvi_readl(s, addr); 594 } else if (size == 8) { 595 val = amdvi_readq(s, addr); 596 } 597 amdvi_mmio_trace(addr, size); 598 599 return val; 600 } 601 602 static void amdvi_handle_control_write(AMDVIState *s) 603 { 604 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL); 605 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN); 606 607 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN); 608 s->evtlog_enabled = s->enabled && !!(control & 609 AMDVI_MMIO_CONTROL_EVENTLOGEN); 610 611 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN); 612 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN); 613 s->cmdbuf_enabled = s->enabled && !!(control & 614 AMDVI_MMIO_CONTROL_CMDBUFLEN); 615 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN); 616 617 /* update the flags depending on the control register */ 618 if (s->cmdbuf_enabled) { 619 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN); 620 } else { 621 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN); 622 } 623 if (s->evtlog_enabled) { 624 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN); 625 } else { 626 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN); 627 } 628 629 trace_amdvi_control_status(control); 630 amdvi_cmdbuf_run(s); 631 } 632 633 static inline void amdvi_handle_devtab_write(AMDVIState *s) 634 635 { 636 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); 637 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); 638 639 /* set device table length */ 640 s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * 641 (AMDVI_MMIO_DEVTAB_SIZE_UNIT / 642 AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); 643 } 644 645 static inline void amdvi_handle_cmdhead_write(AMDVIState *s) 646 { 647 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD) 648 & AMDVI_MMIO_CMDBUF_HEAD_MASK; 649 amdvi_cmdbuf_run(s); 650 } 651 652 static inline void amdvi_handle_cmdbase_write(AMDVIState *s) 653 { 654 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE) 655 & AMDVI_MMIO_CMDBUF_BASE_MASK; 656 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE) 657 & AMDVI_MMIO_CMDBUF_SIZE_MASK); 658 s->cmdbuf_head = s->cmdbuf_tail = 0; 659 } 660 661 static inline void amdvi_handle_cmdtail_write(AMDVIState *s) 662 { 663 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL) 664 & AMDVI_MMIO_CMDBUF_TAIL_MASK; 665 amdvi_cmdbuf_run(s); 666 } 667 668 static inline void amdvi_handle_excllim_write(AMDVIState *s) 669 { 670 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT); 671 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) | 672 AMDVI_MMIO_EXCL_LIMIT_LOW; 673 } 674 675 static inline void amdvi_handle_evtbase_write(AMDVIState *s) 676 { 677 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE); 678 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK; 679 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE) 680 & AMDVI_MMIO_EVTLOG_SIZE_MASK); 681 } 682 683 static inline void amdvi_handle_evttail_write(AMDVIState *s) 684 { 685 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL); 686 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK; 687 } 688 689 static inline void amdvi_handle_evthead_write(AMDVIState *s) 690 { 691 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD); 692 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK; 693 } 694 695 static inline void amdvi_handle_pprbase_write(AMDVIState *s) 696 { 697 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE); 698 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK; 699 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE) 700 & AMDVI_MMIO_PPRLOG_SIZE_MASK); 701 } 702 703 static inline void amdvi_handle_pprhead_write(AMDVIState *s) 704 { 705 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD); 706 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK; 707 } 708 709 static inline void amdvi_handle_pprtail_write(AMDVIState *s) 710 { 711 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL); 712 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK; 713 } 714 715 /* FIXME: something might go wrong if System Software writes in chunks 716 * of one byte but linux writes in chunks of 4 bytes so currently it 717 * works correctly with linux but will definitely be busted if software 718 * reads/writes 8 bytes 719 */ 720 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val, 721 hwaddr addr) 722 { 723 if (size == 2) { 724 amdvi_writew(s, addr, val); 725 } else if (size == 4) { 726 amdvi_writel(s, addr, val); 727 } else if (size == 8) { 728 amdvi_writeq(s, addr, val); 729 } 730 } 731 732 static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, 733 unsigned size) 734 { 735 AMDVIState *s = opaque; 736 unsigned long offset = addr & 0x07; 737 738 if (addr + size > AMDVI_MMIO_SIZE) { 739 trace_amdvi_mmio_write("error: addr outside region: max ", 740 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset); 741 return; 742 } 743 744 amdvi_mmio_trace(addr, size); 745 switch (addr & ~0x07) { 746 case AMDVI_MMIO_CONTROL: 747 amdvi_mmio_reg_write(s, size, val, addr); 748 amdvi_handle_control_write(s); 749 break; 750 case AMDVI_MMIO_DEVICE_TABLE: 751 amdvi_mmio_reg_write(s, size, val, addr); 752 /* set device table address 753 * This also suffers from inability to tell whether software 754 * is done writing 755 */ 756 if (offset || (size == 8)) { 757 amdvi_handle_devtab_write(s); 758 } 759 break; 760 case AMDVI_MMIO_COMMAND_HEAD: 761 amdvi_mmio_reg_write(s, size, val, addr); 762 amdvi_handle_cmdhead_write(s); 763 break; 764 case AMDVI_MMIO_COMMAND_BASE: 765 amdvi_mmio_reg_write(s, size, val, addr); 766 /* FIXME - make sure System Software has finished writing incase 767 * it writes in chucks less than 8 bytes in a robust way.As for 768 * now, this hacks works for the linux driver 769 */ 770 if (offset || (size == 8)) { 771 amdvi_handle_cmdbase_write(s); 772 } 773 break; 774 case AMDVI_MMIO_COMMAND_TAIL: 775 amdvi_mmio_reg_write(s, size, val, addr); 776 amdvi_handle_cmdtail_write(s); 777 break; 778 case AMDVI_MMIO_EVENT_BASE: 779 amdvi_mmio_reg_write(s, size, val, addr); 780 amdvi_handle_evtbase_write(s); 781 break; 782 case AMDVI_MMIO_EVENT_HEAD: 783 amdvi_mmio_reg_write(s, size, val, addr); 784 amdvi_handle_evthead_write(s); 785 break; 786 case AMDVI_MMIO_EVENT_TAIL: 787 amdvi_mmio_reg_write(s, size, val, addr); 788 amdvi_handle_evttail_write(s); 789 break; 790 case AMDVI_MMIO_EXCL_LIMIT: 791 amdvi_mmio_reg_write(s, size, val, addr); 792 amdvi_handle_excllim_write(s); 793 break; 794 /* PPR log base - unused for now */ 795 case AMDVI_MMIO_PPR_BASE: 796 amdvi_mmio_reg_write(s, size, val, addr); 797 amdvi_handle_pprbase_write(s); 798 break; 799 /* PPR log head - also unused for now */ 800 case AMDVI_MMIO_PPR_HEAD: 801 amdvi_mmio_reg_write(s, size, val, addr); 802 amdvi_handle_pprhead_write(s); 803 break; 804 /* PPR log tail - unused for now */ 805 case AMDVI_MMIO_PPR_TAIL: 806 amdvi_mmio_reg_write(s, size, val, addr); 807 amdvi_handle_pprtail_write(s); 808 break; 809 } 810 } 811 812 static inline uint64_t amdvi_get_perms(uint64_t entry) 813 { 814 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >> 815 AMDVI_DEV_PERM_SHIFT; 816 } 817 818 /* validate that reserved bits are honoured */ 819 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, 820 uint64_t *dte) 821 { 822 if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) 823 || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) 824 || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { 825 amdvi_log_illegaldevtab_error(s, devid, 826 s->devtab + 827 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); 828 return false; 829 } 830 831 return true; 832 } 833 834 /* get a device table entry given the devid */ 835 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry) 836 { 837 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE; 838 839 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry, 840 AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) { 841 trace_amdvi_dte_get_fail(s->devtab, offset); 842 /* log error accessing dte */ 843 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0); 844 return false; 845 } 846 847 *entry = le64_to_cpu(*entry); 848 if (!amdvi_validate_dte(s, devid, entry)) { 849 trace_amdvi_invalid_dte(entry[0]); 850 return false; 851 } 852 853 return true; 854 } 855 856 /* get pte translation mode */ 857 static inline uint8_t get_pte_translation_mode(uint64_t pte) 858 { 859 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK; 860 } 861 862 static inline uint64_t pte_override_page_mask(uint64_t pte) 863 { 864 uint8_t page_mask = 13; 865 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12; 866 /* find the first zero bit */ 867 while (addr & 1) { 868 page_mask++; 869 addr = addr >> 1; 870 } 871 872 return ~((1ULL << page_mask) - 1); 873 } 874 875 static inline uint64_t pte_get_page_mask(uint64_t oldlevel) 876 { 877 return ~((1UL << ((oldlevel * 9) + 3)) - 1); 878 } 879 880 static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, 881 uint16_t devid) 882 { 883 uint64_t pte; 884 885 if (dma_memory_read(&address_space_memory, pte_addr, 886 &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) { 887 trace_amdvi_get_pte_hwerror(pte_addr); 888 amdvi_log_pagetab_error(s, devid, pte_addr, 0); 889 pte = 0; 890 return pte; 891 } 892 893 pte = le64_to_cpu(pte); 894 return pte; 895 } 896 897 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, 898 IOMMUTLBEntry *ret, unsigned perms, 899 hwaddr addr) 900 { 901 unsigned level, present, pte_perms, oldlevel; 902 uint64_t pte = dte[0], pte_addr, page_mask; 903 904 /* make sure the DTE has TV = 1 */ 905 if (pte & AMDVI_DEV_TRANSLATION_VALID) { 906 level = get_pte_translation_mode(pte); 907 if (level >= 7) { 908 trace_amdvi_mode_invalid(level, addr); 909 return; 910 } 911 if (level == 0) { 912 goto no_remap; 913 } 914 915 /* we are at the leaf page table or page table encodes a huge page */ 916 do { 917 pte_perms = amdvi_get_perms(pte); 918 present = pte & 1; 919 if (!present || perms != (perms & pte_perms)) { 920 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms); 921 trace_amdvi_page_fault(addr); 922 return; 923 } 924 925 /* go to the next lower level */ 926 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK; 927 /* add offset and load pte */ 928 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; 929 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn); 930 if (!pte) { 931 return; 932 } 933 oldlevel = level; 934 level = get_pte_translation_mode(pte); 935 } while (level > 0 && level < 7); 936 937 if (level == 0x7) { 938 page_mask = pte_override_page_mask(pte); 939 } else { 940 page_mask = pte_get_page_mask(oldlevel); 941 } 942 943 /* get access permissions from pte */ 944 ret->iova = addr & page_mask; 945 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask; 946 ret->addr_mask = ~page_mask; 947 ret->perm = amdvi_get_perms(pte); 948 return; 949 } 950 no_remap: 951 ret->iova = addr & AMDVI_PAGE_MASK_4K; 952 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 953 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 954 ret->perm = amdvi_get_perms(pte); 955 } 956 957 static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, 958 bool is_write, IOMMUTLBEntry *ret) 959 { 960 AMDVIState *s = as->iommu_state; 961 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); 962 AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); 963 uint64_t entry[4]; 964 965 if (iotlb_entry) { 966 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid), 967 PCI_FUNC(devid), addr, iotlb_entry->translated_addr); 968 ret->iova = addr & ~iotlb_entry->page_mask; 969 ret->translated_addr = iotlb_entry->translated_addr; 970 ret->addr_mask = iotlb_entry->page_mask; 971 ret->perm = iotlb_entry->perms; 972 return; 973 } 974 975 if (!amdvi_get_dte(s, devid, entry)) { 976 return; 977 } 978 979 /* devices with V = 0 are not translated */ 980 if (!(entry[0] & AMDVI_DEV_VALID)) { 981 goto out; 982 } 983 984 amdvi_page_walk(as, entry, ret, 985 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr); 986 987 amdvi_update_iotlb(s, devid, addr, *ret, 988 entry[1] & AMDVI_DEV_DOMID_ID_MASK); 989 return; 990 991 out: 992 ret->iova = addr & AMDVI_PAGE_MASK_4K; 993 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 994 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 995 ret->perm = IOMMU_RW; 996 } 997 998 static inline bool amdvi_is_interrupt_addr(hwaddr addr) 999 { 1000 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST; 1001 } 1002 1003 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, 1004 IOMMUAccessFlags flag, int iommu_idx) 1005 { 1006 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1007 AMDVIState *s = as->iommu_state; 1008 IOMMUTLBEntry ret = { 1009 .target_as = &address_space_memory, 1010 .iova = addr, 1011 .translated_addr = 0, 1012 .addr_mask = ~(hwaddr)0, 1013 .perm = IOMMU_NONE 1014 }; 1015 1016 if (!s->enabled) { 1017 /* AMDVI disabled - corresponds to iommu=off not 1018 * failure to provide any parameter 1019 */ 1020 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1021 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1022 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1023 ret.perm = IOMMU_RW; 1024 return ret; 1025 } else if (amdvi_is_interrupt_addr(addr)) { 1026 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1027 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1028 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1029 ret.perm = IOMMU_WO; 1030 return ret; 1031 } 1032 1033 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); 1034 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), 1035 PCI_FUNC(as->devfn), addr, ret.translated_addr); 1036 return ret; 1037 } 1038 1039 static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1040 union irte *irte, uint16_t devid) 1041 { 1042 uint64_t irte_root, offset; 1043 1044 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1045 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2; 1046 1047 trace_amdvi_ir_irte(irte_root, offset); 1048 1049 if (dma_memory_read(&address_space_memory, irte_root + offset, 1050 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) { 1051 trace_amdvi_ir_err("failed to get irte"); 1052 return -AMDVI_IR_GET_IRTE; 1053 } 1054 1055 trace_amdvi_ir_irte_val(irte->val); 1056 1057 return 0; 1058 } 1059 1060 static int amdvi_int_remap_legacy(AMDVIState *iommu, 1061 MSIMessage *origin, 1062 MSIMessage *translated, 1063 uint64_t *dte, 1064 X86IOMMUIrq *irq, 1065 uint16_t sid) 1066 { 1067 int ret; 1068 union irte irte; 1069 1070 /* get interrupt remapping table */ 1071 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid); 1072 if (ret < 0) { 1073 return ret; 1074 } 1075 1076 if (!irte.fields.valid) { 1077 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1078 return -AMDVI_IR_TARGET_ABORT; 1079 } 1080 1081 if (irte.fields.guest_mode) { 1082 error_report_once("guest mode is not zero"); 1083 return -AMDVI_IR_ERR; 1084 } 1085 1086 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1087 error_report_once("reserved int_type"); 1088 return -AMDVI_IR_ERR; 1089 } 1090 1091 irq->delivery_mode = irte.fields.int_type; 1092 irq->vector = irte.fields.vector; 1093 irq->dest_mode = irte.fields.dm; 1094 irq->redir_hint = irte.fields.rq_eoi; 1095 irq->dest = irte.fields.destination; 1096 1097 return 0; 1098 } 1099 1100 static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1101 struct irte_ga *irte, uint16_t devid) 1102 { 1103 uint64_t irte_root, offset; 1104 1105 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1106 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4; 1107 trace_amdvi_ir_irte(irte_root, offset); 1108 1109 if (dma_memory_read(&address_space_memory, irte_root + offset, 1110 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) { 1111 trace_amdvi_ir_err("failed to get irte_ga"); 1112 return -AMDVI_IR_GET_IRTE; 1113 } 1114 1115 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val); 1116 return 0; 1117 } 1118 1119 static int amdvi_int_remap_ga(AMDVIState *iommu, 1120 MSIMessage *origin, 1121 MSIMessage *translated, 1122 uint64_t *dte, 1123 X86IOMMUIrq *irq, 1124 uint16_t sid) 1125 { 1126 int ret; 1127 struct irte_ga irte; 1128 1129 /* get interrupt remapping table */ 1130 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid); 1131 if (ret < 0) { 1132 return ret; 1133 } 1134 1135 if (!irte.lo.fields_remap.valid) { 1136 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1137 return -AMDVI_IR_TARGET_ABORT; 1138 } 1139 1140 if (irte.lo.fields_remap.guest_mode) { 1141 error_report_once("guest mode is not zero"); 1142 return -AMDVI_IR_ERR; 1143 } 1144 1145 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1146 error_report_once("reserved int_type is set"); 1147 return -AMDVI_IR_ERR; 1148 } 1149 1150 irq->delivery_mode = irte.lo.fields_remap.int_type; 1151 irq->vector = irte.hi.fields.vector; 1152 irq->dest_mode = irte.lo.fields_remap.dm; 1153 irq->redir_hint = irte.lo.fields_remap.rq_eoi; 1154 irq->dest = irte.lo.fields_remap.destination; 1155 1156 return 0; 1157 } 1158 1159 static int __amdvi_int_remap_msi(AMDVIState *iommu, 1160 MSIMessage *origin, 1161 MSIMessage *translated, 1162 uint64_t *dte, 1163 X86IOMMUIrq *irq, 1164 uint16_t sid) 1165 { 1166 int ret; 1167 uint8_t int_ctl; 1168 1169 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3; 1170 trace_amdvi_ir_intctl(int_ctl); 1171 1172 switch (int_ctl) { 1173 case AMDVI_IR_INTCTL_PASS: 1174 memcpy(translated, origin, sizeof(*origin)); 1175 return 0; 1176 case AMDVI_IR_INTCTL_REMAP: 1177 break; 1178 case AMDVI_IR_INTCTL_ABORT: 1179 trace_amdvi_ir_target_abort("int_ctl abort"); 1180 return -AMDVI_IR_TARGET_ABORT; 1181 default: 1182 trace_amdvi_ir_err("int_ctl reserved"); 1183 return -AMDVI_IR_ERR; 1184 } 1185 1186 if (iommu->ga_enabled) { 1187 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid); 1188 } else { 1189 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid); 1190 } 1191 1192 return ret; 1193 } 1194 1195 /* Interrupt remapping for MSI/MSI-X entry */ 1196 static int amdvi_int_remap_msi(AMDVIState *iommu, 1197 MSIMessage *origin, 1198 MSIMessage *translated, 1199 uint16_t sid) 1200 { 1201 int ret = 0; 1202 uint64_t pass = 0; 1203 uint64_t dte[4] = { 0 }; 1204 X86IOMMUIrq irq = { 0 }; 1205 uint8_t dest_mode, delivery_mode; 1206 1207 assert(origin && translated); 1208 1209 /* 1210 * When IOMMU is enabled, interrupt remap request will come either from 1211 * IO-APIC or PCI device. If interrupt is from PCI device then it will 1212 * have a valid requester id but if the interrupt is from IO-APIC 1213 * then requester id will be invalid. 1214 */ 1215 if (sid == X86_IOMMU_SID_INVALID) { 1216 sid = AMDVI_IOAPIC_SB_DEVID; 1217 } 1218 1219 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid); 1220 1221 /* check if device table entry is set before we go further. */ 1222 if (!iommu || !iommu->devtab_len) { 1223 memcpy(translated, origin, sizeof(*origin)); 1224 goto out; 1225 } 1226 1227 if (!amdvi_get_dte(iommu, sid, dte)) { 1228 return -AMDVI_IR_ERR; 1229 } 1230 1231 /* Check if IR is enabled in DTE */ 1232 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) { 1233 memcpy(translated, origin, sizeof(*origin)); 1234 goto out; 1235 } 1236 1237 /* validate that we are configure with intremap=on */ 1238 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) { 1239 trace_amdvi_err("Interrupt remapping is enabled in the guest but " 1240 "not in the host. Use intremap=on to enable interrupt " 1241 "remapping in amd-iommu."); 1242 return -AMDVI_IR_ERR; 1243 } 1244 1245 if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { 1246 trace_amdvi_err("MSI address high 32 bits non-zero when " 1247 "Interrupt Remapping enabled."); 1248 return -AMDVI_IR_ERR; 1249 } 1250 1251 if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) { 1252 trace_amdvi_err("MSI is not from IOAPIC."); 1253 return -AMDVI_IR_ERR; 1254 } 1255 1256 /* 1257 * The MSI data register [10:8] are used to get the upstream interrupt type. 1258 * 1259 * See MSI/MSI-X format: 1260 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf 1261 * (page 5) 1262 */ 1263 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7; 1264 1265 switch (delivery_mode) { 1266 case AMDVI_IOAPIC_INT_TYPE_FIXED: 1267 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED: 1268 trace_amdvi_ir_delivery_mode("fixed/arbitrated"); 1269 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid); 1270 if (ret < 0) { 1271 goto remap_fail; 1272 } else { 1273 /* Translate IRQ to MSI messages */ 1274 x86_iommu_irq_to_msi_message(&irq, translated); 1275 goto out; 1276 } 1277 break; 1278 case AMDVI_IOAPIC_INT_TYPE_SMI: 1279 error_report("SMI is not supported!"); 1280 ret = -AMDVI_IR_ERR; 1281 break; 1282 case AMDVI_IOAPIC_INT_TYPE_NMI: 1283 pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK; 1284 trace_amdvi_ir_delivery_mode("nmi"); 1285 break; 1286 case AMDVI_IOAPIC_INT_TYPE_INIT: 1287 pass = dte[3] & AMDVI_DEV_INT_PASS_MASK; 1288 trace_amdvi_ir_delivery_mode("init"); 1289 break; 1290 case AMDVI_IOAPIC_INT_TYPE_EINT: 1291 pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK; 1292 trace_amdvi_ir_delivery_mode("eint"); 1293 break; 1294 default: 1295 trace_amdvi_ir_delivery_mode("unsupported delivery_mode"); 1296 ret = -AMDVI_IR_ERR; 1297 break; 1298 } 1299 1300 if (ret < 0) { 1301 goto remap_fail; 1302 } 1303 1304 /* 1305 * The MSI address register bit[2] is used to get the destination 1306 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts 1307 * only. 1308 */ 1309 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1; 1310 if (dest_mode) { 1311 trace_amdvi_ir_err("invalid dest_mode"); 1312 ret = -AMDVI_IR_ERR; 1313 goto remap_fail; 1314 } 1315 1316 if (pass) { 1317 memcpy(translated, origin, sizeof(*origin)); 1318 } else { 1319 trace_amdvi_ir_err("passthrough is not enabled"); 1320 ret = -AMDVI_IR_ERR; 1321 goto remap_fail; 1322 } 1323 1324 out: 1325 trace_amdvi_ir_remap_msi(origin->address, origin->data, 1326 translated->address, translated->data); 1327 return 0; 1328 1329 remap_fail: 1330 return ret; 1331 } 1332 1333 static int amdvi_int_remap(X86IOMMUState *iommu, 1334 MSIMessage *origin, 1335 MSIMessage *translated, 1336 uint16_t sid) 1337 { 1338 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin, 1339 translated, sid); 1340 } 1341 1342 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr, 1343 uint64_t value, unsigned size, 1344 MemTxAttrs attrs) 1345 { 1346 int ret; 1347 MSIMessage from = { 0, 0 }, to = { 0, 0 }; 1348 uint16_t sid = AMDVI_IOAPIC_SB_DEVID; 1349 1350 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST; 1351 from.data = (uint32_t) value; 1352 1353 trace_amdvi_mem_ir_write_req(addr, value, size); 1354 1355 if (!attrs.unspecified) { 1356 /* We have explicit Source ID */ 1357 sid = attrs.requester_id; 1358 } 1359 1360 ret = amdvi_int_remap_msi(opaque, &from, &to, sid); 1361 if (ret < 0) { 1362 /* TODO: log the event using IOMMU log event interface */ 1363 error_report_once("failed to remap interrupt from devid 0x%x", sid); 1364 return MEMTX_ERROR; 1365 } 1366 1367 apic_get_class()->send_msi(&to); 1368 1369 trace_amdvi_mem_ir_write(to.address, to.data); 1370 return MEMTX_OK; 1371 } 1372 1373 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr, 1374 uint64_t *data, unsigned size, 1375 MemTxAttrs attrs) 1376 { 1377 return MEMTX_OK; 1378 } 1379 1380 static const MemoryRegionOps amdvi_ir_ops = { 1381 .read_with_attrs = amdvi_mem_ir_read, 1382 .write_with_attrs = amdvi_mem_ir_write, 1383 .endianness = DEVICE_LITTLE_ENDIAN, 1384 .impl = { 1385 .min_access_size = 4, 1386 .max_access_size = 4, 1387 }, 1388 .valid = { 1389 .min_access_size = 4, 1390 .max_access_size = 4, 1391 } 1392 }; 1393 1394 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) 1395 { 1396 char name[128]; 1397 AMDVIState *s = opaque; 1398 AMDVIAddressSpace **iommu_as, *amdvi_dev_as; 1399 int bus_num = pci_bus_num(bus); 1400 1401 iommu_as = s->address_spaces[bus_num]; 1402 1403 /* allocate memory during the first run */ 1404 if (!iommu_as) { 1405 iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX); 1406 s->address_spaces[bus_num] = iommu_as; 1407 } 1408 1409 /* set up AMD-Vi region */ 1410 if (!iommu_as[devfn]) { 1411 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); 1412 1413 iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1); 1414 iommu_as[devfn]->bus_num = (uint8_t)bus_num; 1415 iommu_as[devfn]->devfn = (uint8_t)devfn; 1416 iommu_as[devfn]->iommu_state = s; 1417 1418 amdvi_dev_as = iommu_as[devfn]; 1419 1420 /* 1421 * Memory region relationships looks like (Address range shows 1422 * only lower 32 bits to make it short in length...): 1423 * 1424 * |-----------------+-------------------+----------| 1425 * | Name | Address range | Priority | 1426 * |-----------------+-------------------+----------+ 1427 * | amdvi_root | 00000000-ffffffff | 0 | 1428 * | amdvi_iommu | 00000000-ffffffff | 1 | 1429 * | amdvi_iommu_ir | fee00000-feefffff | 64 | 1430 * |-----------------+-------------------+----------| 1431 */ 1432 memory_region_init_iommu(&amdvi_dev_as->iommu, 1433 sizeof(amdvi_dev_as->iommu), 1434 TYPE_AMD_IOMMU_MEMORY_REGION, 1435 OBJECT(s), 1436 "amd_iommu", UINT64_MAX); 1437 memory_region_init(&amdvi_dev_as->root, OBJECT(s), 1438 "amdvi_root", UINT64_MAX); 1439 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); 1440 memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), 1441 &amdvi_ir_ops, s, "amd_iommu_ir", 1442 AMDVI_INT_ADDR_SIZE); 1443 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 1444 AMDVI_INT_ADDR_FIRST, 1445 &amdvi_dev_as->iommu_ir, 1446 64); 1447 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, 1448 MEMORY_REGION(&amdvi_dev_as->iommu), 1449 1); 1450 } 1451 return &iommu_as[devfn]->as; 1452 } 1453 1454 static const MemoryRegionOps mmio_mem_ops = { 1455 .read = amdvi_mmio_read, 1456 .write = amdvi_mmio_write, 1457 .endianness = DEVICE_LITTLE_ENDIAN, 1458 .impl = { 1459 .min_access_size = 1, 1460 .max_access_size = 8, 1461 .unaligned = false, 1462 }, 1463 .valid = { 1464 .min_access_size = 1, 1465 .max_access_size = 8, 1466 } 1467 }; 1468 1469 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, 1470 IOMMUNotifierFlag old, 1471 IOMMUNotifierFlag new, 1472 Error **errp) 1473 { 1474 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1475 1476 if (new & IOMMU_NOTIFIER_MAP) { 1477 error_setg(errp, 1478 "device %02x.%02x.%x requires iommu notifier which is not " 1479 "currently supported", as->bus_num, PCI_SLOT(as->devfn), 1480 PCI_FUNC(as->devfn)); 1481 return -EINVAL; 1482 } 1483 return 0; 1484 } 1485 1486 static void amdvi_init(AMDVIState *s) 1487 { 1488 amdvi_iotlb_reset(s); 1489 1490 s->devtab_len = 0; 1491 s->cmdbuf_len = 0; 1492 s->cmdbuf_head = 0; 1493 s->cmdbuf_tail = 0; 1494 s->evtlog_head = 0; 1495 s->evtlog_tail = 0; 1496 s->excl_enabled = false; 1497 s->excl_allow = false; 1498 s->mmio_enabled = false; 1499 s->enabled = false; 1500 s->ats_enabled = false; 1501 s->cmdbuf_enabled = false; 1502 1503 /* reset MMIO */ 1504 memset(s->mmior, 0, AMDVI_MMIO_SIZE); 1505 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, 1506 0xffffffffffffffef, 0); 1507 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); 1508 1509 /* reset device ident */ 1510 pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD); 1511 pci_config_set_prog_interface(s->pci.dev.config, 00); 1512 pci_config_set_device_id(s->pci.dev.config, s->devid); 1513 pci_config_set_class(s->pci.dev.config, 0x0806); 1514 1515 /* reset AMDVI specific capabilities, all r/o */ 1516 pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES); 1517 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, 1518 s->mmio.addr & ~(0xffff0000)); 1519 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, 1520 (s->mmio.addr & ~(0xffff)) >> 16); 1521 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE, 1522 0xff000000); 1523 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0); 1524 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 1525 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); 1526 } 1527 1528 static void amdvi_sysbus_reset(DeviceState *dev) 1529 { 1530 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1531 1532 msi_reset(&s->pci.dev); 1533 amdvi_init(s); 1534 } 1535 1536 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) 1537 { 1538 int ret = 0; 1539 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1540 MachineState *ms = MACHINE(qdev_get_machine()); 1541 PCMachineState *pcms = PC_MACHINE(ms); 1542 X86MachineState *x86ms = X86_MACHINE(ms); 1543 PCIBus *bus = pcms->bus; 1544 1545 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, 1546 amdvi_uint64_equal, g_free, g_free); 1547 1548 /* This device should take care of IOMMU PCI properties */ 1549 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { 1550 return; 1551 } 1552 ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0, 1553 AMDVI_CAPAB_SIZE, errp); 1554 if (ret < 0) { 1555 return; 1556 } 1557 s->capab_offset = ret; 1558 1559 ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, 1560 AMDVI_CAPAB_REG_SIZE, errp); 1561 if (ret < 0) { 1562 return; 1563 } 1564 ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, 1565 AMDVI_CAPAB_REG_SIZE, errp); 1566 if (ret < 0) { 1567 return; 1568 } 1569 1570 /* Pseudo address space under root PCI bus. */ 1571 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); 1572 1573 /* set up MMIO */ 1574 memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", 1575 AMDVI_MMIO_SIZE); 1576 1577 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); 1578 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); 1579 pci_setup_iommu(bus, amdvi_host_dma_iommu, s); 1580 s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort); 1581 msi_init(&s->pci.dev, 0, 1, true, false, errp); 1582 amdvi_init(s); 1583 } 1584 1585 static const VMStateDescription vmstate_amdvi_sysbus = { 1586 .name = "amd-iommu", 1587 .unmigratable = 1 1588 }; 1589 1590 static void amdvi_sysbus_instance_init(Object *klass) 1591 { 1592 AMDVIState *s = AMD_IOMMU_DEVICE(klass); 1593 1594 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); 1595 } 1596 1597 static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) 1598 { 1599 DeviceClass *dc = DEVICE_CLASS(klass); 1600 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass); 1601 1602 dc->reset = amdvi_sysbus_reset; 1603 dc->vmsd = &vmstate_amdvi_sysbus; 1604 dc->hotpluggable = false; 1605 dc_class->realize = amdvi_sysbus_realize; 1606 dc_class->int_remap = amdvi_int_remap; 1607 /* Supported by the pc-q35-* machine types */ 1608 dc->user_creatable = true; 1609 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1610 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1611 } 1612 1613 static const TypeInfo amdvi_sysbus = { 1614 .name = TYPE_AMD_IOMMU_DEVICE, 1615 .parent = TYPE_X86_IOMMU_DEVICE, 1616 .instance_size = sizeof(AMDVIState), 1617 .instance_init = amdvi_sysbus_instance_init, 1618 .class_init = amdvi_sysbus_class_init 1619 }; 1620 1621 static void amdvi_pci_class_init(ObjectClass *klass, void *data) 1622 { 1623 DeviceClass *dc = DEVICE_CLASS(klass); 1624 1625 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1626 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1627 } 1628 1629 static const TypeInfo amdvi_pci = { 1630 .name = TYPE_AMD_IOMMU_PCI, 1631 .parent = TYPE_PCI_DEVICE, 1632 .instance_size = sizeof(AMDVIPCIState), 1633 .class_init = amdvi_pci_class_init, 1634 .interfaces = (InterfaceInfo[]) { 1635 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1636 { }, 1637 }, 1638 }; 1639 1640 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data) 1641 { 1642 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 1643 1644 imrc->translate = amdvi_translate; 1645 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed; 1646 } 1647 1648 static const TypeInfo amdvi_iommu_memory_region_info = { 1649 .parent = TYPE_IOMMU_MEMORY_REGION, 1650 .name = TYPE_AMD_IOMMU_MEMORY_REGION, 1651 .class_init = amdvi_iommu_memory_region_class_init, 1652 }; 1653 1654 static void amdvi_register_types(void) 1655 { 1656 type_register_static(&amdvi_pci); 1657 type_register_static(&amdvi_sysbus); 1658 type_register_static(&amdvi_iommu_memory_region_info); 1659 } 1660 1661 type_init(amdvi_register_types); 1662