/*
 * QEMU emulation of AMD IOMMU (AMD-Vi)
 *
 * Copyright (C) 2011 Eduard - Gabriel Munteanu
 * Copyright (C) 2015, 2016 David Kiarie Kahurani
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Cache implementation inspired by hw/i386/intel_iommu.c
 */
#include "qemu/osdep.h"
#include "hw/i386/pc.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "amd_iommu.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/i386/apic_internal.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"

/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
    "AMDVI_MMIO_DEVTAB_BASE",
    "AMDVI_MMIO_CMDBUF_BASE",
    "AMDVI_MMIO_EVTLOG_BASE",
    "AMDVI_MMIO_CONTROL",
    "AMDVI_MMIO_EXCL_BASE",
    "AMDVI_MMIO_EXCL_LIMIT",
    "AMDVI_MMIO_EXT_FEATURES",
    "AMDVI_MMIO_PPR_BASE",
    "UNHANDLED"
};
const char *amdvi_mmio_high[] = {
    "AMDVI_MMIO_COMMAND_HEAD",
    "AMDVI_MMIO_COMMAND_TAIL",
    "AMDVI_MMIO_EVTLOG_HEAD",
    "AMDVI_MMIO_EVTLOG_TAIL",
    "AMDVI_MMIO_STATUS",
    "AMDVI_MMIO_PPR_HEAD",
    "AMDVI_MMIO_PPR_TAIL",
    "UNHANDLED"
};

struct AMDVIAddressSpace {
    uint8_t bus_num;            /* bus number                           */
    uint8_t devfn;              /* device function                      */
    AMDVIState *iommu_state;    /* AMDVI - one per machine              */
    MemoryRegion root;          /* AMDVI Root memory map region         */
    IOMMUMemoryRegion iommu;    /* Device's address translation region  */
    MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
    AddressSpace as;            /* device's corresponding address space */
};

/* AMDVI cache entry */
typedef struct AMDVIIOTLBEntry {
    uint16_t domid;             /* assigned domain id   */
    uint16_t devid;             /* device owning entry  */
    uint64_t perms;             /* access permissions   */
    uint64_t translated_addr;   /* translated address   */
    uint64_t page_mask;         /* physical page size   */
} AMDVIIOTLBEntry;

/* configure MMIO registers at startup/reset */
static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val,
                           uint64_t romask, uint64_t w1cmask)
{
    stq_le_p(&s->mmior[addr], val);
    stq_le_p(&s->romask[addr], romask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}

static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr)
{
    return lduw_le_p(&s->mmior[addr]);
}

static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr)
{
    return ldl_le_p(&s->mmior[addr]);
}

static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr)
{
    return ldq_le_p(&s->mmior[addr]);
}

/* internal write */
static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->mmior[addr], val);
}

/* external write */
static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
{
    uint16_t romask = lduw_le_p(&s->romask[addr]);
    uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
    uint16_t oldval = lduw_le_p(&s->mmior[addr]);
    stw_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}
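/*
 * amdvi_writew() above and amdvi_writel()/amdvi_writeq() below share the
 * same guest-visible update rule: read-only bits (romask) keep their old
 * value, writable bits take the new value, and writing 1 to a
 * write-1-to-clear bit (w1cmask) clears it:
 *
 *   new = ((old & romask) | (val & ~romask)) & ~(val & w1cmask)
 *
 * Illustrative example (made-up values, not from the spec): with
 * old = 0xF0, romask = 0x0F, w1cmask = 0x80 and val = 0xFF, the stored
 * result is ((0xF0 & 0x0F) | (0xFF & ~0x0F)) & ~(0xFF & 0x80) = 0x70.
 */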
static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
{
    uint32_t romask = ldl_le_p(&s->romask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    uint32_t oldval = ldl_le_p(&s->mmior[addr]);
    stl_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    uint64_t romask = ldq_le_p(&s->romask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    uint64_t oldval = ldq_le_p(&s->mmior[addr]);
    stq_le_p(&s->mmior[addr],
             ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
}

/* test whether any of the given mask bits are set in a 64-bit register */
static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
{
    return amdvi_readq(s, addr) & val;
}

/* OR a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val);
}

/* AND a 64-bit register with a 64-bit value storing result in the register */
static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val)
{
    amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val);
}

static void amdvi_generate_msi_interrupt(AMDVIState *s)
{
    MSIMessage msg = {};
    MemTxAttrs attrs = {
        .requester_id = pci_requester_id(&s->pci.dev)
    };

    if (msi_enabled(&s->pci.dev)) {
        msg = msi_get_message(&s->pci.dev, 0);
        address_space_stl_le(&address_space_memory, msg.address, msg.data,
                             attrs, NULL);
    }
}

static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
    /* event logging not enabled */
    if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
        AMDVI_MMIO_STATUS_EVT_OVF)) {
        return;
    }

    /* event log buffer full */
    if (s->evtlog_tail >= s->evtlog_len) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
        return;
    }

    if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail,
                         evt, AMDVI_EVENT_LEN)) {
        trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
    }

    s->evtlog_tail += AMDVI_EVENT_LEN;
    amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
    amdvi_generate_msi_interrupt(s);
}

static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
                                int length)
{
    int index = start / 64, bitpos = start % 64;
    uint64_t mask = MAKE_64BIT_MASK(start, length);
    buffer[index] &= ~mask;
    buffer[index] |= (value << bitpos) & mask;
}
/*
 * AMDVi event structure
 *    0:15   -> DeviceID
 *    55:63  -> event type + miscellaneous info
 *    63:127 -> related address
 */
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
                               uint16_t info)
{
    amdvi_setevent_bits(evt, devid, 0, 16);
    amdvi_setevent_bits(evt, info, 55, 8);
    amdvi_setevent_bits(evt, addr, 63, 64);
}
/* log an error encountered during a page walk
 *
 * @addr: virtual address in translation request
 */
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
                             hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/*
 * log a master abort accessing device table
 *  @devtab : address of device table entry
 *  @info : error flags
 */
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
                                   hwaddr devtab, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;

    amdvi_encode_event(evt, devid, devtab, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an event trying to access command buffer
 *   @addr : address that couldn't be accessed
 */
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
    uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;

    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}
/* log an illegal command event
 *   @addr : address of illegal command
 */
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
                                       hwaddr addr)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
    amdvi_encode_event(evt, 0, addr, info);
    amdvi_log_event(s, evt);
}
/* log an illegal device table entry
 *
 * @devid : device owning the table entry
 * @devtab : address of device table entry
 * @info : error flags
 */
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
                                          hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
}
/* log an error accessing a PTE entry
 * @addr : address that couldn't be accessed
 */
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
                                    hwaddr addr, uint16_t info)
{
    uint64_t evt[4];

    info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
    amdvi_encode_event(evt, devid, addr, info);
    amdvi_log_event(s, evt);
    pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS,
                               PCI_STATUS_SIG_TARGET_ABORT);
}

static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint amdvi_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
                                           uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    return g_hash_table_lookup(s->iotlb, &key);
}

static void amdvi_iotlb_reset(AMDVIState *s)
{
    assert(s->iotlb);
    trace_amdvi_iotlb_reset();
    g_hash_table_remove_all(s->iotlb);
}

static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t devid = *(uint16_t *)user_data;
    return entry->devid == devid;
}

static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr,
                                    uint64_t devid)
{
    uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) |
                   ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
    g_hash_table_remove(s->iotlb, &key);
}
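/*
 * The IOTLB is keyed on the guest 4K frame number combined with the
 * requester's BDF:  key = (iova >> AMDVI_PAGE_SHIFT_4K) |
 * ((uint64_t)devid << AMDVI_DEVID_SHIFT).  Illustrative example with
 * made-up values: a device at 01:02.3 (devid 0x0113) accessing iova
 * 0x76543000 uses key 0x76543 | (0x113ULL << AMDVI_DEVID_SHIFT).
 */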
static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid,
                               uint64_t gpa, IOMMUTLBEntry to_cache,
                               uint16_t domid)
{
    AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1);
    uint64_t *key = g_new(uint64_t, 1);
    uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K;

    /* don't cache erroneous translations */
    if (to_cache.perm != IOMMU_NONE) {
        trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid),
                                 PCI_FUNC(devid), gpa, to_cache.translated_addr);

        if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) {
            amdvi_iotlb_reset(s);
        }

        entry->domid = domid;
        entry->perms = to_cache.perm;
        entry->translated_addr = to_cache.translated_addr;
        entry->page_mask = to_cache.addr_mask;
        *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT);
        g_hash_table_replace(s->iotlb, key, entry);
    }
}

static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
{
    /* pad the last 3 bits */
    hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3;
    uint64_t data = cpu_to_le64(cmd[1]);

    if (extract64(cmd[0], 51, 8)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    if (extract64(cmd[0], 0, 1)) {
        if (dma_memory_write(&address_space_memory, addr, &data,
            AMDVI_COMPLETION_DATA_SIZE)) {
            trace_amdvi_completion_wait_fail(addr);
        }
    }
    /* set completion interrupt */
    if (extract64(cmd[0], 1, 1)) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
        /* generate interrupt */
        amdvi_generate_msi_interrupt(s);
    }
    trace_amdvi_completion_wait(addr, data);
}

/* log error without aborting since linux seems to be using reserved bits */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));

    /* This command should invalidate internal caches, of which there are none */
    if (extract64(cmd[0], 15, 16) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                             PCI_FUNC(devid));
}

static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 15, 16) || extract64(cmd[0], 19, 8) ||
        extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29)
        || extract64(cmd[1], 47, 16)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
    trace_amdvi_ppr_exec();
}

static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 0, 60) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    amdvi_iotlb_reset(s);
    trace_amdvi_all_inval();
}

static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
                                            gpointer user_data)
{
    AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value;
    uint16_t domid = *(uint16_t *)user_data;
    return entry->domid == domid;
}

/* we don't have devid - we can't remove pages by address */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 16, 12) ||
        extract64(cmd[0], 3, 10)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}

static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 20, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}

static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 16) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    trace_amdvi_intr_inval();
}

/* FIXME: Try to work with the specified size instead of all the pages
 * when the S bit is on
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{

    uint16_t devid = extract64(cmd[0], 0, 16);
    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                cpu_to_le16(extract64(cmd[1], 0, 16)));
    }
    trace_amdvi_iotlb_inval();
}
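/*
 * Commands are consumed from a ring buffer in guest memory.  Each command
 * is AMDVI_COMMAND_SIZE (16) bytes long and its opcode lives in bits 60:63
 * of the first quadword, which is what the dispatch below switches on.
 * The head pointer is advanced by the device and wraps at
 * cmdbuf_len * AMDVI_COMMAND_SIZE; the tail pointer is owned by the guest.
 */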
/* not honouring reserved bits is regarded as an illegal command */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

    if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head,
        cmd, AMDVI_COMMAND_SIZE)) {
        trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head);
        amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head);
        return;
    }

    switch (extract64(cmd[0], 60, 4)) {
    case AMDVI_CMD_COMPLETION_WAIT:
        amdvi_completion_wait(s, cmd);
        break;
    case AMDVI_CMD_INVAL_DEVTAB_ENTRY:
        amdvi_inval_devtab_entry(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_PAGES:
        amdvi_inval_pages(s, cmd);
        break;
    case AMDVI_CMD_INVAL_IOTLB_PAGES:
        iommu_inval_iotlb(s, cmd);
        break;
    case AMDVI_CMD_INVAL_INTR_TABLE:
        amdvi_inval_inttable(s, cmd);
        break;
    case AMDVI_CMD_PREFETCH_AMDVI_PAGES:
        amdvi_prefetch_pages(s, cmd);
        break;
    case AMDVI_CMD_COMPLETE_PPR_REQUEST:
        amdvi_complete_ppr(s, cmd);
        break;
    case AMDVI_CMD_INVAL_AMDVI_ALL:
        amdvi_inval_all(s, cmd);
        break;
    default:
        trace_amdvi_unhandled_command(extract64(cmd[0], 60, 4));
        /* log illegal command */
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }
}

static void amdvi_cmdbuf_run(AMDVIState *s)
{
    if (!s->cmdbuf_enabled) {
        trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL));
        return;
    }

    /* check if there is work to do. */
    while (s->cmdbuf_head != s->cmdbuf_tail) {
        trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf);
        amdvi_cmdbuf_exec(s);
        s->cmdbuf_head += AMDVI_COMMAND_SIZE;
        amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head);

        /* wrap head pointer */
        if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) {
            s->cmdbuf_head = 0;
        }
    }
}
static void amdvi_mmio_trace(hwaddr addr, unsigned size)
{
    uint8_t index = (addr & ~0x2000) / 8;

    if ((addr & 0x2000)) {
        /* high table */
        index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
    } else {
        index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
    }
}

static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    AMDVIState *s = opaque;

    uint64_t val = -1;
    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
        return (uint64_t)-1;
    }

    if (size == 2) {
        val = amdvi_readw(s, addr);
    } else if (size == 4) {
        val = amdvi_readl(s, addr);
    } else if (size == 8) {
        val = amdvi_readq(s, addr);
    }
    amdvi_mmio_trace(addr, size);

    return val;
}

static void amdvi_handle_control_write(AMDVIState *s)
{
    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);

    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
    s->evtlog_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_EVENTLOGEN);

    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
    s->cmdbuf_enabled = s->enabled && !!(control &
                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
    s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN);

    /* update the flags depending on the control register */
    if (s->cmdbuf_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
    }
    if (s->evtlog_enabled) {
        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
    } else {
        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
    }

    trace_amdvi_control_status(control);
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_devtab_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);

    /* set device table length */
    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
                    (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
                     AMDVI_MMIO_DEVTAB_ENTRY_SIZE);
}

static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
{
    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
{
    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
                & AMDVI_MMIO_CMDBUF_BASE_MASK;
    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
    s->cmdbuf_head = s->cmdbuf_tail = 0;
}

static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
{
    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
    amdvi_cmdbuf_run(s);
}

static inline void amdvi_handle_excllim_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
                    AMDVI_MMIO_EXCL_LIMIT_LOW;
}

static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);
}
static inline void amdvi_handle_evttail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
}

static inline void amdvi_handle_evthead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprbase_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
}

static inline void amdvi_handle_pprhead_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
}

static inline void amdvi_handle_pprtail_write(AMDVIState *s)
{
    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
}

/* FIXME: something might go wrong if system software writes these
 * registers in chunks of one byte. Linux writes in chunks of 4 bytes,
 * so this currently works correctly with Linux, but it will definitely
 * be busted if software reads/writes 8 bytes.
 */
static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
                                 hwaddr addr)
{
    if (size == 2) {
        amdvi_writew(s, addr, val);
    } else if (size == 4) {
        amdvi_writel(s, addr, val);
    } else if (size == 8) {
        amdvi_writeq(s, addr, val);
    }
}
static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    AMDVIState *s = opaque;
    unsigned long offset = addr & 0x07;

    if (addr + size > AMDVI_MMIO_SIZE) {
        trace_amdvi_mmio_write("error: addr outside region: max ",
                               (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
        return;
    }

    amdvi_mmio_trace(addr, size);
    switch (addr & ~0x07) {
    case AMDVI_MMIO_CONTROL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_control_write(s);
        break;
    case AMDVI_MMIO_DEVICE_TABLE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /* set device table address.
         * This also suffers from inability to tell whether software
         * is done writing.
         */
        if (offset || (size == 8)) {
            amdvi_handle_devtab_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdhead_write(s);
        break;
    case AMDVI_MMIO_COMMAND_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        /* FIXME - make sure system software has finished writing, in case
         * it writes in chunks of less than 8 bytes, in a robust way. For
         * now, this hack works for the Linux driver.
         */
        if (offset || (size == 8)) {
            amdvi_handle_cmdbase_write(s);
        }
        break;
    case AMDVI_MMIO_COMMAND_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_cmdtail_write(s);
        break;
    case AMDVI_MMIO_EVENT_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evtbase_write(s);
        break;
    case AMDVI_MMIO_EVENT_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evthead_write(s);
        break;
    case AMDVI_MMIO_EVENT_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_evttail_write(s);
        break;
    case AMDVI_MMIO_EXCL_LIMIT:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_excllim_write(s);
        break;
    /* PPR log base - unused for now */
    case AMDVI_MMIO_PPR_BASE:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprbase_write(s);
        break;
    /* PPR log head - also unused for now */
    case AMDVI_MMIO_PPR_HEAD:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprhead_write(s);
        break;
    /* PPR log tail - unused for now */
    case AMDVI_MMIO_PPR_TAIL:
        amdvi_mmio_reg_write(s, size, val, addr);
        amdvi_handle_pprtail_write(s);
        break;
    }
}

static inline uint64_t amdvi_get_perms(uint64_t entry)
{
    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
           AMDVI_DEV_PERM_SHIFT;
}

/* validate that reserved bits are honoured */
static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
                               uint64_t *dte)
{
    if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
        || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
        || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
        amdvi_log_illegaldevtab_error(s, devid,
                                      s->devtab +
                                      devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
        return false;
    }

    return true;
}

/* get a device table entry given the devid */
static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
{
    uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;

    if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
        AMDVI_DEVTAB_ENTRY_SIZE)) {
        trace_amdvi_dte_get_fail(s->devtab, offset);
        /* log error accessing dte */
        amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
        return false;
    }

    *entry = le64_to_cpu(*entry);
    if (!amdvi_validate_dte(s, devid, entry)) {
        trace_amdvi_invalid_dte(entry[0]);
        return false;
    }

    return true;
}

/* get pte translation mode */
static inline uint8_t get_pte_translation_mode(uint64_t pte)
{
    return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
}

static inline uint64_t pte_override_page_mask(uint64_t pte)
{
    uint8_t page_mask = 12;
    uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK;
    /* find the first zero bit */
    while (addr & 1) {
        page_mask++;
        addr = addr >> 1;
    }

    return ~((1ULL << page_mask) - 1);
}

static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
{
    return ~((1UL << ((oldlevel * 9) + 3)) - 1);
}

static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
                                           uint16_t devid)
{
    uint64_t pte;

    if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) {
        trace_amdvi_get_pte_hwerror(pte_addr);
        amdvi_log_pagetab_error(s, devid, pte_addr, 0);
        pte = 0;
        return pte;
    }

    pte = le64_to_cpu(pte);
    return pte;
}
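/*
 * Each page-table level resolves 9 bits of the IOVA: the index into the
 * table at a given level is (addr >> (3 + 9 * level)) & 0x1FF, so level 1
 * covers IOVA bits 12:20, level 2 bits 21:29, and so on, with 4 KiB pages
 * at the bottom.  A "next level" value of 7 in a PTE is treated below as a
 * page-size override and the effective page mask is derived from the PTE
 * address bits via pte_override_page_mask() above.
 */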
static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
                            IOMMUTLBEntry *ret, unsigned perms,
                            hwaddr addr)
{
    unsigned level, present, pte_perms, oldlevel;
    uint64_t pte = dte[0], pte_addr, page_mask;

    /* make sure the DTE has TV = 1 */
    if (pte & AMDVI_DEV_TRANSLATION_VALID) {
        level = get_pte_translation_mode(pte);
        if (level >= 7) {
            trace_amdvi_mode_invalid(level, addr);
            return;
        }
        if (level == 0) {
            goto no_remap;
        }

        /* we are at the leaf page table or page table encodes a huge page */
        while (level > 0) {
            pte_perms = amdvi_get_perms(pte);
            present = pte & 1;
            if (!present || perms != (perms & pte_perms)) {
                amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
                trace_amdvi_page_fault(addr);
                return;
            }

            /* go to the next lower level */
            pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
            /* add offset and load pte */
            pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
            pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
            if (!pte) {
                return;
            }
            oldlevel = level;
            level = get_pte_translation_mode(pte);
            if (level == 0x7) {
                break;
            }
        }

        if (level == 0x7) {
            page_mask = pte_override_page_mask(pte);
        } else {
            page_mask = pte_get_page_mask(oldlevel);
        }

        /* get access permissions from pte */
        ret->iova = addr & page_mask;
        ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
        ret->addr_mask = ~page_mask;
        ret->perm = amdvi_get_perms(pte);
        return;
    }
no_remap:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = amdvi_get_perms(pte);
}
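/*
 * Translation order used below: the per-device IOTLB cache is consulted
 * first; on a miss the device table entry is fetched and, if the entry is
 * valid and translation is enabled, the page tables are walked and the
 * result is cached.  Devices whose DTE has V = 0 get an identity mapping
 * with full RW permissions.
 */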
static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
                               bool is_write, IOMMUTLBEntry *ret)
{
    AMDVIState *s = as->iommu_state;
    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
    AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
    uint64_t entry[4];

    if (iotlb_entry) {
        trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
                              PCI_FUNC(devid), addr, iotlb_entry->translated_addr);
        ret->iova = addr & ~iotlb_entry->page_mask;
        ret->translated_addr = iotlb_entry->translated_addr;
        ret->addr_mask = iotlb_entry->page_mask;
        ret->perm = iotlb_entry->perms;
        return;
    }

    if (!amdvi_get_dte(s, devid, entry)) {
        return;
    }

    /* devices with V = 0 are not translated */
    if (!(entry[0] & AMDVI_DEV_VALID)) {
        goto out;
    }

    amdvi_page_walk(as, entry, ret,
                    is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr);

    amdvi_update_iotlb(s, devid, addr, *ret,
                       entry[1] & AMDVI_DEV_DOMID_ID_MASK);
    return;

out:
    ret->iova = addr & AMDVI_PAGE_MASK_4K;
    ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
    ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
    ret->perm = IOMMU_RW;
}

static inline bool amdvi_is_interrupt_addr(hwaddr addr)
{
    return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST;
}

static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                     IOMMUAccessFlags flag, int iommu_idx)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
    AMDVIState *s = as->iommu_state;
    IOMMUTLBEntry ret = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = 0,
        .addr_mask = ~(hwaddr)0,
        .perm = IOMMU_NONE
    };

    if (!s->enabled) {
        /* AMDVI disabled - corresponds to iommu=off, not
         * failure to provide any parameter
         */
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_RW;
        return ret;
    } else if (amdvi_is_interrupt_addr(addr)) {
        ret.iova = addr & AMDVI_PAGE_MASK_4K;
        ret.translated_addr = addr & AMDVI_PAGE_MASK_4K;
        ret.addr_mask = ~AMDVI_PAGE_MASK_4K;
        ret.perm = IOMMU_WO;
        return ret;
    }

    amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
                                   PCI_FUNC(as->devfn), addr, ret.translated_addr);
    return ret;
}

static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                          union irte *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 2;

    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte))) {
        trace_amdvi_ir_err("failed to get irte");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_val(irte->val);

    return 0;
}

static int amdvi_int_remap_legacy(AMDVIState *iommu,
                                  MSIMessage *origin,
                                  MSIMessage *translated,
                                  uint64_t *dte,
                                  X86IOMMUIrq *irq,
                                  uint16_t sid)
{
    int ret;
    union irte irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.fields.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.fields.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.fields.int_type;
    irq->vector = irte.fields.vector;
    irq->dest_mode = irte.fields.dm;
    irq->redir_hint = irte.fields.rq_eoi;
    irq->dest = irte.fields.destination;

    return 0;
}
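/*
 * AMDVI_IRTE_OFFSET selects the IRTE index from the remapped MSI data.
 * The byte offset into the table is the index scaled by the entry size:
 * 4 bytes (<< 2) for the legacy union irte used above, 16 bytes (<< 4)
 * for the guest-APIC-capable struct irte_ga used below.
 */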
static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte,
                             struct irte_ga *irte, uint16_t devid)
{
    uint64_t irte_root, offset;

    irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK;
    offset = (origin->data & AMDVI_IRTE_OFFSET) << 4;

    trace_amdvi_ir_irte(irte_root, offset);

    if (dma_memory_read(&address_space_memory, irte_root + offset,
                        irte, sizeof(*irte))) {
        trace_amdvi_ir_err("failed to get irte_ga");
        return -AMDVI_IR_GET_IRTE;
    }

    trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val);
    return 0;
}

static int amdvi_int_remap_ga(AMDVIState *iommu,
                              MSIMessage *origin,
                              MSIMessage *translated,
                              uint64_t *dte,
                              X86IOMMUIrq *irq,
                              uint16_t sid)
{
    int ret;
    struct irte_ga irte;

    /* get interrupt remapping table */
    ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid);
    if (ret < 0) {
        return ret;
    }

    if (!irte.lo.fields_remap.valid) {
        trace_amdvi_ir_target_abort("RemapEn is disabled");
        return -AMDVI_IR_TARGET_ABORT;
    }

    if (irte.lo.fields_remap.guest_mode) {
        error_report_once("guest mode is not zero");
        return -AMDVI_IR_ERR;
    }

    if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) {
        error_report_once("reserved int_type is set");
        return -AMDVI_IR_ERR;
    }

    irq->delivery_mode = irte.lo.fields_remap.int_type;
    irq->vector = irte.hi.fields.vector;
    irq->dest_mode = irte.lo.fields_remap.dm;
    irq->redir_hint = irte.lo.fields_remap.rq_eoi;
    irq->dest = irte.lo.fields_remap.destination;

    return 0;
}

static int __amdvi_int_remap_msi(AMDVIState *iommu,
                                 MSIMessage *origin,
                                 MSIMessage *translated,
                                 uint64_t *dte,
                                 X86IOMMUIrq *irq,
                                 uint16_t sid)
{
    int ret;
    uint8_t int_ctl;

    int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3;
    trace_amdvi_ir_intctl(int_ctl);

    switch (int_ctl) {
    case AMDVI_IR_INTCTL_PASS:
        memcpy(translated, origin, sizeof(*origin));
        return 0;
    case AMDVI_IR_INTCTL_REMAP:
        break;
    case AMDVI_IR_INTCTL_ABORT:
        trace_amdvi_ir_target_abort("int_ctl abort");
        return -AMDVI_IR_TARGET_ABORT;
    default:
        trace_amdvi_ir_err("int_ctl reserved");
        return -AMDVI_IR_ERR;
    }

    if (iommu->ga_enabled) {
        ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid);
    } else {
        ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid);
    }

    return ret;
}
/* Interrupt remapping for MSI/MSI-X entry */
static int amdvi_int_remap_msi(AMDVIState *iommu,
                               MSIMessage *origin,
                               MSIMessage *translated,
                               uint16_t sid)
{
    int ret = 0;
    uint64_t pass = 0;
    uint64_t dte[4] = { 0 };
    X86IOMMUIrq irq = { 0 };
    uint8_t dest_mode, delivery_mode;

    assert(origin && translated);

    /*
     * When IOMMU is enabled, interrupt remap request will come either from
     * IO-APIC or PCI device. If interrupt is from PCI device then it will
     * have a valid requester id but if the interrupt is from IO-APIC
     * then requester id will be invalid.
     */
    if (sid == X86_IOMMU_SID_INVALID) {
        sid = AMDVI_IOAPIC_SB_DEVID;
    }

    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);

    /* check if device table entry is set before we go further. */
    if (!iommu || !iommu->devtab_len) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (!amdvi_get_dte(iommu, sid, dte)) {
        return -AMDVI_IR_ERR;
    }

    /* Check if IR is enabled in DTE */
    if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* validate that we are configured with intremap=on */
    if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) {
        trace_amdvi_err("Interrupt remapping is enabled in the guest but "
                        "not in the host. Use intremap=on to enable interrupt "
                        "remapping in amd-iommu.");
        return -AMDVI_IR_ERR;
    }

    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
        trace_amdvi_err("MSI address high 32 bits non-zero when "
                        "Interrupt Remapping enabled.");
        return -AMDVI_IR_ERR;
    }

    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }

    /*
     * The MSI data register [10:8] is used to get the upstream interrupt type.
     *
     * See MSI/MSI-X format:
     * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf
     * (page 5)
     */
    delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7;

    switch (delivery_mode) {
    case AMDVI_IOAPIC_INT_TYPE_FIXED:
    case AMDVI_IOAPIC_INT_TYPE_ARBITRATED:
        trace_amdvi_ir_delivery_mode("fixed/arbitrated");
        ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid);
        if (ret < 0) {
            goto remap_fail;
        } else {
            /* Translate IRQ to MSI messages */
            x86_iommu_irq_to_msi_message(&irq, translated);
            goto out;
        }
        break;
    case AMDVI_IOAPIC_INT_TYPE_SMI:
        error_report("SMI is not supported!");
        ret = -AMDVI_IR_ERR;
        break;
    case AMDVI_IOAPIC_INT_TYPE_NMI:
        pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK;
        trace_amdvi_ir_delivery_mode("nmi");
        break;
    case AMDVI_IOAPIC_INT_TYPE_INIT:
        pass = dte[3] & AMDVI_DEV_INT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("init");
        break;
    case AMDVI_IOAPIC_INT_TYPE_EINT:
        pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK;
        trace_amdvi_ir_delivery_mode("eint");
        break;
    default:
        trace_amdvi_ir_delivery_mode("unsupported delivery_mode");
        ret = -AMDVI_IR_ERR;
        break;
    }

    if (ret < 0) {
        goto remap_fail;
    }

    /*
     * The MSI address register bit[2] is used to get the destination
     * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts
     * only.
     */
    dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1;
    if (dest_mode) {
        trace_amdvi_ir_err("invalid dest_mode");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

    if (pass) {
        memcpy(translated, origin, sizeof(*origin));
    } else {
        trace_amdvi_ir_err("passthrough is not enabled");
        ret = -AMDVI_IR_ERR;
        goto remap_fail;
    }

out:
    trace_amdvi_ir_remap_msi(origin->address, origin->data,
                             translated->address, translated->data);
    return 0;

remap_fail:
    return ret;
}
static int amdvi_int_remap(X86IOMMUState *iommu,
                           MSIMessage *origin,
                           MSIMessage *translated,
                           uint16_t sid)
{
    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
                               translated, sid);
}

static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
                                      uint64_t value, unsigned size,
                                      MemTxAttrs attrs)
{
    int ret;
    MSIMessage from = { 0, 0 }, to = { 0, 0 };
    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;

    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
    from.data = (uint32_t) value;

    trace_amdvi_mem_ir_write_req(addr, value, size);

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
    if (ret < 0) {
        /* TODO: log the event using IOMMU log event interface */
        error_report_once("failed to remap interrupt from devid 0x%x", sid);
        return MEMTX_ERROR;
    }

    apic_get_class()->send_msi(&to);

    trace_amdvi_mem_ir_write(to.address, to.data);
    return MEMTX_OK;
}

static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
                                     uint64_t *data, unsigned size,
                                     MemTxAttrs attrs)
{
    return MEMTX_OK;
}

static const MemoryRegionOps amdvi_ir_ops = {
    .read_with_attrs = amdvi_mem_ir_read,
    .write_with_attrs = amdvi_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    }
};
static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    char name[128];
    AMDVIState *s = opaque;
    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
    int bus_num = pci_bus_num(bus);

    iommu_as = s->address_spaces[bus_num];

    /* allocate memory during the first run */
    if (!iommu_as) {
        iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX);
        s->address_spaces[bus_num] = iommu_as;
    }

    /* set up AMD-Vi region */
    if (!iommu_as[devfn]) {
        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);

        iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
        iommu_as[devfn]->devfn = (uint8_t)devfn;
        iommu_as[devfn]->iommu_state = s;

        amdvi_dev_as = iommu_as[devfn];

        /*
         * Memory region relationships look like (Address range shows
         * only lower 32 bits to make it short in length...):
         *
         * |-----------------+-------------------+----------|
         * | Name            | Address range     | Priority |
         * |-----------------+-------------------+----------+
         * | amdvi_root      | 00000000-ffffffff |        0 |
         * | amdvi_iommu     | 00000000-ffffffff |        1 |
         * | amdvi_iommu_ir  | fee00000-feefffff |       64 |
         * |-----------------+-------------------+----------|
         */
        memory_region_init_iommu(&amdvi_dev_as->iommu,
                                 sizeof(amdvi_dev_as->iommu),
                                 TYPE_AMD_IOMMU_MEMORY_REGION,
                                 OBJECT(s),
                                 "amd_iommu", UINT64_MAX);
        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
                           "amdvi_root", UINT64_MAX);
        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);
        memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
                              &amdvi_ir_ops, s, "amd_iommu_ir",
                              AMDVI_INT_ADDR_SIZE);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root,
                                            AMDVI_INT_ADDR_FIRST,
                                            &amdvi_dev_as->iommu_ir,
                                            64);
        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
                                            MEMORY_REGION(&amdvi_dev_as->iommu),
                                            1);
    }
    return &iommu_as[devfn]->as;
}

static const MemoryRegionOps mmio_mem_ops = {
    .read = amdvi_mmio_read,
    .write = amdvi_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    }
};

static void amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new)
{
    AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);

    if (new & IOMMU_NOTIFIER_MAP) {
        error_report("device %02x.%02x.%x requires iommu notifier which is not "
                     "currently supported", as->bus_num, PCI_SLOT(as->devfn),
                     PCI_FUNC(as->devfn));
        exit(1);
    }
}

static void amdvi_init(AMDVIState *s)
{
    amdvi_iotlb_reset(s);

    s->devtab_len = 0;
    s->cmdbuf_len = 0;
    s->cmdbuf_head = 0;
    s->cmdbuf_tail = 0;
    s->evtlog_head = 0;
    s->evtlog_tail = 0;
    s->excl_enabled = false;
    s->excl_allow = false;
    s->mmio_enabled = false;
    s->enabled = false;
    s->ats_enabled = false;
    s->cmdbuf_enabled = false;

    /* reset MMIO */
    memset(s->mmior, 0, AMDVI_MMIO_SIZE);
    amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES,
                   0xffffffffffffffef, 0);
    amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67);

    /* reset device ident */
    pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD);
    pci_config_set_prog_interface(s->pci.dev.config, 00);
    pci_config_set_device_id(s->pci.dev.config, s->devid);
    pci_config_set_class(s->pci.dev.config, 0x0806);

    /* reset AMDVI specific capabilities, all r/o */
    pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW,
                 s->mmio.addr & ~(0xffff0000));
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH,
                 (s->mmio.addr & ~(0xffff)) >> 16);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE,
                 0xff000000);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0);
    pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC,
                 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR);
}

static void amdvi_reset(DeviceState *dev)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);

    msi_reset(&s->pci.dev);
    amdvi_init(s);
}
static void amdvi_realize(DeviceState *dev, Error **err)
{
    int ret = 0;
    AMDVIState *s = AMD_IOMMU_DEVICE(dev);
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    PCIBus *bus = pcms->bus;

    s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
                                     amdvi_uint64_equal, g_free, g_free);

    /* This device should take care of IOMMU PCI properties */
    x86_iommu->type = TYPE_AMD;
    qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus);
    object_property_set_bool(OBJECT(&s->pci), true, "realized", err);
    ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0,
                             AMDVI_CAPAB_SIZE, err);
    if (ret < 0) {
        return;
    }
    s->capab_offset = ret;

    ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0,
                             AMDVI_CAPAB_REG_SIZE, err);
    if (ret < 0) {
        return;
    }
    ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0,
                             AMDVI_CAPAB_REG_SIZE, err);
    if (ret < 0) {
        return;
    }

    /* Pseudo address space under root PCI bus. */
    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);

    /* set up MMIO */
    memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio",
                          AMDVI_MMIO_SIZE);

    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
    s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
    msi_init(&s->pci.dev, 0, 1, true, false, err);
    amdvi_init(s);
}

static const VMStateDescription vmstate_amdvi = {
    .name = "amd-iommu",
    .unmigratable = 1
};

static void amdvi_instance_init(Object *klass)
{
    AMDVIState *s = AMD_IOMMU_DEVICE(klass);

    object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI);
}

static void amdvi_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *dc_class = X86_IOMMU_CLASS(klass);

    dc->reset = amdvi_reset;
    dc->vmsd = &vmstate_amdvi;
    dc->hotpluggable = false;
    dc_class->realize = amdvi_realize;
    dc_class->int_remap = amdvi_int_remap;
    /* Supported by the pc-q35-* machine types */
    dc->user_creatable = true;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
}

static const TypeInfo amdvi = {
    .name = TYPE_AMD_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(AMDVIState),
    .instance_init = amdvi_instance_init,
    .class_init = amdvi_class_init
};

static const TypeInfo amdviPCI = {
    .name = "AMDVI-PCI",
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(AMDVIPCIState),
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = amdvi_translate;
    imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
}

static const TypeInfo amdvi_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_AMD_IOMMU_MEMORY_REGION,
    .class_init = amdvi_iommu_memory_region_class_init,
};
static void amdviPCI_register_types(void)
{
    type_register_static(&amdviPCI);
    type_register_static(&amdvi);
    type_register_static(&amdvi_iommu_memory_region_info);
}

type_init(amdviPCI_register_types);