1 /* 2 * QEMU emulation of AMD IOMMU (AMD-Vi) 3 * 4 * Copyright (C) 2011 Eduard - Gabriel Munteanu 5 * Copyright (C) 2015, 2016 David Kiarie Kahurani 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, see <http://www.gnu.org/licenses/>. 19 * 20 * Cache implementation inspired by hw/i386/intel_iommu.c 21 */ 22 23 #include "qemu/osdep.h" 24 #include "hw/i386/pc.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/pci_bus.h" 27 #include "migration/vmstate.h" 28 #include "amd_iommu.h" 29 #include "qapi/error.h" 30 #include "qemu/error-report.h" 31 #include "hw/i386/apic_internal.h" 32 #include "trace.h" 33 #include "hw/i386/apic-msidef.h" 34 35 /* used AMD-Vi MMIO registers */ 36 const char *amdvi_mmio_low[] = { 37 "AMDVI_MMIO_DEVTAB_BASE", 38 "AMDVI_MMIO_CMDBUF_BASE", 39 "AMDVI_MMIO_EVTLOG_BASE", 40 "AMDVI_MMIO_CONTROL", 41 "AMDVI_MMIO_EXCL_BASE", 42 "AMDVI_MMIO_EXCL_LIMIT", 43 "AMDVI_MMIO_EXT_FEATURES", 44 "AMDVI_MMIO_PPR_BASE", 45 "UNHANDLED" 46 }; 47 const char *amdvi_mmio_high[] = { 48 "AMDVI_MMIO_COMMAND_HEAD", 49 "AMDVI_MMIO_COMMAND_TAIL", 50 "AMDVI_MMIO_EVTLOG_HEAD", 51 "AMDVI_MMIO_EVTLOG_TAIL", 52 "AMDVI_MMIO_STATUS", 53 "AMDVI_MMIO_PPR_HEAD", 54 "AMDVI_MMIO_PPR_TAIL", 55 "UNHANDLED" 56 }; 57 58 struct AMDVIAddressSpace { 59 uint8_t bus_num; /* bus number */ 60 uint8_t devfn; /* device function */ 61 AMDVIState *iommu_state; /* AMDVI - one per machine */ 62 MemoryRegion root; /* AMDVI Root memory 
map region */ 63 IOMMUMemoryRegion iommu; /* Device's address translation region */ 64 MemoryRegion iommu_ir; /* Device's interrupt remapping region */ 65 AddressSpace as; /* device's corresponding address space */ 66 }; 67 68 /* AMDVI cache entry */ 69 typedef struct AMDVIIOTLBEntry { 70 uint16_t domid; /* assigned domain id */ 71 uint16_t devid; /* device owning entry */ 72 uint64_t perms; /* access permissions */ 73 uint64_t translated_addr; /* translated address */ 74 uint64_t page_mask; /* physical page size */ 75 } AMDVIIOTLBEntry; 76 77 /* configure MMIO registers at startup/reset */ 78 static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val, 79 uint64_t romask, uint64_t w1cmask) 80 { 81 stq_le_p(&s->mmior[addr], val); 82 stq_le_p(&s->romask[addr], romask); 83 stq_le_p(&s->w1cmask[addr], w1cmask); 84 } 85 86 static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr) 87 { 88 return lduw_le_p(&s->mmior[addr]); 89 } 90 91 static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr) 92 { 93 return ldl_le_p(&s->mmior[addr]); 94 } 95 96 static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) 97 { 98 return ldq_le_p(&s->mmior[addr]); 99 } 100 101 /* internal write */ 102 static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val) 103 { 104 stq_le_p(&s->mmior[addr], val); 105 } 106 107 /* external write */ 108 static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) 109 { 110 uint16_t romask = lduw_le_p(&s->romask[addr]); 111 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); 112 uint16_t oldval = lduw_le_p(&s->mmior[addr]); 113 stw_le_p(&s->mmior[addr], 114 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 115 } 116 117 static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) 118 { 119 uint32_t romask = ldl_le_p(&s->romask[addr]); 120 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); 121 uint32_t oldval = ldl_le_p(&s->mmior[addr]); 122 stl_le_p(&s->mmior[addr], 123 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 
124 } 125 126 static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) 127 { 128 uint64_t romask = ldq_le_p(&s->romask[addr]); 129 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); 130 uint32_t oldval = ldq_le_p(&s->mmior[addr]); 131 stq_le_p(&s->mmior[addr], 132 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 133 } 134 135 /* OR a 64-bit register with a 64-bit value */ 136 static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) 137 { 138 return amdvi_readq(s, addr) | val; 139 } 140 141 /* OR a 64-bit register with a 64-bit value storing result in the register */ 142 static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val) 143 { 144 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val); 145 } 146 147 /* AND a 64-bit register with a 64-bit value storing result in the register */ 148 static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val) 149 { 150 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val); 151 } 152 153 static void amdvi_generate_msi_interrupt(AMDVIState *s) 154 { 155 MSIMessage msg = {}; 156 MemTxAttrs attrs = { 157 .requester_id = pci_requester_id(&s->pci.dev) 158 }; 159 160 if (msi_enabled(&s->pci.dev)) { 161 msg = msi_get_message(&s->pci.dev, 0); 162 address_space_stl_le(&address_space_memory, msg.address, msg.data, 163 attrs, NULL); 164 } 165 } 166 167 static void amdvi_log_event(AMDVIState *s, uint64_t *evt) 168 { 169 /* event logging not enabled */ 170 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, 171 AMDVI_MMIO_STATUS_EVT_OVF)) { 172 return; 173 } 174 175 /* event log buffer full */ 176 if (s->evtlog_tail >= s->evtlog_len) { 177 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); 178 /* generate interrupt */ 179 amdvi_generate_msi_interrupt(s); 180 return; 181 } 182 183 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail, 184 evt, AMDVI_EVENT_LEN, MEMTXATTRS_UNSPECIFIED)) { 185 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); 186 
} 187 188 s->evtlog_tail += AMDVI_EVENT_LEN; 189 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 190 amdvi_generate_msi_interrupt(s); 191 } 192 193 static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, 194 int length) 195 { 196 int index = start / 64, bitpos = start % 64; 197 uint64_t mask = MAKE_64BIT_MASK(start, length); 198 buffer[index] &= ~mask; 199 buffer[index] |= (value << bitpos) & mask; 200 } 201 /* 202 * AMDVi event structure 203 * 0:15 -> DeviceID 204 * 48:63 -> event type + miscellaneous info 205 * 64:127 -> related address 206 */ 207 static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, 208 uint16_t info) 209 { 210 evt[0] = 0; 211 evt[1] = 0; 212 213 amdvi_setevent_bits(evt, devid, 0, 16); 214 amdvi_setevent_bits(evt, info, 48, 16); 215 amdvi_setevent_bits(evt, addr, 64, 64); 216 } 217 /* log an error encountered during a page walk 218 * 219 * @addr: virtual address in translation request 220 */ 221 static void amdvi_page_fault(AMDVIState *s, uint16_t devid, 222 hwaddr addr, uint16_t info) 223 { 224 uint64_t evt[2]; 225 226 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; 227 amdvi_encode_event(evt, devid, addr, info); 228 amdvi_log_event(s, evt); 229 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 230 PCI_STATUS_SIG_TARGET_ABORT); 231 } 232 /* 233 * log a master abort accessing device table 234 * @devtab : address of device table entry 235 * @info : error flags 236 */ 237 static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, 238 hwaddr devtab, uint16_t info) 239 { 240 uint64_t evt[2]; 241 242 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; 243 244 amdvi_encode_event(evt, devid, devtab, info); 245 amdvi_log_event(s, evt); 246 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 247 PCI_STATUS_SIG_TARGET_ABORT); 248 } 249 /* log an event trying to access command buffer 250 * @addr : address that couldn't be accessed 251 */ 252 static void 
amdvi_log_command_error(AMDVIState *s, hwaddr addr) 253 { 254 uint64_t evt[2]; 255 uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR; 256 257 amdvi_encode_event(evt, 0, addr, info); 258 amdvi_log_event(s, evt); 259 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 260 PCI_STATUS_SIG_TARGET_ABORT); 261 } 262 /* log an illegal command event 263 * @addr : address of illegal command 264 */ 265 static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, 266 hwaddr addr) 267 { 268 uint64_t evt[2]; 269 270 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; 271 amdvi_encode_event(evt, 0, addr, info); 272 amdvi_log_event(s, evt); 273 } 274 /* log an error accessing device table 275 * 276 * @devid : device owning the table entry 277 * @devtab : address of device table entry 278 * @info : error flags 279 */ 280 static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, 281 hwaddr addr, uint16_t info) 282 { 283 uint64_t evt[2]; 284 285 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; 286 amdvi_encode_event(evt, devid, addr, info); 287 amdvi_log_event(s, evt); 288 } 289 /* log an error accessing a PTE entry 290 * @addr : address that couldn't be accessed 291 */ 292 static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, 293 hwaddr addr, uint16_t info) 294 { 295 uint64_t evt[2]; 296 297 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; 298 amdvi_encode_event(evt, devid, addr, info); 299 amdvi_log_event(s, evt); 300 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 301 PCI_STATUS_SIG_TARGET_ABORT); 302 } 303 304 static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2) 305 { 306 return *((const uint64_t *)v1) == *((const uint64_t *)v2); 307 } 308 309 static guint amdvi_uint64_hash(gconstpointer v) 310 { 311 return (guint)*(const uint64_t *)v; 312 } 313 314 static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, 315 uint64_t devid) 316 { 317 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 318 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 
319 return g_hash_table_lookup(s->iotlb, &key); 320 } 321 322 static void amdvi_iotlb_reset(AMDVIState *s) 323 { 324 assert(s->iotlb); 325 trace_amdvi_iotlb_reset(); 326 g_hash_table_remove_all(s->iotlb); 327 } 328 329 static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value, 330 gpointer user_data) 331 { 332 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 333 uint16_t devid = *(uint16_t *)user_data; 334 return entry->devid == devid; 335 } 336 337 static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr, 338 uint64_t devid) 339 { 340 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 341 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 342 g_hash_table_remove(s->iotlb, &key); 343 } 344 345 static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, 346 uint64_t gpa, IOMMUTLBEntry to_cache, 347 uint16_t domid) 348 { 349 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); 350 uint64_t *key = g_new(uint64_t, 1); 351 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; 352 353 /* don't cache erroneous translations */ 354 if (to_cache.perm != IOMMU_NONE) { 355 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), 356 PCI_FUNC(devid), gpa, to_cache.translated_addr); 357 358 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) { 359 amdvi_iotlb_reset(s); 360 } 361 362 entry->domid = domid; 363 entry->perms = to_cache.perm; 364 entry->translated_addr = to_cache.translated_addr; 365 entry->page_mask = to_cache.addr_mask; 366 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 367 g_hash_table_replace(s->iotlb, key, entry); 368 } 369 } 370 371 static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) 372 { 373 /* pad the last 3 bits */ 374 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3; 375 uint64_t data = cpu_to_le64(cmd[1]); 376 377 if (extract64(cmd[0], 52, 8)) { 378 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 379 s->cmdbuf + s->cmdbuf_head); 380 } 381 if (extract64(cmd[0], 0, 1)) { 382 if 
(dma_memory_write(&address_space_memory, addr, &data, 383 AMDVI_COMPLETION_DATA_SIZE, 384 MEMTXATTRS_UNSPECIFIED)) { 385 trace_amdvi_completion_wait_fail(addr); 386 } 387 } 388 /* set completion interrupt */ 389 if (extract64(cmd[0], 1, 1)) { 390 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 391 /* generate interrupt */ 392 amdvi_generate_msi_interrupt(s); 393 } 394 trace_amdvi_completion_wait(addr, data); 395 } 396 397 /* log error without aborting since linux seems to be using reserved bits */ 398 static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd) 399 { 400 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16)); 401 402 /* This command should invalidate internal caches of which there isn't */ 403 if (extract64(cmd[0], 16, 44) || cmd[1]) { 404 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 405 s->cmdbuf + s->cmdbuf_head); 406 } 407 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid), 408 PCI_FUNC(devid)); 409 } 410 411 static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd) 412 { 413 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) || 414 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) 415 || extract64(cmd[1], 48, 16)) { 416 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 417 s->cmdbuf + s->cmdbuf_head); 418 } 419 trace_amdvi_ppr_exec(); 420 } 421 422 static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) 423 { 424 if (extract64(cmd[0], 0, 60) || cmd[1]) { 425 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 426 s->cmdbuf + s->cmdbuf_head); 427 } 428 429 amdvi_iotlb_reset(s); 430 trace_amdvi_all_inval(); 431 } 432 433 static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value, 434 gpointer user_data) 435 { 436 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 437 uint16_t domid = *(uint16_t *)user_data; 438 return entry->domid == domid; 439 } 440 441 /* we don't have devid - we can't remove pages by address */ 442 static void 
amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
    uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));

    if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
        extract64(cmd[1], 3, 9)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    /* every cached page of the domain is dropped, whatever the address */
    g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
                                &domid);
    trace_amdvi_pages_inval(domid);
}

/* PREFETCH_IOMMU_PAGES command: only the reserved bits are checked */
static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) ||
        extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 5, 7)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
    }

    trace_amdvi_prefetch_pages();
}

/* INVALIDATE_INTERRUPT_TABLE command: only the reserved bits are checked */
static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
{
    if (extract64(cmd[0], 16, 44) || cmd[1]) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    trace_amdvi_intr_inval();
}

/*
 * INVALIDATE_IOTLB_PAGES command.
 *
 * The device id lives in cmd[0] bits 0..15.  With the S bit (cmd[1] bit 0)
 * set, every cached page of that device is dropped; otherwise only the page
 * whose address is held in cmd[1] bits 12..63.
 *
 * FIXME: Try to work with the specified size instead of all the pages
 * when the S bit is on
 */
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{
    uint16_t devid = extract64(cmd[0], 0, 16);

    if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
        extract64(cmd[1], 6, 6)) {
        amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
                                   s->cmdbuf + s->cmdbuf_head);
        return;
    }

    if (extract64(cmd[1], 0, 1)) {
        g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid,
                                    &devid);
    } else {
        /*
         * Use the device id decoded above: cmd[1] bits 0..15 hold flag and
         * address bits, not a device id.
         */
        amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
                                devid);
    }
    trace_amdvi_iotlb_inval();
}

/* not honouring reserved bits is regarded as an illegal command */
static void amdvi_cmdbuf_exec(AMDVIState *s)
{
    uint64_t cmd[2];

509 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, 510 cmd, AMDVI_COMMAND_SIZE, MEMTXATTRS_UNSPECIFIED)) { 511 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head); 512 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head); 513 return; 514 } 515 516 switch (extract64(cmd[0], 60, 4)) { 517 case AMDVI_CMD_COMPLETION_WAIT: 518 amdvi_completion_wait(s, cmd); 519 break; 520 case AMDVI_CMD_INVAL_DEVTAB_ENTRY: 521 amdvi_inval_devtab_entry(s, cmd); 522 break; 523 case AMDVI_CMD_INVAL_AMDVI_PAGES: 524 amdvi_inval_pages(s, cmd); 525 break; 526 case AMDVI_CMD_INVAL_IOTLB_PAGES: 527 iommu_inval_iotlb(s, cmd); 528 break; 529 case AMDVI_CMD_INVAL_INTR_TABLE: 530 amdvi_inval_inttable(s, cmd); 531 break; 532 case AMDVI_CMD_PREFETCH_AMDVI_PAGES: 533 amdvi_prefetch_pages(s, cmd); 534 break; 535 case AMDVI_CMD_COMPLETE_PPR_REQUEST: 536 amdvi_complete_ppr(s, cmd); 537 break; 538 case AMDVI_CMD_INVAL_AMDVI_ALL: 539 amdvi_inval_all(s, cmd); 540 break; 541 default: 542 trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4)); 543 /* log illegal command */ 544 amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4), 545 s->cmdbuf + s->cmdbuf_head); 546 } 547 } 548 549 static void amdvi_cmdbuf_run(AMDVIState *s) 550 { 551 if (!s->cmdbuf_enabled) { 552 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL)); 553 return; 554 } 555 556 /* check if there is work to do. */ 557 while (s->cmdbuf_head != s->cmdbuf_tail) { 558 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); 559 amdvi_cmdbuf_exec(s); 560 s->cmdbuf_head += AMDVI_COMMAND_SIZE; 561 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head); 562 563 /* wrap head pointer */ 564 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { 565 s->cmdbuf_head = 0; 566 } 567 } 568 } 569 570 static void amdvi_mmio_trace(hwaddr addr, unsigned size) 571 { 572 uint8_t index = (addr & ~0x2000) / 8; 573 574 if ((addr & 0x2000)) { 575 /* high table */ 576 index = index >= AMDVI_MMIO_REGS_HIGH ? 
AMDVI_MMIO_REGS_HIGH : index; 577 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); 578 } else { 579 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index; 580 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); 581 } 582 } 583 584 static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) 585 { 586 AMDVIState *s = opaque; 587 588 uint64_t val = -1; 589 if (addr + size > AMDVI_MMIO_SIZE) { 590 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size); 591 return (uint64_t)-1; 592 } 593 594 if (size == 2) { 595 val = amdvi_readw(s, addr); 596 } else if (size == 4) { 597 val = amdvi_readl(s, addr); 598 } else if (size == 8) { 599 val = amdvi_readq(s, addr); 600 } 601 amdvi_mmio_trace(addr, size); 602 603 return val; 604 } 605 606 static void amdvi_handle_control_write(AMDVIState *s) 607 { 608 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL); 609 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN); 610 611 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN); 612 s->evtlog_enabled = s->enabled && !!(control & 613 AMDVI_MMIO_CONTROL_EVENTLOGEN); 614 615 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN); 616 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN); 617 s->cmdbuf_enabled = s->enabled && !!(control & 618 AMDVI_MMIO_CONTROL_CMDBUFLEN); 619 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN); 620 621 /* update the flags depending on the control register */ 622 if (s->cmdbuf_enabled) { 623 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN); 624 } else { 625 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN); 626 } 627 if (s->evtlog_enabled) { 628 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN); 629 } else { 630 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN); 631 } 632 633 trace_amdvi_control_status(control); 634 amdvi_cmdbuf_run(s); 635 } 636 637 static inline void 
amdvi_handle_devtab_write(AMDVIState *s) 638 639 { 640 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); 641 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); 642 643 /* set device table length */ 644 s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * 645 (AMDVI_MMIO_DEVTAB_SIZE_UNIT / 646 AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); 647 } 648 649 static inline void amdvi_handle_cmdhead_write(AMDVIState *s) 650 { 651 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD) 652 & AMDVI_MMIO_CMDBUF_HEAD_MASK; 653 amdvi_cmdbuf_run(s); 654 } 655 656 static inline void amdvi_handle_cmdbase_write(AMDVIState *s) 657 { 658 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE) 659 & AMDVI_MMIO_CMDBUF_BASE_MASK; 660 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE) 661 & AMDVI_MMIO_CMDBUF_SIZE_MASK); 662 s->cmdbuf_head = s->cmdbuf_tail = 0; 663 } 664 665 static inline void amdvi_handle_cmdtail_write(AMDVIState *s) 666 { 667 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL) 668 & AMDVI_MMIO_CMDBUF_TAIL_MASK; 669 amdvi_cmdbuf_run(s); 670 } 671 672 static inline void amdvi_handle_excllim_write(AMDVIState *s) 673 { 674 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT); 675 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) | 676 AMDVI_MMIO_EXCL_LIMIT_LOW; 677 } 678 679 static inline void amdvi_handle_evtbase_write(AMDVIState *s) 680 { 681 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE); 682 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK; 683 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE) 684 & AMDVI_MMIO_EVTLOG_SIZE_MASK); 685 } 686 687 static inline void amdvi_handle_evttail_write(AMDVIState *s) 688 { 689 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL); 690 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK; 691 } 692 693 static inline void amdvi_handle_evthead_write(AMDVIState *s) 694 { 695 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD); 696 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK; 697 } 698 699 static 
inline void amdvi_handle_pprbase_write(AMDVIState *s) 700 { 701 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE); 702 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK; 703 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE) 704 & AMDVI_MMIO_PPRLOG_SIZE_MASK); 705 } 706 707 static inline void amdvi_handle_pprhead_write(AMDVIState *s) 708 { 709 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD); 710 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK; 711 } 712 713 static inline void amdvi_handle_pprtail_write(AMDVIState *s) 714 { 715 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL); 716 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK; 717 } 718 719 /* FIXME: something might go wrong if System Software writes in chunks 720 * of one byte but linux writes in chunks of 4 bytes so currently it 721 * works correctly with linux but will definitely be busted if software 722 * reads/writes 8 bytes 723 */ 724 static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val, 725 hwaddr addr) 726 { 727 if (size == 2) { 728 amdvi_writew(s, addr, val); 729 } else if (size == 4) { 730 amdvi_writel(s, addr, val); 731 } else if (size == 8) { 732 amdvi_writeq(s, addr, val); 733 } 734 } 735 736 static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, 737 unsigned size) 738 { 739 AMDVIState *s = opaque; 740 unsigned long offset = addr & 0x07; 741 742 if (addr + size > AMDVI_MMIO_SIZE) { 743 trace_amdvi_mmio_write("error: addr outside region: max ", 744 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset); 745 return; 746 } 747 748 amdvi_mmio_trace(addr, size); 749 switch (addr & ~0x07) { 750 case AMDVI_MMIO_CONTROL: 751 amdvi_mmio_reg_write(s, size, val, addr); 752 amdvi_handle_control_write(s); 753 break; 754 case AMDVI_MMIO_DEVICE_TABLE: 755 amdvi_mmio_reg_write(s, size, val, addr); 756 /* set device table address 757 * This also suffers from inability to tell whether software 758 * is done writing 759 */ 760 if (offset || (size == 8)) { 761 
amdvi_handle_devtab_write(s); 762 } 763 break; 764 case AMDVI_MMIO_COMMAND_HEAD: 765 amdvi_mmio_reg_write(s, size, val, addr); 766 amdvi_handle_cmdhead_write(s); 767 break; 768 case AMDVI_MMIO_COMMAND_BASE: 769 amdvi_mmio_reg_write(s, size, val, addr); 770 /* FIXME - make sure System Software has finished writing in case 771 * it writes in chucks less than 8 bytes in a robust way.As for 772 * now, this hacks works for the linux driver 773 */ 774 if (offset || (size == 8)) { 775 amdvi_handle_cmdbase_write(s); 776 } 777 break; 778 case AMDVI_MMIO_COMMAND_TAIL: 779 amdvi_mmio_reg_write(s, size, val, addr); 780 amdvi_handle_cmdtail_write(s); 781 break; 782 case AMDVI_MMIO_EVENT_BASE: 783 amdvi_mmio_reg_write(s, size, val, addr); 784 amdvi_handle_evtbase_write(s); 785 break; 786 case AMDVI_MMIO_EVENT_HEAD: 787 amdvi_mmio_reg_write(s, size, val, addr); 788 amdvi_handle_evthead_write(s); 789 break; 790 case AMDVI_MMIO_EVENT_TAIL: 791 amdvi_mmio_reg_write(s, size, val, addr); 792 amdvi_handle_evttail_write(s); 793 break; 794 case AMDVI_MMIO_EXCL_LIMIT: 795 amdvi_mmio_reg_write(s, size, val, addr); 796 amdvi_handle_excllim_write(s); 797 break; 798 /* PPR log base - unused for now */ 799 case AMDVI_MMIO_PPR_BASE: 800 amdvi_mmio_reg_write(s, size, val, addr); 801 amdvi_handle_pprbase_write(s); 802 break; 803 /* PPR log head - also unused for now */ 804 case AMDVI_MMIO_PPR_HEAD: 805 amdvi_mmio_reg_write(s, size, val, addr); 806 amdvi_handle_pprhead_write(s); 807 break; 808 /* PPR log tail - unused for now */ 809 case AMDVI_MMIO_PPR_TAIL: 810 amdvi_mmio_reg_write(s, size, val, addr); 811 amdvi_handle_pprtail_write(s); 812 break; 813 } 814 } 815 816 static inline uint64_t amdvi_get_perms(uint64_t entry) 817 { 818 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >> 819 AMDVI_DEV_PERM_SHIFT; 820 } 821 822 /* validate that reserved bits are honoured */ 823 static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, 824 uint64_t *dte) 825 { 826 if ((dte[0] & 
AMDVI_DTE_LOWER_QUAD_RESERVED) 827 || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) 828 || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { 829 amdvi_log_illegaldevtab_error(s, devid, 830 s->devtab + 831 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); 832 return false; 833 } 834 835 return true; 836 } 837 838 /* get a device table entry given the devid */ 839 static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry) 840 { 841 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE; 842 843 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry, 844 AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) { 845 trace_amdvi_dte_get_fail(s->devtab, offset); 846 /* log error accessing dte */ 847 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0); 848 return false; 849 } 850 851 *entry = le64_to_cpu(*entry); 852 if (!amdvi_validate_dte(s, devid, entry)) { 853 trace_amdvi_invalid_dte(entry[0]); 854 return false; 855 } 856 857 return true; 858 } 859 860 /* get pte translation mode */ 861 static inline uint8_t get_pte_translation_mode(uint64_t pte) 862 { 863 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK; 864 } 865 866 static inline uint64_t pte_override_page_mask(uint64_t pte) 867 { 868 uint8_t page_mask = 13; 869 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12; 870 /* find the first zero bit */ 871 while (addr & 1) { 872 page_mask++; 873 addr = addr >> 1; 874 } 875 876 return ~((1ULL << page_mask) - 1); 877 } 878 879 static inline uint64_t pte_get_page_mask(uint64_t oldlevel) 880 { 881 return ~((1UL << ((oldlevel * 9) + 3)) - 1); 882 } 883 884 static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, 885 uint16_t devid) 886 { 887 uint64_t pte; 888 889 if (dma_memory_read(&address_space_memory, pte_addr, 890 &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) { 891 trace_amdvi_get_pte_hwerror(pte_addr); 892 amdvi_log_pagetab_error(s, devid, pte_addr, 0); 893 pte = 0; 894 return pte; 895 } 896 897 pte = le64_to_cpu(pte); 898 return pte; 
899 } 900 901 static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, 902 IOMMUTLBEntry *ret, unsigned perms, 903 hwaddr addr) 904 { 905 unsigned level, present, pte_perms, oldlevel; 906 uint64_t pte = dte[0], pte_addr, page_mask; 907 908 /* make sure the DTE has TV = 1 */ 909 if (pte & AMDVI_DEV_TRANSLATION_VALID) { 910 level = get_pte_translation_mode(pte); 911 if (level >= 7) { 912 trace_amdvi_mode_invalid(level, addr); 913 return; 914 } 915 if (level == 0) { 916 goto no_remap; 917 } 918 919 /* we are at the leaf page table or page table encodes a huge page */ 920 do { 921 pte_perms = amdvi_get_perms(pte); 922 present = pte & 1; 923 if (!present || perms != (perms & pte_perms)) { 924 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms); 925 trace_amdvi_page_fault(addr); 926 return; 927 } 928 929 /* go to the next lower level */ 930 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK; 931 /* add offset and load pte */ 932 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; 933 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn); 934 if (!pte) { 935 return; 936 } 937 oldlevel = level; 938 level = get_pte_translation_mode(pte); 939 } while (level > 0 && level < 7); 940 941 if (level == 0x7) { 942 page_mask = pte_override_page_mask(pte); 943 } else { 944 page_mask = pte_get_page_mask(oldlevel); 945 } 946 947 /* get access permissions from pte */ 948 ret->iova = addr & page_mask; 949 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask; 950 ret->addr_mask = ~page_mask; 951 ret->perm = amdvi_get_perms(pte); 952 return; 953 } 954 no_remap: 955 ret->iova = addr & AMDVI_PAGE_MASK_4K; 956 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 957 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 958 ret->perm = amdvi_get_perms(pte); 959 } 960 961 static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, 962 bool is_write, IOMMUTLBEntry *ret) 963 { 964 AMDVIState *s = as->iommu_state; 965 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); 966 
AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); 967 uint64_t entry[4]; 968 969 if (iotlb_entry) { 970 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid), 971 PCI_FUNC(devid), addr, iotlb_entry->translated_addr); 972 ret->iova = addr & ~iotlb_entry->page_mask; 973 ret->translated_addr = iotlb_entry->translated_addr; 974 ret->addr_mask = iotlb_entry->page_mask; 975 ret->perm = iotlb_entry->perms; 976 return; 977 } 978 979 if (!amdvi_get_dte(s, devid, entry)) { 980 return; 981 } 982 983 /* devices with V = 0 are not translated */ 984 if (!(entry[0] & AMDVI_DEV_VALID)) { 985 goto out; 986 } 987 988 amdvi_page_walk(as, entry, ret, 989 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr); 990 991 amdvi_update_iotlb(s, devid, addr, *ret, 992 entry[1] & AMDVI_DEV_DOMID_ID_MASK); 993 return; 994 995 out: 996 ret->iova = addr & AMDVI_PAGE_MASK_4K; 997 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 998 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 999 ret->perm = IOMMU_RW; 1000 } 1001 1002 static inline bool amdvi_is_interrupt_addr(hwaddr addr) 1003 { 1004 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST; 1005 } 1006 1007 static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, 1008 IOMMUAccessFlags flag, int iommu_idx) 1009 { 1010 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1011 AMDVIState *s = as->iommu_state; 1012 IOMMUTLBEntry ret = { 1013 .target_as = &address_space_memory, 1014 .iova = addr, 1015 .translated_addr = 0, 1016 .addr_mask = ~(hwaddr)0, 1017 .perm = IOMMU_NONE 1018 }; 1019 1020 if (!s->enabled) { 1021 /* AMDVI disabled - corresponds to iommu=off not 1022 * failure to provide any parameter 1023 */ 1024 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1025 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1026 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1027 ret.perm = IOMMU_RW; 1028 return ret; 1029 } else if (amdvi_is_interrupt_addr(addr)) { 1030 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1031 
ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1032 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1033 ret.perm = IOMMU_WO; 1034 return ret; 1035 } 1036 1037 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); 1038 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), 1039 PCI_FUNC(as->devfn), addr, ret.translated_addr); 1040 return ret; 1041 } 1042 1043 static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1044 union irte *irte, uint16_t devid) 1045 { 1046 uint64_t irte_root, offset; 1047 1048 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1049 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2; 1050 1051 trace_amdvi_ir_irte(irte_root, offset); 1052 1053 if (dma_memory_read(&address_space_memory, irte_root + offset, 1054 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) { 1055 trace_amdvi_ir_err("failed to get irte"); 1056 return -AMDVI_IR_GET_IRTE; 1057 } 1058 1059 trace_amdvi_ir_irte_val(irte->val); 1060 1061 return 0; 1062 } 1063 1064 static int amdvi_int_remap_legacy(AMDVIState *iommu, 1065 MSIMessage *origin, 1066 MSIMessage *translated, 1067 uint64_t *dte, 1068 X86IOMMUIrq *irq, 1069 uint16_t sid) 1070 { 1071 int ret; 1072 union irte irte; 1073 1074 /* get interrupt remapping table */ 1075 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid); 1076 if (ret < 0) { 1077 return ret; 1078 } 1079 1080 if (!irte.fields.valid) { 1081 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1082 return -AMDVI_IR_TARGET_ABORT; 1083 } 1084 1085 if (irte.fields.guest_mode) { 1086 error_report_once("guest mode is not zero"); 1087 return -AMDVI_IR_ERR; 1088 } 1089 1090 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1091 error_report_once("reserved int_type"); 1092 return -AMDVI_IR_ERR; 1093 } 1094 1095 irq->delivery_mode = irte.fields.int_type; 1096 irq->vector = irte.fields.vector; 1097 irq->dest_mode = irte.fields.dm; 1098 irq->redir_hint = irte.fields.rq_eoi; 1099 irq->dest = irte.fields.destination; 1100 1101 return 0; 1102 } 1103 1104 
static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1105 struct irte_ga *irte, uint16_t devid) 1106 { 1107 uint64_t irte_root, offset; 1108 1109 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1110 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4; 1111 trace_amdvi_ir_irte(irte_root, offset); 1112 1113 if (dma_memory_read(&address_space_memory, irte_root + offset, 1114 irte, sizeof(*irte), MEMTXATTRS_UNSPECIFIED)) { 1115 trace_amdvi_ir_err("failed to get irte_ga"); 1116 return -AMDVI_IR_GET_IRTE; 1117 } 1118 1119 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val); 1120 return 0; 1121 } 1122 1123 static int amdvi_int_remap_ga(AMDVIState *iommu, 1124 MSIMessage *origin, 1125 MSIMessage *translated, 1126 uint64_t *dte, 1127 X86IOMMUIrq *irq, 1128 uint16_t sid) 1129 { 1130 int ret; 1131 struct irte_ga irte; 1132 1133 /* get interrupt remapping table */ 1134 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid); 1135 if (ret < 0) { 1136 return ret; 1137 } 1138 1139 if (!irte.lo.fields_remap.valid) { 1140 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1141 return -AMDVI_IR_TARGET_ABORT; 1142 } 1143 1144 if (irte.lo.fields_remap.guest_mode) { 1145 error_report_once("guest mode is not zero"); 1146 return -AMDVI_IR_ERR; 1147 } 1148 1149 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1150 error_report_once("reserved int_type is set"); 1151 return -AMDVI_IR_ERR; 1152 } 1153 1154 irq->delivery_mode = irte.lo.fields_remap.int_type; 1155 irq->vector = irte.hi.fields.vector; 1156 irq->dest_mode = irte.lo.fields_remap.dm; 1157 irq->redir_hint = irte.lo.fields_remap.rq_eoi; 1158 irq->dest = irte.lo.fields_remap.destination; 1159 1160 return 0; 1161 } 1162 1163 static int __amdvi_int_remap_msi(AMDVIState *iommu, 1164 MSIMessage *origin, 1165 MSIMessage *translated, 1166 uint64_t *dte, 1167 X86IOMMUIrq *irq, 1168 uint16_t sid) 1169 { 1170 int ret; 1171 uint8_t int_ctl; 1172 1173 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 
3; 1174 trace_amdvi_ir_intctl(int_ctl); 1175 1176 switch (int_ctl) { 1177 case AMDVI_IR_INTCTL_PASS: 1178 memcpy(translated, origin, sizeof(*origin)); 1179 return 0; 1180 case AMDVI_IR_INTCTL_REMAP: 1181 break; 1182 case AMDVI_IR_INTCTL_ABORT: 1183 trace_amdvi_ir_target_abort("int_ctl abort"); 1184 return -AMDVI_IR_TARGET_ABORT; 1185 default: 1186 trace_amdvi_ir_err("int_ctl reserved"); 1187 return -AMDVI_IR_ERR; 1188 } 1189 1190 if (iommu->ga_enabled) { 1191 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid); 1192 } else { 1193 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid); 1194 } 1195 1196 return ret; 1197 } 1198 1199 /* Interrupt remapping for MSI/MSI-X entry */ 1200 static int amdvi_int_remap_msi(AMDVIState *iommu, 1201 MSIMessage *origin, 1202 MSIMessage *translated, 1203 uint16_t sid) 1204 { 1205 int ret = 0; 1206 uint64_t pass = 0; 1207 uint64_t dte[4] = { 0 }; 1208 X86IOMMUIrq irq = { 0 }; 1209 uint8_t dest_mode, delivery_mode; 1210 1211 assert(origin && translated); 1212 1213 /* 1214 * When IOMMU is enabled, interrupt remap request will come either from 1215 * IO-APIC or PCI device. If interrupt is from PCI device then it will 1216 * have a valid requester id but if the interrupt is from IO-APIC 1217 * then requester id will be invalid. 1218 */ 1219 if (sid == X86_IOMMU_SID_INVALID) { 1220 sid = AMDVI_IOAPIC_SB_DEVID; 1221 } 1222 1223 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid); 1224 1225 /* check if device table entry is set before we go further. 
*/ 1226 if (!iommu || !iommu->devtab_len) { 1227 memcpy(translated, origin, sizeof(*origin)); 1228 goto out; 1229 } 1230 1231 if (!amdvi_get_dte(iommu, sid, dte)) { 1232 return -AMDVI_IR_ERR; 1233 } 1234 1235 /* Check if IR is enabled in DTE */ 1236 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) { 1237 memcpy(translated, origin, sizeof(*origin)); 1238 goto out; 1239 } 1240 1241 /* validate that we are configure with intremap=on */ 1242 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) { 1243 trace_amdvi_err("Interrupt remapping is enabled in the guest but " 1244 "not in the host. Use intremap=on to enable interrupt " 1245 "remapping in amd-iommu."); 1246 return -AMDVI_IR_ERR; 1247 } 1248 1249 if (origin->address < AMDVI_INT_ADDR_FIRST || 1250 origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) { 1251 trace_amdvi_err("MSI is not from IOAPIC."); 1252 return -AMDVI_IR_ERR; 1253 } 1254 1255 /* 1256 * The MSI data register [10:8] are used to get the upstream interrupt type. 1257 * 1258 * See MSI/MSI-X format: 1259 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf 1260 * (page 5) 1261 */ 1262 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7; 1263 1264 switch (delivery_mode) { 1265 case AMDVI_IOAPIC_INT_TYPE_FIXED: 1266 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED: 1267 trace_amdvi_ir_delivery_mode("fixed/arbitrated"); 1268 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid); 1269 if (ret < 0) { 1270 goto remap_fail; 1271 } else { 1272 /* Translate IRQ to MSI messages */ 1273 x86_iommu_irq_to_msi_message(&irq, translated); 1274 goto out; 1275 } 1276 break; 1277 case AMDVI_IOAPIC_INT_TYPE_SMI: 1278 error_report("SMI is not supported!"); 1279 ret = -AMDVI_IR_ERR; 1280 break; 1281 case AMDVI_IOAPIC_INT_TYPE_NMI: 1282 pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK; 1283 trace_amdvi_ir_delivery_mode("nmi"); 1284 break; 1285 case AMDVI_IOAPIC_INT_TYPE_INIT: 1286 pass = dte[3] & AMDVI_DEV_INT_PASS_MASK; 
1287 trace_amdvi_ir_delivery_mode("init"); 1288 break; 1289 case AMDVI_IOAPIC_INT_TYPE_EINT: 1290 pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK; 1291 trace_amdvi_ir_delivery_mode("eint"); 1292 break; 1293 default: 1294 trace_amdvi_ir_delivery_mode("unsupported delivery_mode"); 1295 ret = -AMDVI_IR_ERR; 1296 break; 1297 } 1298 1299 if (ret < 0) { 1300 goto remap_fail; 1301 } 1302 1303 /* 1304 * The MSI address register bit[2] is used to get the destination 1305 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts 1306 * only. 1307 */ 1308 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1; 1309 if (dest_mode) { 1310 trace_amdvi_ir_err("invalid dest_mode"); 1311 ret = -AMDVI_IR_ERR; 1312 goto remap_fail; 1313 } 1314 1315 if (pass) { 1316 memcpy(translated, origin, sizeof(*origin)); 1317 } else { 1318 trace_amdvi_ir_err("passthrough is not enabled"); 1319 ret = -AMDVI_IR_ERR; 1320 goto remap_fail; 1321 } 1322 1323 out: 1324 trace_amdvi_ir_remap_msi(origin->address, origin->data, 1325 translated->address, translated->data); 1326 return 0; 1327 1328 remap_fail: 1329 return ret; 1330 } 1331 1332 static int amdvi_int_remap(X86IOMMUState *iommu, 1333 MSIMessage *origin, 1334 MSIMessage *translated, 1335 uint16_t sid) 1336 { 1337 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin, 1338 translated, sid); 1339 } 1340 1341 static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr, 1342 uint64_t value, unsigned size, 1343 MemTxAttrs attrs) 1344 { 1345 int ret; 1346 MSIMessage from = { 0, 0 }, to = { 0, 0 }; 1347 uint16_t sid = AMDVI_IOAPIC_SB_DEVID; 1348 1349 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST; 1350 from.data = (uint32_t) value; 1351 1352 trace_amdvi_mem_ir_write_req(addr, value, size); 1353 1354 if (!attrs.unspecified) { 1355 /* We have explicit Source ID */ 1356 sid = attrs.requester_id; 1357 } 1358 1359 ret = amdvi_int_remap_msi(opaque, &from, &to, sid); 1360 if (ret < 0) { 1361 /* TODO: log the event using IOMMU log 
event interface */ 1362 error_report_once("failed to remap interrupt from devid 0x%x", sid); 1363 return MEMTX_ERROR; 1364 } 1365 1366 apic_get_class(NULL)->send_msi(&to); 1367 1368 trace_amdvi_mem_ir_write(to.address, to.data); 1369 return MEMTX_OK; 1370 } 1371 1372 static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr, 1373 uint64_t *data, unsigned size, 1374 MemTxAttrs attrs) 1375 { 1376 return MEMTX_OK; 1377 } 1378 1379 static const MemoryRegionOps amdvi_ir_ops = { 1380 .read_with_attrs = amdvi_mem_ir_read, 1381 .write_with_attrs = amdvi_mem_ir_write, 1382 .endianness = DEVICE_LITTLE_ENDIAN, 1383 .impl = { 1384 .min_access_size = 4, 1385 .max_access_size = 4, 1386 }, 1387 .valid = { 1388 .min_access_size = 4, 1389 .max_access_size = 4, 1390 } 1391 }; 1392 1393 static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) 1394 { 1395 char name[128]; 1396 AMDVIState *s = opaque; 1397 AMDVIAddressSpace **iommu_as, *amdvi_dev_as; 1398 int bus_num = pci_bus_num(bus); 1399 1400 iommu_as = s->address_spaces[bus_num]; 1401 1402 /* allocate memory during the first run */ 1403 if (!iommu_as) { 1404 iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX); 1405 s->address_spaces[bus_num] = iommu_as; 1406 } 1407 1408 /* set up AMD-Vi region */ 1409 if (!iommu_as[devfn]) { 1410 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); 1411 1412 iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1); 1413 iommu_as[devfn]->bus_num = (uint8_t)bus_num; 1414 iommu_as[devfn]->devfn = (uint8_t)devfn; 1415 iommu_as[devfn]->iommu_state = s; 1416 1417 amdvi_dev_as = iommu_as[devfn]; 1418 1419 /* 1420 * Memory region relationships looks like (Address range shows 1421 * only lower 32 bits to make it short in length...): 1422 * 1423 * |-----------------+-------------------+----------| 1424 * | Name | Address range | Priority | 1425 * |-----------------+-------------------+----------+ 1426 * | amdvi_root | 00000000-ffffffff | 0 | 1427 * | amdvi_iommu | 00000000-ffffffff 
| 1 | 1428 * | amdvi_iommu_ir | fee00000-feefffff | 64 | 1429 * |-----------------+-------------------+----------| 1430 */ 1431 memory_region_init_iommu(&amdvi_dev_as->iommu, 1432 sizeof(amdvi_dev_as->iommu), 1433 TYPE_AMD_IOMMU_MEMORY_REGION, 1434 OBJECT(s), 1435 "amd_iommu", UINT64_MAX); 1436 memory_region_init(&amdvi_dev_as->root, OBJECT(s), 1437 "amdvi_root", UINT64_MAX); 1438 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); 1439 memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), 1440 &amdvi_ir_ops, s, "amd_iommu_ir", 1441 AMDVI_INT_ADDR_SIZE); 1442 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 1443 AMDVI_INT_ADDR_FIRST, 1444 &amdvi_dev_as->iommu_ir, 1445 64); 1446 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, 1447 MEMORY_REGION(&amdvi_dev_as->iommu), 1448 1); 1449 } 1450 return &iommu_as[devfn]->as; 1451 } 1452 1453 static const PCIIOMMUOps amdvi_iommu_ops = { 1454 .get_address_space = amdvi_host_dma_iommu, 1455 }; 1456 1457 static const MemoryRegionOps mmio_mem_ops = { 1458 .read = amdvi_mmio_read, 1459 .write = amdvi_mmio_write, 1460 .endianness = DEVICE_LITTLE_ENDIAN, 1461 .impl = { 1462 .min_access_size = 1, 1463 .max_access_size = 8, 1464 .unaligned = false, 1465 }, 1466 .valid = { 1467 .min_access_size = 1, 1468 .max_access_size = 8, 1469 } 1470 }; 1471 1472 static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, 1473 IOMMUNotifierFlag old, 1474 IOMMUNotifierFlag new, 1475 Error **errp) 1476 { 1477 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1478 1479 if (new & IOMMU_NOTIFIER_MAP) { 1480 error_setg(errp, 1481 "device %02x.%02x.%x requires iommu notifier which is not " 1482 "currently supported", as->bus_num, PCI_SLOT(as->devfn), 1483 PCI_FUNC(as->devfn)); 1484 return -EINVAL; 1485 } 1486 return 0; 1487 } 1488 1489 static void amdvi_init(AMDVIState *s) 1490 { 1491 amdvi_iotlb_reset(s); 1492 1493 s->devtab_len = 0; 1494 s->cmdbuf_len = 0; 1495 s->cmdbuf_head = 0; 1496 
s->cmdbuf_tail = 0; 1497 s->evtlog_head = 0; 1498 s->evtlog_tail = 0; 1499 s->excl_enabled = false; 1500 s->excl_allow = false; 1501 s->mmio_enabled = false; 1502 s->enabled = false; 1503 s->ats_enabled = false; 1504 s->cmdbuf_enabled = false; 1505 1506 /* reset MMIO */ 1507 memset(s->mmior, 0, AMDVI_MMIO_SIZE); 1508 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, 1509 0xffffffffffffffef, 0); 1510 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); 1511 } 1512 1513 static void amdvi_pci_realize(PCIDevice *pdev, Error **errp) 1514 { 1515 AMDVIPCIState *s = AMD_IOMMU_PCI(pdev); 1516 int ret; 1517 1518 ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0, 1519 AMDVI_CAPAB_SIZE, errp); 1520 if (ret < 0) { 1521 return; 1522 } 1523 s->capab_offset = ret; 1524 1525 ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0, 1526 AMDVI_CAPAB_REG_SIZE, errp); 1527 if (ret < 0) { 1528 return; 1529 } 1530 ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0, 1531 AMDVI_CAPAB_REG_SIZE, errp); 1532 if (ret < 0) { 1533 return; 1534 } 1535 1536 if (msi_init(pdev, 0, 1, true, false, errp) < 0) { 1537 return; 1538 } 1539 1540 /* reset device ident */ 1541 pci_config_set_prog_interface(pdev->config, 0); 1542 1543 /* reset AMDVI specific capabilities, all r/o */ 1544 pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES); 1545 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, 1546 AMDVI_BASE_ADDR & ~(0xffff0000)); 1547 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, 1548 (AMDVI_BASE_ADDR & ~(0xffff)) >> 16); 1549 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE, 1550 0xff000000); 1551 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0); 1552 pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 1553 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); 1554 } 1555 1556 static void amdvi_sysbus_reset(DeviceState *dev) 1557 { 1558 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1559 1560 
msi_reset(&s->pci.dev); 1561 amdvi_init(s); 1562 } 1563 1564 static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) 1565 { 1566 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1567 MachineState *ms = MACHINE(qdev_get_machine()); 1568 PCMachineState *pcms = PC_MACHINE(ms); 1569 X86MachineState *x86ms = X86_MACHINE(ms); 1570 PCIBus *bus = pcms->bus; 1571 1572 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, 1573 amdvi_uint64_equal, g_free, g_free); 1574 1575 /* This device should take care of IOMMU PCI properties */ 1576 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { 1577 return; 1578 } 1579 1580 /* Pseudo address space under root PCI bus. */ 1581 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); 1582 1583 /* set up MMIO */ 1584 memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", 1585 AMDVI_MMIO_SIZE); 1586 memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, 1587 &s->mmio); 1588 pci_setup_iommu(bus, &amdvi_iommu_ops, s); 1589 amdvi_init(s); 1590 } 1591 1592 static const VMStateDescription vmstate_amdvi_sysbus = { 1593 .name = "amd-iommu", 1594 .unmigratable = 1 1595 }; 1596 1597 static void amdvi_sysbus_instance_init(Object *klass) 1598 { 1599 AMDVIState *s = AMD_IOMMU_DEVICE(klass); 1600 1601 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); 1602 } 1603 1604 static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) 1605 { 1606 DeviceClass *dc = DEVICE_CLASS(klass); 1607 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass); 1608 1609 dc->reset = amdvi_sysbus_reset; 1610 dc->vmsd = &vmstate_amdvi_sysbus; 1611 dc->hotpluggable = false; 1612 dc_class->realize = amdvi_sysbus_realize; 1613 dc_class->int_remap = amdvi_int_remap; 1614 /* Supported by the pc-q35-* machine types */ 1615 dc->user_creatable = true; 1616 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1617 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1618 } 1619 1620 static const TypeInfo amdvi_sysbus = { 
1621 .name = TYPE_AMD_IOMMU_DEVICE, 1622 .parent = TYPE_X86_IOMMU_DEVICE, 1623 .instance_size = sizeof(AMDVIState), 1624 .instance_init = amdvi_sysbus_instance_init, 1625 .class_init = amdvi_sysbus_class_init 1626 }; 1627 1628 static void amdvi_pci_class_init(ObjectClass *klass, void *data) 1629 { 1630 DeviceClass *dc = DEVICE_CLASS(klass); 1631 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1632 1633 k->vendor_id = PCI_VENDOR_ID_AMD; 1634 k->class_id = 0x0806; 1635 k->realize = amdvi_pci_realize; 1636 1637 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1638 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1639 } 1640 1641 static const TypeInfo amdvi_pci = { 1642 .name = TYPE_AMD_IOMMU_PCI, 1643 .parent = TYPE_PCI_DEVICE, 1644 .instance_size = sizeof(AMDVIPCIState), 1645 .class_init = amdvi_pci_class_init, 1646 .interfaces = (InterfaceInfo[]) { 1647 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1648 { }, 1649 }, 1650 }; 1651 1652 static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data) 1653 { 1654 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 1655 1656 imrc->translate = amdvi_translate; 1657 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed; 1658 } 1659 1660 static const TypeInfo amdvi_iommu_memory_region_info = { 1661 .parent = TYPE_IOMMU_MEMORY_REGION, 1662 .name = TYPE_AMD_IOMMU_MEMORY_REGION, 1663 .class_init = amdvi_iommu_memory_region_class_init, 1664 }; 1665 1666 static void amdvi_register_types(void) 1667 { 1668 type_register_static(&amdvi_pci); 1669 type_register_static(&amdvi_sysbus); 1670 type_register_static(&amdvi_iommu_memory_region_info); 1671 } 1672 1673 type_init(amdvi_register_types); 1674