/*
 * Copyright (c) 2007, Neocleus Corporation.
 * Copyright (c) 2007, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Alex Novik <alex@neocleus.com>
 * Allen Kay <allen.m.kay@intel.com>
 * Guy Zana <guy@neocleus.com>
 *
 * This file implements direct PCI assignment to an HVM guest
 */

/*
 * Interrupt Disable policy:
 *
 * INTx interrupt:
 *   Initialize(register_real_device)
 *     Map INTx(xc_physdev_map_pirq):
 *       <fail>
 *         - Set real Interrupt Disable bit to '1'.
 *         - Set machine_irq and assigned_device->machine_irq to '0'.
 *         * Don't bind INTx.
 *
 *     Bind INTx(xc_domain_bind_pt_pci_irq):
 *       <fail>
 *         - Set real Interrupt Disable bit to '1'.
 *         - Unmap INTx.
 *         - Decrement xen_pt_mapped_machine_irq[machine_irq]
 *         - Set assigned_device->machine_irq to '0'.
 *
 *   Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write)
 *     Write '0'
 *       - Set real bit to '0' if assigned_device->machine_irq isn't '0'.
 *
 *     Write '1'
 *       - Set real bit to '1'.
 *
 * MSI interrupt:
 *   Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update)
 *     Bind MSI(xc_domain_update_msi_irq)
 *       <fail>
 *         - Unmap MSI.
 *         - Set dev->msi->pirq to '-1'.
 *
 * MSI-X interrupt:
 *   Initialize MSI-X register(xen_pt_msix_update_one)
 *     Bind MSI-X(xc_domain_update_msi_irq)
 *       <fail>
 *         - Unmap MSI-X.
 *         - Set entry->pirq to '-1'.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include <sys/ioctl.h>

#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen-legacy-backend.h"
#include "xen_pt.h"
#include "qemu/range.h"

static bool has_igd_gfx_passthru;

bool xen_igd_gfx_pt_enabled(void)
{
    return has_igd_gfx_passthru;
}

void xen_igd_gfx_pt_set(bool value, Error **errp)
{
    has_igd_gfx_passthru = value;
}

#define XEN_PT_NR_IRQS (256)
static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0};

void xen_pt_log(const PCIDevice *d, const char *f, ...)
{
    va_list ap;

    va_start(ap, f);
    if (d) {
        fprintf(stderr, "[%02x:%02x.%d] ", pci_dev_bus_num(d),
                PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
    }
    vfprintf(stderr, f, ap);
    va_end(ap);
}

/* Config Space */

static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len)
{
    /* check offset range */
    if (addr > 0xFF) {
        XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
                   "(addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    /* check read size */
    if ((len != 1) && (len != 2) && (len != 4)) {
        XEN_PT_ERR(d, "Failed to access register with invalid access length. "
                   "(addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    /* check offset alignment */
    if (addr & (len - 1)) {
        XEN_PT_ERR(d, "Failed to access register with invalid access size "
                   "alignment. (addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    return 0;
}

int xen_pt_bar_offset_to_index(uint32_t offset)
{
    int index = 0;

    /* check Exp ROM BAR */
    if (offset == PCI_ROM_ADDRESS) {
        return PCI_ROM_SLOT;
    }

    /* calculate BAR index */
    index = (offset - PCI_BASE_ADDRESS_0) >> 2;
    if (index >= PCI_NUM_REGIONS) {
        return -1;
    }

    return index;
}

static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    uint32_t val = 0;
    XenPTRegGroup *reg_grp_entry = NULL;
    XenPTReg *reg_entry = NULL;
    int rc = 0;
    int emul_len = 0;
    uint32_t find_addr = addr;

    if (xen_pt_pci_config_access_check(d, addr, len)) {
        goto exit;
    }

    /* find register group entry */
    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
    if (reg_grp_entry) {
        /* check 0-Hardwired register group */
        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
            /* no need to emulate, just return 0 */
            val = 0;
            goto exit;
        }
    }

    /* read I/O device register value */
    rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len);
    if (rc < 0) {
        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
        memset(&val, 0xff, len);
    }

    /* just return the I/O device register value for
     * passthrough type register group */
    if (reg_grp_entry == NULL) {
        goto exit;
    }

    /* adjust the read value to appropriate CFC-CFF window */
    val <<= (addr & 3) << 3;
    emul_len = len;

    /* loop around the guest requested size */
    while (emul_len > 0) {
        /* find register entry to be emulated */
        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
        if (reg_entry) {
            XenPTRegInfo *reg = reg_entry->reg;
            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
            uint8_t *ptr_val = NULL;

            valid_mask <<= (find_addr - real_offset) << 3;
            ptr_val = (uint8_t *)&val + (real_offset & 3);

            /* do emulation based on register size */
            switch (reg->size) {
            case 1:
                if (reg->u.b.read) {
                    rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask);
                }
                break;
            case 2:
                if (reg->u.w.read) {
                    rc = reg->u.w.read(s, reg_entry,
                                       (uint16_t *)ptr_val, valid_mask);
                }
                break;
            case 4:
                if (reg->u.dw.read) {
                    rc = reg->u.dw.read(s, reg_entry,
                                        (uint32_t *)ptr_val, valid_mask);
                }
                break;
            }

            if (rc < 0) {
                xen_shutdown_fatal_error("Internal error: Invalid read "
                                         "emulation. (%s, rc: %d)\n",
                                         __func__, rc);
                return 0;
            }

            /* calculate next address to find */
            emul_len -= reg->size;
            if (emul_len > 0) {
                find_addr = real_offset + reg->size;
            }
        } else {
            /* nothing to do with passthrough type register,
             * continue to find next byte */
            emul_len--;
            find_addr++;
        }
    }

    /* need to shift back before returning them to pci bus emulator */
    val >>= ((addr & 3) << 3);

exit:
    XEN_PT_LOG_CONFIG(d, addr, val, len);
    return val;
}

static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr,
                                    uint32_t val, int len)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    int index = 0;
    XenPTRegGroup *reg_grp_entry = NULL;
    int rc = 0;
    uint32_t read_val = 0, wb_mask;
    int emul_len = 0;
    XenPTReg *reg_entry = NULL;
    uint32_t find_addr = addr;
    XenPTRegInfo *reg = NULL;
    bool wp_flag = false;

    if (xen_pt_pci_config_access_check(d, addr, len)) {
        return;
    }

    XEN_PT_LOG_CONFIG(d, addr, val, len);

    /* check unused BAR register */
    index = xen_pt_bar_offset_to_index(addr);
    if ((index >= 0) && (val != 0)) {
        uint32_t chk = val;

        if (index == PCI_ROM_SLOT)
            chk |= (uint32_t)~PCI_ROM_ADDRESS_MASK;

        if ((chk != XEN_PT_BAR_ALLF) &&
            (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
            XEN_PT_WARN(d, "Guest attempt to set address to unused "
                        "Base Address Register. (addr: 0x%02x, len: %d)\n",
                        addr, len);
        }
    }

    /* find register group entry */
    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
    if (reg_grp_entry) {
        /* check 0-Hardwired register group */
        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
            /* ignore silently */
            XEN_PT_WARN(d, "Access to 0-Hardwired register. "
                        "(addr: 0x%02x, len: %d)\n", addr, len);
            return;
        }
    }

    rc = xen_host_pci_get_block(&s->real_device, addr,
                                (uint8_t *)&read_val, len);
    if (rc < 0) {
        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
        memset(&read_val, 0xff, len);
        wb_mask = 0;
    } else {
        wb_mask = 0xFFFFFFFF >> ((4 - len) << 3);
    }

    /* pass directly to the real device for passthrough type register group */
    if (reg_grp_entry == NULL) {
        if (!s->permissive) {
            wb_mask = 0;
            wp_flag = true;
        }
        goto out;
    }

    memory_region_transaction_begin();
    pci_default_write_config(d, addr, val, len);

    /* adjust the read and write value to appropriate CFC-CFF window */
    read_val <<= (addr & 3) << 3;
    val <<= (addr & 3) << 3;
    emul_len = len;

    /* loop around the guest requested size */
    while (emul_len > 0) {
        /* find register entry to be emulated */
        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
        if (reg_entry) {
            reg = reg_entry->reg;
            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
            uint8_t *ptr_val = NULL;
            uint32_t wp_mask = reg->emu_mask | reg->ro_mask;

            valid_mask <<= (find_addr - real_offset) << 3;
            ptr_val = (uint8_t *)&val + (real_offset & 3);
            if (!s->permissive) {
                wp_mask |= reg->res_mask;
            }
            if (wp_mask == (0xFFFFFFFF >> ((4 - reg->size) << 3))) {
                wb_mask &= ~((wp_mask >> ((find_addr - real_offset) << 3))
                             << ((len - emul_len) << 3));
            }

            /* do emulation based on register size */
            switch (reg->size) {
            case 1:
                if (reg->u.b.write) {
                    rc = reg->u.b.write(s, reg_entry, ptr_val,
                                        read_val >> ((real_offset & 3) << 3),
                                        valid_mask);
                }
                break;
            case 2:
                if (reg->u.w.write) {
                    rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
                                        (read_val >> ((real_offset & 3) << 3)),
                                        valid_mask);
                }
                break;
            case 4:
                if (reg->u.dw.write) {
                    rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
                                         (read_val >> ((real_offset & 3) << 3)),
                                         valid_mask);
                }
                break;
            }

            if (rc < 0) {
                xen_shutdown_fatal_error("Internal error: Invalid write"
                                         " emulation. (%s, rc: %d)\n",
                                         __func__, rc);
                return;
            }

            /* calculate next address to find */
            emul_len -= reg->size;
            if (emul_len > 0) {
                find_addr = real_offset + reg->size;
            }
        } else {
            /* nothing to do with passthrough type register,
             * continue to find next byte */
            if (!s->permissive) {
                wb_mask &= ~(0xff << ((len - emul_len) << 3));
                /* Unused BARs will make it here, but we don't want to issue
                 * warnings for writes to them (bogus writes get dealt with
                 * above).
                 */
                if (index < 0) {
                    wp_flag = true;
                }
            }
            emul_len--;
            find_addr++;
        }
    }

    /* need to shift back before passing them to xen_host_pci_set_block. */
    val >>= (addr & 3) << 3;

    memory_region_transaction_commit();

out:
    if (wp_flag && !s->permissive_warned) {
        s->permissive_warned = true;
        xen_pt_log(d, "Write-back to unknown field 0x%02x (partially) inhibited (0x%0*x)\n",
                   addr, len * 2, wb_mask);
        xen_pt_log(d, "If the device doesn't work, try enabling permissive mode\n");
        xen_pt_log(d, "(unsafe) and if it helps report the problem to xen-devel\n");
    }
    for (index = 0; wb_mask; index += len) {
        /* unknown regs are passed through */
        while (!(wb_mask & 0xff)) {
            index++;
            wb_mask >>= 8;
        }
        len = 0;
        do {
            len++;
            wb_mask >>= 8;
        } while (wb_mask & 0xff);
        rc = xen_host_pci_set_block(&s->real_device, addr + index,
                                    (uint8_t *)&val + index, len);

        if (rc < 0) {
            XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc);
        }
    }
}

/* register regions */

static uint64_t xen_pt_bar_read(void *o, hwaddr addr,
                                unsigned size)
{
    PCIDevice *d = o;
    /* if this function is called, that probably means that there is a
     * misconfiguration of the IOMMU. */
    XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"HWADDR_FMT_plx"\n",
               addr);
    return 0;
}
static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
                             unsigned size)
{
    PCIDevice *d = o;
    /* Same comment as xen_pt_bar_read function */
    XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"HWADDR_FMT_plx"\n",
               addr);
}

static const MemoryRegionOps ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .read = xen_pt_bar_read,
    .write = xen_pt_bar_write,
};

static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd)
{
    int i = 0;
    XenHostPCIDevice *d = &s->real_device;

    /* Register PIO/MMIO BARs */
    for (i = 0; i < PCI_ROM_SLOT; i++) {
        XenHostPCIIORegion *r = &d->io_regions[i];
        uint8_t type;

        if (r->base_addr == 0 || r->size == 0) {
            continue;
        }

        s->bases[i].access.u = r->base_addr;

        if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) {
            type = PCI_BASE_ADDRESS_SPACE_IO;
            *cmd |= PCI_COMMAND_IO;
        } else {
            type = PCI_BASE_ADDRESS_SPACE_MEMORY;
            if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
                type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
            }
            if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) {
                type |= PCI_BASE_ADDRESS_MEM_TYPE_64;
            }
            *cmd |= PCI_COMMAND_MEMORY;
        }

        memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev,
                              "xen-pci-pt-bar", r->size);
        pci_register_bar(&s->dev, i, type, &s->bar[i]);

        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
                   " base_addr=0x%08"PRIx64" type: 0x%x)\n",
                   i, r->size, r->base_addr, type);
    }

    /* Register expansion ROM address */
    if (d->rom.base_addr && d->rom.size) {
        uint32_t bar_data = 0;

        /* Re-set BAR reported by OS, otherwise ROM can't be read. */
        if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) {
            return 0;
        }
        if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
            bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
            xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
        }

        s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;

        memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev,
                              "xen-pci-pt-rom", d->rom.size);
        pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH,
                         &s->rom);

        XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64
                   " base_addr=0x%08"PRIx64")\n",
                   d->rom.size, d->rom.base_addr);
    }

    xen_pt_register_vga_regions(d);
    return 0;
}

/* region mapping */

static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr)
{
    int i = 0;

    for (i = 0; i < PCI_NUM_REGIONS - 1; i++) {
        if (mr == &s->bar[i]) {
            return i;
        }
    }
    if (mr == &s->rom) {
        return PCI_ROM_SLOT;
    }
    return -1;
}

/*
 * This function checks whether an io_region overlaps an io_region from
 * another device.  The io_region to check is described by (addr, size, type)
 * in CheckBarArgs; the callback is invoked for every device on the bus and
 * sets CheckBarArgs.rc when an overlap is found.
 */
struct CheckBarArgs {
    XenPCIPassthroughState *s;
    pcibus_t addr;
    pcibus_t size;
    uint8_t type;
    bool rc;
};
static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque)
{
    struct CheckBarArgs *arg = opaque;
    XenPCIPassthroughState *s = arg->s;
    uint8_t type = arg->type;
    int i;

    if (d->devfn == s->dev.devfn) {
        return;
    }

    /* xxx: This ignores bridges. */
    for (i = 0; i < PCI_NUM_REGIONS; i++) {
        const PCIIORegion *r = &d->io_regions[i];

        if (!r->size) {
            continue;
        }
        if ((type & PCI_BASE_ADDRESS_SPACE_IO)
            != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) {
            continue;
        }

        if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) {
            XEN_PT_WARN(&s->dev,
                        "Overlapped to device [%02x:%02x.%d] Region: %i"
                        " (addr: 0x%"FMT_PCIBUS", len: 0x%"FMT_PCIBUS")\n",
                        pci_bus_num(bus), PCI_SLOT(d->devfn),
                        PCI_FUNC(d->devfn), i, r->addr, r->size);
            arg->rc = true;
        }
    }
}

static void xen_pt_region_update(XenPCIPassthroughState *s,
                                 MemoryRegionSection *sec, bool adding)
{
    PCIDevice *d = &s->dev;
    MemoryRegion *mr = sec->mr;
    int bar = -1;
    int rc;
    int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING;
    struct CheckBarArgs args = {
        .s = s,
        .addr = sec->offset_within_address_space,
        .size = int128_get64(sec->size),
        .rc = false,
    };

    bar = xen_pt_bar_from_region(s, mr);
    if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) {
        return;
    }

    if (s->msix && &s->msix->mmio == mr) {
        if (adding) {
            s->msix->mmio_base_addr = sec->offset_within_address_space;
            rc = xen_pt_msix_update_remap(s, s->msix->bar_index);
        }
        return;
    }

    args.type = d->io_regions[bar].type;
    pci_for_each_device_under_bus(pci_get_bus(d),
                                  xen_pt_check_bar_overlap, &args);
    if (args.rc) {
        XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS
                    ", len: 0x%"FMT_PCIBUS") is overlapped.\n",
                    bar, sec->offset_within_address_space,
                    int128_get64(sec->size));
    }

    if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
        uint32_t guest_port = sec->offset_within_address_space;
        uint32_t machine_port = s->bases[bar].access.pio_base;
        uint32_t size = int128_get64(sec->size);
        rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
                                      guest_port, machine_port, size,
                                      op);
        if (rc) {
            XEN_PT_ERR(d, "%s ioport mapping failed! (err: %i)\n",
                       adding ? "create new" : "remove old", errno);
        }
    } else {
        pcibus_t guest_addr = sec->offset_within_address_space;
        pcibus_t machine_addr = s->bases[bar].access.maddr
            + sec->offset_within_region;
        pcibus_t size = int128_get64(sec->size);
        rc = xc_domain_memory_mapping(xen_xc, xen_domid,
                                      XEN_PFN(guest_addr + XC_PAGE_SIZE - 1),
                                      XEN_PFN(machine_addr + XC_PAGE_SIZE - 1),
                                      XEN_PFN(size + XC_PAGE_SIZE - 1),
                                      op);
        if (rc) {
            XEN_PT_ERR(d, "%s mem mapping failed! (err: %i)\n",
                       adding ? "create new" : "remove old", errno);
        }
    }
}

static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             memory_listener);

    memory_region_ref(sec->mr);
    xen_pt_region_update(s, sec, true);
}

static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             memory_listener);

    xen_pt_region_update(s, sec, false);
    memory_region_unref(sec->mr);
}

static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             io_listener);

    memory_region_ref(sec->mr);
    xen_pt_region_update(s, sec, true);
}

static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             io_listener);

    xen_pt_region_update(s, sec, false);
    memory_region_unref(sec->mr);
}

static const MemoryListener xen_pt_memory_listener = {
    .name = "xen-pt-mem",
    .region_add = xen_pt_region_add,
    .region_del = xen_pt_region_del,
    .priority = 10,
};

static const MemoryListener xen_pt_io_listener = {
    .name = "xen-pt-io",
    .region_add = xen_pt_io_region_add,
    .region_del = xen_pt_io_region_del,
    .priority = 10,
};
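
/*
 * Note on the two listeners above: they are registered against
 * address_space_memory and address_space_io in xen_pt_realize().  Whenever
 * the guest maps or unmaps one of the device's BARs, xen_pt_region_update()
 * establishes or tears down the matching Xen mapping through
 * xc_domain_memory_mapping() (MMIO) or xc_domain_ioport_mapping() (port I/O).
 */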

/* destroy. */
static void xen_pt_destroy(PCIDevice *d)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    XenHostPCIDevice *host_dev = &s->real_device;
    uint8_t machine_irq = s->machine_irq;
    uint8_t intx;
    int rc;

    if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) {
        intx = xen_pt_pci_intx(s);
        rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
                                     PT_IRQ_TYPE_PCI,
                                     pci_dev_bus_num(d),
                                     PCI_SLOT(s->dev.devfn),
                                     intx,
                                     0 /* isa_irq */);
        if (rc < 0) {
            XEN_PT_ERR(d, "unbinding of interrupt INT%c failed."
                       " (machine irq: %i, err: %d)"
                       " But bravely continuing on..\n",
                       'a' + intx, machine_irq, errno);
        }
    }

    /* N.B. xen_pt_config_delete takes care of freeing them. */
    if (s->msi) {
        xen_pt_msi_disable(s);
    }
    if (s->msix) {
        xen_pt_msix_disable(s);
    }

    if (machine_irq) {
        xen_pt_mapped_machine_irq[machine_irq]--;

        if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
            rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq);

            if (rc < 0) {
                XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)"
                           " But bravely continuing on..\n",
                           machine_irq, errno);
            }
        }
        s->machine_irq = 0;
    }

    /* delete all emulated config registers */
    xen_pt_config_delete(s);

    xen_pt_unregister_vga_regions(host_dev);

    if (s->listener_set) {
        memory_listener_unregister(&s->memory_listener);
        memory_listener_unregister(&s->io_listener);
        s->listener_set = false;
    }
    if (!xen_host_pci_device_closed(&s->real_device)) {
        xen_host_pci_device_put(&s->real_device);
    }
}
/* init */

static void xen_pt_realize(PCIDevice *d, Error **errp)
{
    ERRP_GUARD();
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    int i, rc = 0;
    uint8_t machine_irq = 0, scratch;
    uint16_t cmd = 0;
    int pirq = XEN_PT_UNASSIGNED_PIRQ;

    /* register real device */
    XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d"
               " to devfn 0x%x\n",
               s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function,
               s->dev.devfn);

    s->is_virtfn = s->real_device.is_virtfn;
    if (s->is_virtfn) {
        XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n",
                   s->real_device.domain, s->real_device.bus,
                   s->real_device.dev, s->real_device.func);
    }

    /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
    memset(d->config, 0, PCI_CONFIG_SPACE_SIZE);

    s->memory_listener = xen_pt_memory_listener;
    s->io_listener = xen_pt_io_listener;

    /* Setup VGA bios for passthrough GFX */
    if ((s->real_device.domain == XEN_PCI_IGD_DOMAIN) &&
        (s->real_device.bus == XEN_PCI_IGD_BUS) &&
        (s->real_device.dev == XEN_PCI_IGD_DEV) &&
        (s->real_device.func == XEN_PCI_IGD_FN)) {
        if (!is_igd_vga_passthrough(&s->real_device)) {
            error_setg(errp, "Need to enable igd-passthru if you're trying"
                       " to passthrough IGD GFX");
            xen_host_pci_device_put(&s->real_device);
            return;
        }

        xen_pt_setup_vga(s, &s->real_device, errp);
        if (*errp) {
            error_append_hint(errp, "Setup VGA BIOS of passthrough"
                              " GFX failed");
            xen_host_pci_device_put(&s->real_device);
            return;
        }

        /* Register ISA bridge for passthrough GFX. */
        xen_igd_passthrough_isa_bridge_create(s, &s->real_device);
    }

    /* Handle real device's MMIO/PIO BARs */
    xen_pt_register_regions(s, &cmd);

    /* reinitialize each config register to be emulated */
    xen_pt_config_init(s, errp);
    if (*errp) {
        error_append_hint(errp, "PCI Config space initialisation failed");
        rc = -1;
        goto err_out;
    }

    /* Bind interrupt */
    rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch);
    if (rc) {
        error_setg_errno(errp, errno, "Failed to read PCI_INTERRUPT_PIN");
        goto err_out;
    }
    if (!scratch) {
        XEN_PT_LOG(d, "no pin interrupt\n");
        goto out;
    }

    machine_irq = s->real_device.irq;
    if (machine_irq == 0) {
        XEN_PT_LOG(d, "machine irq is 0\n");
        cmd |= PCI_COMMAND_INTX_DISABLE;
        goto out;
    }

    rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
    if (rc < 0) {
        XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
                   machine_irq, pirq, errno);

        /* Disable PCI intx assertion (turn on bit10 of devctl) */
        cmd |= PCI_COMMAND_INTX_DISABLE;
        machine_irq = 0;
        s->machine_irq = 0;
    } else {
        machine_irq = pirq;
        s->machine_irq = pirq;
        xen_pt_mapped_machine_irq[machine_irq]++;
    }

    /* bind machine_irq to device */
    if (machine_irq != 0) {
        uint8_t e_intx = xen_pt_pci_intx(s);

        rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq,
                                       pci_dev_bus_num(d),
                                       PCI_SLOT(d->devfn),
                                       e_intx);
        if (rc < 0) {
            XEN_PT_ERR(d, "Binding of interrupt %i failed! (err: %d)\n",
                       e_intx, errno);

            /* Disable PCI intx assertion (turn on bit10 of devctl) */
            cmd |= PCI_COMMAND_INTX_DISABLE;
            xen_pt_mapped_machine_irq[machine_irq]--;

            if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
                if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) {
                    XEN_PT_ERR(d, "Unmapping of machine interrupt %i failed!"
                               " (err: %d)\n", machine_irq, errno);
                }
            }
            s->machine_irq = 0;
        }
    }

out:
    if (cmd) {
        uint16_t val;

        rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val);
        if (rc) {
            error_setg_errno(errp, errno, "Failed to read PCI_COMMAND");
            goto err_out;
        } else {
            val |= cmd;
            rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val);
            if (rc) {
                error_setg_errno(errp, errno, "Failed to write PCI_COMMAND"
                                 " val = 0x%x", val);
                goto err_out;
            }
        }
    }

    memory_listener_register(&s->memory_listener, &address_space_memory);
    memory_listener_register(&s->io_listener, &address_space_io);
    s->listener_set = true;
    XEN_PT_LOG(d,
               "Real physical device %02x:%02x.%d registered successfully\n",
               s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function);

    return;

err_out:
    for (i = 0; i < PCI_ROM_SLOT; i++) {
        object_unparent(OBJECT(&s->bar[i]));
    }
    object_unparent(OBJECT(&s->rom));

    xen_pt_destroy(d);
    assert(rc);
}

static void xen_pt_unregister_device(PCIDevice *d)
{
    xen_pt_destroy(d);
}

static Property xen_pci_passthrough_properties[] = {
    DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
    DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void xen_pci_passthrough_instance_init(Object *obj)
{
    /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
     * line, therefore, no need to wait to realize like other devices */
    PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}

void xen_igd_reserve_slot(PCIBus *pci_bus)
{
    if (!xen_igd_gfx_pt_enabled()) {
        return;
    }

    XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n");
    pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK;
}

static void xen_igd_clear_slot(DeviceState *qdev, Error **errp)
{
    ERRP_GUARD();
    PCIDevice *pci_dev = (PCIDevice *)qdev;
    XenPCIPassthroughState *s = XEN_PT_DEVICE(pci_dev);
    XenPTDeviceClass *xpdc = XEN_PT_DEVICE_GET_CLASS(s);
    PCIBus *pci_bus = pci_get_bus(pci_dev);

    xen_host_pci_device_get(&s->real_device,
                            s->hostaddr.domain, s->hostaddr.bus,
                            s->hostaddr.slot, s->hostaddr.function,
                            errp);
    if (*errp) {
        error_append_hint(errp, "Failed to \"open\" the real pci device");
        return;
    }

    if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) {
        xpdc->pci_qdev_realize(qdev, errp);
        return;
    }

    if (is_igd_vga_passthrough(&s->real_device) &&
        s->real_device.domain == XEN_PCI_IGD_DOMAIN &&
        s->real_device.bus == XEN_PCI_IGD_BUS &&
        s->real_device.dev == XEN_PCI_IGD_DEV &&
        s->real_device.func == XEN_PCI_IGD_FN &&
        s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) {
        pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK;
        XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n");
    }
    xpdc->pci_qdev_realize(qdev, errp);
}

static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    XenPTDeviceClass *xpdc = XEN_PT_DEVICE_CLASS(klass);
    xpdc->pci_qdev_realize = dc->realize;
    dc->realize = xen_igd_clear_slot;
    k->realize = xen_pt_realize;
    k->exit = xen_pt_unregister_device;
    k->config_read = xen_pt_pci_read_config;
    k->config_write = xen_pt_pci_write_config;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Assign a host PCI device with Xen";
    device_class_set_props(dc, xen_pci_passthrough_properties);
}

static void xen_pci_passthrough_finalize(Object *obj)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);

    xen_pt_msix_delete(s);
}

static const TypeInfo xen_pci_passthrough_info = {
    .name = TYPE_XEN_PT_DEVICE,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(XenPCIPassthroughState),
    .instance_finalize = xen_pci_passthrough_finalize,
    .class_init = xen_pci_passthrough_class_init,
    .class_size = sizeof(XenPTDeviceClass),
    .instance_init = xen_pci_passthrough_instance_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { INTERFACE_PCIE_DEVICE },
        { },
    },
};

static void xen_pci_passthrough_register_types(void)
{
    type_register_static(&xen_pci_passthrough_info);
}

type_init(xen_pci_passthrough_register_types)
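
/*
 * Usage sketch (assumption: TYPE_XEN_PT_DEVICE in xen_pt.h resolves to
 * "xen-pci-passthrough").  With a Xen HVM guest, a host device could be
 * assigned on the QEMU command line roughly as:
 *
 *   -device xen-pci-passthrough,hostaddr=0000:03:00.0,permissive=off
 *
 * "hostaddr" and "permissive" correspond to the properties declared in
 * xen_pci_passthrough_properties above.
 */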