1 /* 2 * Copyright (c) 2007, Neocleus Corporation. 3 * Copyright (c) 2007, Intel Corporation. 4 * 5 * This work is licensed under the terms of the GNU GPL, version 2. See 6 * the COPYING file in the top-level directory. 7 * 8 * Alex Novik <alex@neocleus.com> 9 * Allen Kay <allen.m.kay@intel.com> 10 * Guy Zana <guy@neocleus.com> 11 * 12 * This file implements direct PCI assignment to a HVM guest 13 */ 14 15 /* 16 * Interrupt Disable policy: 17 * 18 * INTx interrupt: 19 * Initialize(register_real_device) 20 * Map INTx(xc_physdev_map_pirq): 21 * <fail> 22 * - Set real Interrupt Disable bit to '1'. 23 * - Set machine_irq and assigned_device->machine_irq to '0'. 24 * * Don't bind INTx. 25 * 26 * Bind INTx(xc_domain_bind_pt_pci_irq): 27 * <fail> 28 * - Set real Interrupt Disable bit to '1'. 29 * - Unmap INTx. 30 * - Decrement xen_pt_mapped_machine_irq[machine_irq] 31 * - Set assigned_device->machine_irq to '0'. 32 * 33 * Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write) 34 * Write '0' 35 * - Set real bit to '0' if assigned_device->machine_irq isn't '0'. 36 * 37 * Write '1' 38 * - Set real bit to '1'. 39 * 40 * MSI interrupt: 41 * Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update) 42 * Bind MSI(xc_domain_update_msi_irq) 43 * <fail> 44 * - Unmap MSI. 45 * - Set dev->msi->pirq to '-1'. 46 * 47 * MSI-X interrupt: 48 * Initialize MSI-X register(xen_pt_msix_update_one) 49 * Bind MSI-X(xc_domain_update_msi_irq) 50 * <fail> 51 * - Unmap MSI-X. 52 * - Set entry->pirq to '-1'. 53 */ 54 55 #include "qemu/osdep.h" 56 #include "qapi/error.h" 57 #include <sys/ioctl.h> 58 59 #include "hw/pci/pci.h" 60 #include "hw/qdev-properties.h" 61 #include "hw/xen/xen.h" 62 #include "hw/i386/pc.h" 63 #include "hw/xen/xen-legacy-backend.h" 64 #include "xen_pt.h" 65 #include "qemu/range.h" 66 #include "exec/address-spaces.h" 67 68 static bool has_igd_gfx_passthru; 69 70 bool xen_igd_gfx_pt_enabled(void) 71 { 72 return has_igd_gfx_passthru; 73 } 74 75 void xen_igd_gfx_pt_set(bool value, Error **errp) 76 { 77 has_igd_gfx_passthru = value; 78 } 79 80 #define XEN_PT_NR_IRQS (256) 81 static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0}; 82 83 void xen_pt_log(const PCIDevice *d, const char *f, ...) 84 { 85 va_list ap; 86 87 va_start(ap, f); 88 if (d) { 89 fprintf(stderr, "[%02x:%02x.%d] ", pci_dev_bus_num(d), 90 PCI_SLOT(d->devfn), PCI_FUNC(d->devfn)); 91 } 92 vfprintf(stderr, f, ap); 93 va_end(ap); 94 } 95 96 /* Config Space */ 97 98 static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len) 99 { 100 /* check offset range */ 101 if (addr > 0xFF) { 102 XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. " 103 "(addr: 0x%02x, len: %d)\n", addr, len); 104 return -1; 105 } 106 107 /* check read size */ 108 if ((len != 1) && (len != 2) && (len != 4)) { 109 XEN_PT_ERR(d, "Failed to access register with invalid access length. " 110 "(addr: 0x%02x, len: %d)\n", addr, len); 111 return -1; 112 } 113 114 /* check offset alignment */ 115 if (addr & (len - 1)) { 116 XEN_PT_ERR(d, "Failed to access register with invalid access size " 117 "alignment. (addr: 0x%02x, len: %d)\n", addr, len); 118 return -1; 119 } 120 121 return 0; 122 } 123 124 int xen_pt_bar_offset_to_index(uint32_t offset) 125 { 126 int index = 0; 127 128 /* check Exp ROM BAR */ 129 if (offset == PCI_ROM_ADDRESS) { 130 return PCI_ROM_SLOT; 131 } 132 133 /* calculate BAR index */ 134 index = (offset - PCI_BASE_ADDRESS_0) >> 2; 135 if (index >= PCI_NUM_REGIONS) { 136 return -1; 137 } 138 139 return index; 140 } 141 142 static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len) 143 { 144 XenPCIPassthroughState *s = XEN_PT_DEVICE(d); 145 uint32_t val = 0; 146 XenPTRegGroup *reg_grp_entry = NULL; 147 XenPTReg *reg_entry = NULL; 148 int rc = 0; 149 int emul_len = 0; 150 uint32_t find_addr = addr; 151 152 if (xen_pt_pci_config_access_check(d, addr, len)) { 153 goto exit; 154 } 155 156 /* find register group entry */ 157 reg_grp_entry = xen_pt_find_reg_grp(s, addr); 158 if (reg_grp_entry) { 159 /* check 0-Hardwired register group */ 160 if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) { 161 /* no need to emulate, just return 0 */ 162 val = 0; 163 goto exit; 164 } 165 } 166 167 /* read I/O device register value */ 168 rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len); 169 if (rc < 0) { 170 XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc); 171 memset(&val, 0xff, len); 172 } 173 174 /* just return the I/O device register value for 175 * passthrough type register group */ 176 if (reg_grp_entry == NULL) { 177 goto exit; 178 } 179 180 /* adjust the read value to appropriate CFC-CFF window */ 181 val <<= (addr & 3) << 3; 182 emul_len = len; 183 184 /* loop around the guest requested size */ 185 while (emul_len > 0) { 186 /* find register entry to be emulated */ 187 reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr); 188 if (reg_entry) { 189 XenPTRegInfo *reg = reg_entry->reg; 190 uint32_t real_offset = reg_grp_entry->base_offset + reg->offset; 191 uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3); 192 uint8_t *ptr_val = NULL; 193 194 valid_mask <<= (find_addr - real_offset) << 3; 195 ptr_val = (uint8_t *)&val + (real_offset & 3); 196 197 /* do emulation based on register size */ 198 switch (reg->size) { 199 case 1: 200 if (reg->u.b.read) { 201 rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask); 202 } 203 break; 204 case 2: 205 if (reg->u.w.read) { 206 rc = reg->u.w.read(s, reg_entry, 207 (uint16_t *)ptr_val, valid_mask); 208 } 209 break; 210 case 4: 211 if (reg->u.dw.read) { 212 rc = reg->u.dw.read(s, reg_entry, 213 (uint32_t *)ptr_val, valid_mask); 214 } 215 break; 216 } 217 218 if (rc < 0) { 219 xen_shutdown_fatal_error("Internal error: Invalid read " 220 "emulation. (%s, rc: %d)\n", 221 __func__, rc); 222 return 0; 223 } 224 225 /* calculate next address to find */ 226 emul_len -= reg->size; 227 if (emul_len > 0) { 228 find_addr = real_offset + reg->size; 229 } 230 } else { 231 /* nothing to do with passthrough type register, 232 * continue to find next byte */ 233 emul_len--; 234 find_addr++; 235 } 236 } 237 238 /* need to shift back before returning them to pci bus emulator */ 239 val >>= ((addr & 3) << 3); 240 241 exit: 242 XEN_PT_LOG_CONFIG(d, addr, val, len); 243 return val; 244 } 245 246 static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr, 247 uint32_t val, int len) 248 { 249 XenPCIPassthroughState *s = XEN_PT_DEVICE(d); 250 int index = 0; 251 XenPTRegGroup *reg_grp_entry = NULL; 252 int rc = 0; 253 uint32_t read_val = 0, wb_mask; 254 int emul_len = 0; 255 XenPTReg *reg_entry = NULL; 256 uint32_t find_addr = addr; 257 XenPTRegInfo *reg = NULL; 258 bool wp_flag = false; 259 260 if (xen_pt_pci_config_access_check(d, addr, len)) { 261 return; 262 } 263 264 XEN_PT_LOG_CONFIG(d, addr, val, len); 265 266 /* check unused BAR register */ 267 index = xen_pt_bar_offset_to_index(addr); 268 if ((index >= 0) && (val != 0)) { 269 uint32_t chk = val; 270 271 if (index == PCI_ROM_SLOT) 272 chk |= (uint32_t)~PCI_ROM_ADDRESS_MASK; 273 274 if ((chk != XEN_PT_BAR_ALLF) && 275 (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) { 276 XEN_PT_WARN(d, "Guest attempt to set address to unused " 277 "Base Address Register. (addr: 0x%02x, len: %d)\n", 278 addr, len); 279 } 280 } 281 282 /* find register group entry */ 283 reg_grp_entry = xen_pt_find_reg_grp(s, addr); 284 if (reg_grp_entry) { 285 /* check 0-Hardwired register group */ 286 if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) { 287 /* ignore silently */ 288 XEN_PT_WARN(d, "Access to 0-Hardwired register. " 289 "(addr: 0x%02x, len: %d)\n", addr, len); 290 return; 291 } 292 } 293 294 rc = xen_host_pci_get_block(&s->real_device, addr, 295 (uint8_t *)&read_val, len); 296 if (rc < 0) { 297 XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc); 298 memset(&read_val, 0xff, len); 299 wb_mask = 0; 300 } else { 301 wb_mask = 0xFFFFFFFF >> ((4 - len) << 3); 302 } 303 304 /* pass directly to the real device for passthrough type register group */ 305 if (reg_grp_entry == NULL) { 306 if (!s->permissive) { 307 wb_mask = 0; 308 wp_flag = true; 309 } 310 goto out; 311 } 312 313 memory_region_transaction_begin(); 314 pci_default_write_config(d, addr, val, len); 315 316 /* adjust the read and write value to appropriate CFC-CFF window */ 317 read_val <<= (addr & 3) << 3; 318 val <<= (addr & 3) << 3; 319 emul_len = len; 320 321 /* loop around the guest requested size */ 322 while (emul_len > 0) { 323 /* find register entry to be emulated */ 324 reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr); 325 if (reg_entry) { 326 reg = reg_entry->reg; 327 uint32_t real_offset = reg_grp_entry->base_offset + reg->offset; 328 uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3); 329 uint8_t *ptr_val = NULL; 330 uint32_t wp_mask = reg->emu_mask | reg->ro_mask; 331 332 valid_mask <<= (find_addr - real_offset) << 3; 333 ptr_val = (uint8_t *)&val + (real_offset & 3); 334 if (!s->permissive) { 335 wp_mask |= reg->res_mask; 336 } 337 if (wp_mask == (0xFFFFFFFF >> ((4 - reg->size) << 3))) { 338 wb_mask &= ~((wp_mask >> ((find_addr - real_offset) << 3)) 339 << ((len - emul_len) << 3)); 340 } 341 342 /* do emulation based on register size */ 343 switch (reg->size) { 344 case 1: 345 if (reg->u.b.write) { 346 rc = reg->u.b.write(s, reg_entry, ptr_val, 347 read_val >> ((real_offset & 3) << 3), 348 valid_mask); 349 } 350 break; 351 case 2: 352 if (reg->u.w.write) { 353 rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val, 354 (read_val >> ((real_offset & 3) << 3)), 355 valid_mask); 356 } 357 break; 358 case 4: 359 if (reg->u.dw.write) { 360 rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val, 361 (read_val >> ((real_offset & 3) << 3)), 362 valid_mask); 363 } 364 break; 365 } 366 367 if (rc < 0) { 368 xen_shutdown_fatal_error("Internal error: Invalid write" 369 " emulation. (%s, rc: %d)\n", 370 __func__, rc); 371 return; 372 } 373 374 /* calculate next address to find */ 375 emul_len -= reg->size; 376 if (emul_len > 0) { 377 find_addr = real_offset + reg->size; 378 } 379 } else { 380 /* nothing to do with passthrough type register, 381 * continue to find next byte */ 382 if (!s->permissive) { 383 wb_mask &= ~(0xff << ((len - emul_len) << 3)); 384 /* Unused BARs will make it here, but we don't want to issue 385 * warnings for writes to them (bogus writes get dealt with 386 * above). 387 */ 388 if (index < 0) { 389 wp_flag = true; 390 } 391 } 392 emul_len--; 393 find_addr++; 394 } 395 } 396 397 /* need to shift back before passing them to xen_host_pci_set_block. */ 398 val >>= (addr & 3) << 3; 399 400 memory_region_transaction_commit(); 401 402 out: 403 if (wp_flag && !s->permissive_warned) { 404 s->permissive_warned = true; 405 xen_pt_log(d, "Write-back to unknown field 0x%02x (partially) inhibited (0x%0*x)\n", 406 addr, len * 2, wb_mask); 407 xen_pt_log(d, "If the device doesn't work, try enabling permissive mode\n"); 408 xen_pt_log(d, "(unsafe) and if it helps report the problem to xen-devel\n"); 409 } 410 for (index = 0; wb_mask; index += len) { 411 /* unknown regs are passed through */ 412 while (!(wb_mask & 0xff)) { 413 index++; 414 wb_mask >>= 8; 415 } 416 len = 0; 417 do { 418 len++; 419 wb_mask >>= 8; 420 } while (wb_mask & 0xff); 421 rc = xen_host_pci_set_block(&s->real_device, addr + index, 422 (uint8_t *)&val + index, len); 423 424 if (rc < 0) { 425 XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc); 426 } 427 } 428 } 429 430 /* register regions */ 431 432 static uint64_t xen_pt_bar_read(void *o, hwaddr addr, 433 unsigned size) 434 { 435 PCIDevice *d = o; 436 /* if this function is called, that probably means that there is a 437 * misconfiguration of the IOMMU. */ 438 XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n", 439 addr); 440 return 0; 441 } 442 static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val, 443 unsigned size) 444 { 445 PCIDevice *d = o; 446 /* Same comment as xen_pt_bar_read function */ 447 XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n", 448 addr); 449 } 450 451 static const MemoryRegionOps ops = { 452 .endianness = DEVICE_NATIVE_ENDIAN, 453 .read = xen_pt_bar_read, 454 .write = xen_pt_bar_write, 455 }; 456 457 static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd) 458 { 459 int i = 0; 460 XenHostPCIDevice *d = &s->real_device; 461 462 /* Register PIO/MMIO BARs */ 463 for (i = 0; i < PCI_ROM_SLOT; i++) { 464 XenHostPCIIORegion *r = &d->io_regions[i]; 465 uint8_t type; 466 467 if (r->base_addr == 0 || r->size == 0) { 468 continue; 469 } 470 471 s->bases[i].access.u = r->base_addr; 472 473 if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) { 474 type = PCI_BASE_ADDRESS_SPACE_IO; 475 *cmd |= PCI_COMMAND_IO; 476 } else { 477 type = PCI_BASE_ADDRESS_SPACE_MEMORY; 478 if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) { 479 type |= PCI_BASE_ADDRESS_MEM_PREFETCH; 480 } 481 if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) { 482 type |= PCI_BASE_ADDRESS_MEM_TYPE_64; 483 } 484 *cmd |= PCI_COMMAND_MEMORY; 485 } 486 487 memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev, 488 "xen-pci-pt-bar", r->size); 489 pci_register_bar(&s->dev, i, type, &s->bar[i]); 490 491 XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64 492 " base_addr=0x%08"PRIx64" type: %#x)\n", 493 i, r->size, r->base_addr, type); 494 } 495 496 /* Register expansion ROM address */ 497 if (d->rom.base_addr && d->rom.size) { 498 uint32_t bar_data = 0; 499 500 /* Re-set BAR reported by OS, otherwise ROM can't be read. */ 501 if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) { 502 return 0; 503 } 504 if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) { 505 bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK; 506 xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data); 507 } 508 509 s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr; 510 511 memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev, 512 "xen-pci-pt-rom", d->rom.size); 513 pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH, 514 &s->rom); 515 516 XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64 517 " base_addr=0x%08"PRIx64")\n", 518 d->rom.size, d->rom.base_addr); 519 } 520 521 xen_pt_register_vga_regions(d); 522 return 0; 523 } 524 525 /* region mapping */ 526 527 static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr) 528 { 529 int i = 0; 530 531 for (i = 0; i < PCI_NUM_REGIONS - 1; i++) { 532 if (mr == &s->bar[i]) { 533 return i; 534 } 535 } 536 if (mr == &s->rom) { 537 return PCI_ROM_SLOT; 538 } 539 return -1; 540 } 541 542 /* 543 * This function checks if an io_region overlaps an io_region from another 544 * device. The io_region to check is provided with (addr, size and type) 545 * A callback can be provided and will be called for every region that is 546 * overlapped. 547 * The return value indicates if the region is overlappsed */ 548 struct CheckBarArgs { 549 XenPCIPassthroughState *s; 550 pcibus_t addr; 551 pcibus_t size; 552 uint8_t type; 553 bool rc; 554 }; 555 static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque) 556 { 557 struct CheckBarArgs *arg = opaque; 558 XenPCIPassthroughState *s = arg->s; 559 uint8_t type = arg->type; 560 int i; 561 562 if (d->devfn == s->dev.devfn) { 563 return; 564 } 565 566 /* xxx: This ignores bridges. */ 567 for (i = 0; i < PCI_NUM_REGIONS; i++) { 568 const PCIIORegion *r = &d->io_regions[i]; 569 570 if (!r->size) { 571 continue; 572 } 573 if ((type & PCI_BASE_ADDRESS_SPACE_IO) 574 != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) { 575 continue; 576 } 577 578 if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) { 579 XEN_PT_WARN(&s->dev, 580 "Overlapped to device [%02x:%02x.%d] Region: %i" 581 " (addr: %#"FMT_PCIBUS", len: %#"FMT_PCIBUS")\n", 582 pci_bus_num(bus), PCI_SLOT(d->devfn), 583 PCI_FUNC(d->devfn), i, r->addr, r->size); 584 arg->rc = true; 585 } 586 } 587 } 588 589 static void xen_pt_region_update(XenPCIPassthroughState *s, 590 MemoryRegionSection *sec, bool adding) 591 { 592 PCIDevice *d = &s->dev; 593 MemoryRegion *mr = sec->mr; 594 int bar = -1; 595 int rc; 596 int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING; 597 struct CheckBarArgs args = { 598 .s = s, 599 .addr = sec->offset_within_address_space, 600 .size = int128_get64(sec->size), 601 .rc = false, 602 }; 603 604 bar = xen_pt_bar_from_region(s, mr); 605 if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) { 606 return; 607 } 608 609 if (s->msix && &s->msix->mmio == mr) { 610 if (adding) { 611 s->msix->mmio_base_addr = sec->offset_within_address_space; 612 rc = xen_pt_msix_update_remap(s, s->msix->bar_index); 613 } 614 return; 615 } 616 617 args.type = d->io_regions[bar].type; 618 pci_for_each_device(pci_get_bus(d), pci_dev_bus_num(d), 619 xen_pt_check_bar_overlap, &args); 620 if (args.rc) { 621 XEN_PT_WARN(d, "Region: %d (addr: %#"FMT_PCIBUS 622 ", len: %#"FMT_PCIBUS") is overlapped.\n", 623 bar, sec->offset_within_address_space, 624 int128_get64(sec->size)); 625 } 626 627 if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) { 628 uint32_t guest_port = sec->offset_within_address_space; 629 uint32_t machine_port = s->bases[bar].access.pio_base; 630 uint32_t size = int128_get64(sec->size); 631 rc = xc_domain_ioport_mapping(xen_xc, xen_domid, 632 guest_port, machine_port, size, 633 op); 634 if (rc) { 635 XEN_PT_ERR(d, "%s ioport mapping failed! (err: %i)\n", 636 adding ? "create new" : "remove old", errno); 637 } 638 } else { 639 pcibus_t guest_addr = sec->offset_within_address_space; 640 pcibus_t machine_addr = s->bases[bar].access.maddr 641 + sec->offset_within_region; 642 pcibus_t size = int128_get64(sec->size); 643 rc = xc_domain_memory_mapping(xen_xc, xen_domid, 644 XEN_PFN(guest_addr + XC_PAGE_SIZE - 1), 645 XEN_PFN(machine_addr + XC_PAGE_SIZE - 1), 646 XEN_PFN(size + XC_PAGE_SIZE - 1), 647 op); 648 if (rc) { 649 XEN_PT_ERR(d, "%s mem mapping failed! (err: %i)\n", 650 adding ? "create new" : "remove old", errno); 651 } 652 } 653 } 654 655 static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec) 656 { 657 XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, 658 memory_listener); 659 660 memory_region_ref(sec->mr); 661 xen_pt_region_update(s, sec, true); 662 } 663 664 static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec) 665 { 666 XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, 667 memory_listener); 668 669 xen_pt_region_update(s, sec, false); 670 memory_region_unref(sec->mr); 671 } 672 673 static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) 674 { 675 XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, 676 io_listener); 677 678 memory_region_ref(sec->mr); 679 xen_pt_region_update(s, sec, true); 680 } 681 682 static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) 683 { 684 XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, 685 io_listener); 686 687 xen_pt_region_update(s, sec, false); 688 memory_region_unref(sec->mr); 689 } 690 691 static const MemoryListener xen_pt_memory_listener = { 692 .region_add = xen_pt_region_add, 693 .region_del = xen_pt_region_del, 694 .priority = 10, 695 }; 696 697 static const MemoryListener xen_pt_io_listener = { 698 .region_add = xen_pt_io_region_add, 699 .region_del = xen_pt_io_region_del, 700 .priority = 10, 701 }; 702 703 static void 704 xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, 705 XenHostPCIDevice *dev) 706 { 707 uint16_t gpu_dev_id; 708 PCIDevice *d = &s->dev; 709 710 gpu_dev_id = dev->device_id; 711 igd_passthrough_isa_bridge_create(pci_get_bus(d), gpu_dev_id); 712 } 713 714 /* destroy. */ 715 static void xen_pt_destroy(PCIDevice *d) { 716 717 XenPCIPassthroughState *s = XEN_PT_DEVICE(d); 718 XenHostPCIDevice *host_dev = &s->real_device; 719 uint8_t machine_irq = s->machine_irq; 720 uint8_t intx; 721 int rc; 722 723 if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { 724 intx = xen_pt_pci_intx(s); 725 rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, 726 PT_IRQ_TYPE_PCI, 727 pci_dev_bus_num(d), 728 PCI_SLOT(s->dev.devfn), 729 intx, 730 0 /* isa_irq */); 731 if (rc < 0) { 732 XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." 733 " (machine irq: %i, err: %d)" 734 " But bravely continuing on..\n", 735 'a' + intx, machine_irq, errno); 736 } 737 } 738 739 /* N.B. xen_pt_config_delete takes care of freeing them. */ 740 if (s->msi) { 741 xen_pt_msi_disable(s); 742 } 743 if (s->msix) { 744 xen_pt_msix_disable(s); 745 } 746 747 if (machine_irq) { 748 xen_pt_mapped_machine_irq[machine_irq]--; 749 750 if (xen_pt_mapped_machine_irq[machine_irq] == 0) { 751 rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); 752 753 if (rc < 0) { 754 XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)" 755 " But bravely continuing on..\n", 756 machine_irq, errno); 757 } 758 } 759 s->machine_irq = 0; 760 } 761 762 /* delete all emulated config registers */ 763 xen_pt_config_delete(s); 764 765 xen_pt_unregister_vga_regions(host_dev); 766 767 if (s->listener_set) { 768 memory_listener_unregister(&s->memory_listener); 769 memory_listener_unregister(&s->io_listener); 770 s->listener_set = false; 771 } 772 if (!xen_host_pci_device_closed(&s->real_device)) { 773 xen_host_pci_device_put(&s->real_device); 774 } 775 } 776 /* init */ 777 778 static void xen_pt_realize(PCIDevice *d, Error **errp) 779 { 780 ERRP_GUARD(); 781 XenPCIPassthroughState *s = XEN_PT_DEVICE(d); 782 int i, rc = 0; 783 uint8_t machine_irq = 0, scratch; 784 uint16_t cmd = 0; 785 int pirq = XEN_PT_UNASSIGNED_PIRQ; 786 787 /* register real device */ 788 XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d" 789 " to devfn %#x\n", 790 s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function, 791 s->dev.devfn); 792 793 xen_host_pci_device_get(&s->real_device, 794 s->hostaddr.domain, s->hostaddr.bus, 795 s->hostaddr.slot, s->hostaddr.function, 796 errp); 797 if (*errp) { 798 error_append_hint(errp, "Failed to \"open\" the real pci device"); 799 return; 800 } 801 802 s->is_virtfn = s->real_device.is_virtfn; 803 if (s->is_virtfn) { 804 XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n", 805 s->real_device.domain, s->real_device.bus, 806 s->real_device.dev, s->real_device.func); 807 } 808 809 /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ 810 memset(d->config, 0, PCI_CONFIG_SPACE_SIZE); 811 812 s->memory_listener = xen_pt_memory_listener; 813 s->io_listener = xen_pt_io_listener; 814 815 /* Setup VGA bios for passthrough GFX */ 816 if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && 817 (s->real_device.dev == 2) && (s->real_device.func == 0)) { 818 if (!is_igd_vga_passthrough(&s->real_device)) { 819 error_setg(errp, "Need to enable igd-passthru if you're trying" 820 " to passthrough IGD GFX"); 821 xen_host_pci_device_put(&s->real_device); 822 return; 823 } 824 825 xen_pt_setup_vga(s, &s->real_device, errp); 826 if (*errp) { 827 error_append_hint(errp, "Setup VGA BIOS of passthrough" 828 " GFX failed"); 829 xen_host_pci_device_put(&s->real_device); 830 return; 831 } 832 833 /* Register ISA bridge for passthrough GFX. */ 834 xen_igd_passthrough_isa_bridge_create(s, &s->real_device); 835 } 836 837 /* Handle real device's MMIO/PIO BARs */ 838 xen_pt_register_regions(s, &cmd); 839 840 /* reinitialize each config register to be emulated */ 841 xen_pt_config_init(s, errp); 842 if (*errp) { 843 error_append_hint(errp, "PCI Config space initialisation failed"); 844 rc = -1; 845 goto err_out; 846 } 847 848 /* Bind interrupt */ 849 rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch); 850 if (rc) { 851 error_setg_errno(errp, errno, "Failed to read PCI_INTERRUPT_PIN"); 852 goto err_out; 853 } 854 if (!scratch) { 855 XEN_PT_LOG(d, "no pin interrupt\n"); 856 goto out; 857 } 858 859 machine_irq = s->real_device.irq; 860 if (machine_irq == 0) { 861 XEN_PT_LOG(d, "machine irq is 0\n"); 862 cmd |= PCI_COMMAND_INTX_DISABLE; 863 goto out; 864 } 865 866 rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq); 867 if (rc < 0) { 868 XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n", 869 machine_irq, pirq, errno); 870 871 /* Disable PCI intx assertion (turn on bit10 of devctl) */ 872 cmd |= PCI_COMMAND_INTX_DISABLE; 873 machine_irq = 0; 874 s->machine_irq = 0; 875 } else { 876 machine_irq = pirq; 877 s->machine_irq = pirq; 878 xen_pt_mapped_machine_irq[machine_irq]++; 879 } 880 881 /* bind machine_irq to device */ 882 if (machine_irq != 0) { 883 uint8_t e_intx = xen_pt_pci_intx(s); 884 885 rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq, 886 pci_dev_bus_num(d), 887 PCI_SLOT(d->devfn), 888 e_intx); 889 if (rc < 0) { 890 XEN_PT_ERR(d, "Binding of interrupt %i failed! (err: %d)\n", 891 e_intx, errno); 892 893 /* Disable PCI intx assertion (turn on bit10 of devctl) */ 894 cmd |= PCI_COMMAND_INTX_DISABLE; 895 xen_pt_mapped_machine_irq[machine_irq]--; 896 897 if (xen_pt_mapped_machine_irq[machine_irq] == 0) { 898 if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) { 899 XEN_PT_ERR(d, "Unmapping of machine interrupt %i failed!" 900 " (err: %d)\n", machine_irq, errno); 901 } 902 } 903 s->machine_irq = 0; 904 } 905 } 906 907 out: 908 if (cmd) { 909 uint16_t val; 910 911 rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val); 912 if (rc) { 913 error_setg_errno(errp, errno, "Failed to read PCI_COMMAND"); 914 goto err_out; 915 } else { 916 val |= cmd; 917 rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val); 918 if (rc) { 919 error_setg_errno(errp, errno, "Failed to write PCI_COMMAND" 920 " val = 0x%x", val); 921 goto err_out; 922 } 923 } 924 } 925 926 memory_listener_register(&s->memory_listener, &address_space_memory); 927 memory_listener_register(&s->io_listener, &address_space_io); 928 s->listener_set = true; 929 XEN_PT_LOG(d, 930 "Real physical device %02x:%02x.%d registered successfully\n", 931 s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function); 932 933 return; 934 935 err_out: 936 for (i = 0; i < PCI_ROM_SLOT; i++) { 937 object_unparent(OBJECT(&s->bar[i])); 938 } 939 object_unparent(OBJECT(&s->rom)); 940 941 xen_pt_destroy(d); 942 assert(rc); 943 } 944 945 static void xen_pt_unregister_device(PCIDevice *d) 946 { 947 xen_pt_destroy(d); 948 } 949 950 static Property xen_pci_passthrough_properties[] = { 951 DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr), 952 DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false), 953 DEFINE_PROP_END_OF_LIST(), 954 }; 955 956 static void xen_pci_passthrough_instance_init(Object *obj) 957 { 958 /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command 959 * line, therefore, no need to wait to realize like other devices */ 960 PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; 961 } 962 963 static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) 964 { 965 DeviceClass *dc = DEVICE_CLASS(klass); 966 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 967 968 k->realize = xen_pt_realize; 969 k->exit = xen_pt_unregister_device; 970 k->config_read = xen_pt_pci_read_config; 971 k->config_write = xen_pt_pci_write_config; 972 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 973 dc->desc = "Assign an host PCI device with Xen"; 974 device_class_set_props(dc, xen_pci_passthrough_properties); 975 }; 976 977 static void xen_pci_passthrough_finalize(Object *obj) 978 { 979 XenPCIPassthroughState *s = XEN_PT_DEVICE(obj); 980 981 xen_pt_msix_delete(s); 982 } 983 984 static const TypeInfo xen_pci_passthrough_info = { 985 .name = TYPE_XEN_PT_DEVICE, 986 .parent = TYPE_PCI_DEVICE, 987 .instance_size = sizeof(XenPCIPassthroughState), 988 .instance_finalize = xen_pci_passthrough_finalize, 989 .class_init = xen_pci_passthrough_class_init, 990 .instance_init = xen_pci_passthrough_instance_init, 991 .interfaces = (InterfaceInfo[]) { 992 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 993 { INTERFACE_PCIE_DEVICE }, 994 { }, 995 }, 996 }; 997 998 static void xen_pci_passthrough_register_types(void) 999 { 1000 type_register_static(&xen_pci_passthrough_info); 1001 } 1002 1003 type_init(xen_pci_passthrough_register_types) 1004