// SPDX-License-Identifier: GPL-2.0
/*
 * Volume Management Device driver
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

#include <asm/irqdomain.h>
#include <asm/device.h>
#include <asm/msi.h>

#define VMD_CFGBAR	0
#define VMD_MEMBAR1	2
#define VMD_MEMBAR2	4

#define PCI_REG_VMCAP		0x40
#define BUS_RESTRICT_CAP(vmcap)	(vmcap & 0x1)
#define PCI_REG_VMCONFIG	0x44
#define BUS_RESTRICT_CFG(vmcfg)	((vmcfg >> 8) & 0x3)
#define VMCONFIG_MSI_REMAP	0x2
#define PCI_REG_VMLOCK		0x70
#define MB2_SHADOW_EN(vmlock)	(vmlock & 0x2)

#define MB2_SHADOW_OFFSET	0x2000
#define MB2_SHADOW_SIZE		16

enum vmd_features {
	/*
	 * Device may contain registers which hint the physical location of the
	 * membars, in order to allow proper address translation during
	 * resource assignment to enable guest virtualization
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW		= (1 << 0),

	/*
	 * Device may provide root port configuration information which limits
	 * bus numbering
	 */
	VMD_FEAT_HAS_BUS_RESTRICTIONS		= (1 << 1),

	/*
	 * Device contains physical location shadow registers in
	 * vendor-specific capability space
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP	= (1 << 2),

	/*
	 * Device may use MSI-X vector 0 for software triggering; that vector
	 * will not be used for MSI remapping
	 */
	VMD_FEAT_OFFSET_FIRST_VECTOR		= (1 << 3),

	/*
	 * Device can bypass remapping MSI-X transactions into its MSI-X table,
	 * avoiding the requirement of a VMD MSI domain for child device
	 * interrupt handling.
	 */
	VMD_FEAT_CAN_BYPASS_MSI_REMAP		= (1 << 4),
};

/*
 * Lock for manipulating VMD IRQ lists.
 */
static DEFINE_RAW_SPINLOCK(list_lock);

/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:	list item for parent traversal.
 * @irq:	back pointer to parent.
 * @enabled:	true if driver enabled IRQ
 * @virq:	the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
	struct list_head	node;
	struct vmd_irq_list	*irq;
	bool			enabled;
	unsigned int		virq;
};

/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:	the list of IRQs the VMD one demuxes to.
 * @srcu:	SRCU struct for local synchronization.
 * @count:	number of child IRQs assigned to this vector; used to track
 *		sharing.
 */
struct vmd_irq_list {
	struct list_head	irq_list;
	struct srcu_struct	srcu;
	unsigned int		count;
};

struct vmd_dev {
	struct pci_dev		*dev;

	spinlock_t		cfg_lock;
	void __iomem		*cfgbar;

	int			msix_count;
	struct vmd_irq_list	*irqs;

	struct pci_sysdata	sysdata;
	struct resource		resources[3];
	struct irq_domain	*irq_domain;
	struct pci_bus		*bus;
	u8			busn_start;
	u8			first_vec;
};

static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
{
	return container_of(bus->sysdata, struct vmd_dev, sysdata);
}

static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
					   struct vmd_irq_list *irqs)
{
	return irqs - vmd->irqs;
}

/*
 * Drivers managing a device in a VMD domain allocate their own IRQs as before,
 * but the MSI entry for the hardware it's driving will be programmed with a
 * destination ID for the VMD MSI-X table.  The VMD muxes interrupts in its
 * domain into one of its own, and the VMD driver de-muxes these for the
 * handlers sharing that VMD IRQ.  The vmd irq_domain provides the operations
 * and irq_chip to set this up.
 */
static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct vmd_irq *vmdirq = data->chip_data;
	struct vmd_irq_list *irq = vmdirq->irq;
	struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);

	memset(msg, 0, sizeof(*msg));
	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
	msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq);
}

/*
 * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
 */
static void vmd_irq_enable(struct irq_data *data)
{
	struct vmd_irq *vmdirq = data->chip_data;
	unsigned long flags;

	raw_spin_lock_irqsave(&list_lock, flags);
	WARN_ON(vmdirq->enabled);
	list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
	vmdirq->enabled = true;
	raw_spin_unlock_irqrestore(&list_lock, flags);

	data->chip->irq_unmask(data);
}

static void vmd_irq_disable(struct irq_data *data)
{
	struct vmd_irq *vmdirq = data->chip_data;
	unsigned long flags;

	data->chip->irq_mask(data);

	raw_spin_lock_irqsave(&list_lock, flags);
	if (vmdirq->enabled) {
		list_del_rcu(&vmdirq->node);
		vmdirq->enabled = false;
	}
	raw_spin_unlock_irqrestore(&list_lock, flags);
}

/*
 * XXX: Stubbed until we develop acceptable way to not create conflicts with
 * other devices sharing the same vector.
 */
static int vmd_irq_set_affinity(struct irq_data *data,
				const struct cpumask *dest, bool force)
{
	return -EINVAL;
}

static struct irq_chip vmd_msi_controller = {
	.name			= "VMD-MSI",
	.irq_enable		= vmd_irq_enable,
	.irq_disable		= vmd_irq_disable,
	.irq_compose_msi_msg	= vmd_compose_msi_msg,
	.irq_set_affinity	= vmd_irq_set_affinity,
};

static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
				     msi_alloc_info_t *arg)
{
	return 0;
}

/*
 * XXX: We can be even smarter selecting the best IRQ once we solve the
 * affinity problem.
 */
static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{
	unsigned long flags;
	int i, best;

	if (vmd->msix_count == 1 + vmd->first_vec)
		return &vmd->irqs[vmd->first_vec];

	/*
	 * White list for fast-interrupt handlers. All others will share the
	 * "slow" interrupt vector.
	 */
	switch (msi_desc_to_pci_dev(desc)->class) {
	case PCI_CLASS_STORAGE_EXPRESS:
		break;
	default:
		return &vmd->irqs[vmd->first_vec];
	}

	raw_spin_lock_irqsave(&list_lock, flags);
	best = vmd->first_vec + 1;
	for (i = best; i < vmd->msix_count; i++)
		if (vmd->irqs[i].count < vmd->irqs[best].count)
			best = i;
	vmd->irqs[best].count++;
	raw_spin_unlock_irqrestore(&list_lock, flags);

	return &vmd->irqs[best];
}

static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
			unsigned int virq, irq_hw_number_t hwirq,
			msi_alloc_info_t *arg)
{
	struct msi_desc *desc = arg->desc;
	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
	unsigned int index, vector;

	if (!vmdirq)
		return -ENOMEM;

	INIT_LIST_HEAD(&vmdirq->node);
	vmdirq->irq = vmd_next_irq(vmd, desc);
	vmdirq->virq = virq;
	index = index_from_irqs(vmd, vmdirq->irq);
	vector = pci_irq_vector(vmd->dev, index);

	irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
			    handle_untracked_irq, vmd, NULL);
	return 0;
}

static void vmd_msi_free(struct irq_domain *domain,
			 struct msi_domain_info *info, unsigned int virq)
{
	struct vmd_irq *vmdirq = irq_get_chip_data(virq);
	unsigned long flags;

	synchronize_srcu(&vmdirq->irq->srcu);

	/* XXX: Potential optimization to rebalance */
	raw_spin_lock_irqsave(&list_lock, flags);
	vmdirq->irq->count--;
	raw_spin_unlock_irqrestore(&list_lock, flags);

	kfree(vmdirq);
}

static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
			   int nvec, msi_alloc_info_t *arg)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

	if (nvec > vmd->msix_count)
		return vmd->msix_count;

	memset(arg, 0, sizeof(*arg));
	return 0;
}

static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
	arg->desc = desc;
}

static struct msi_domain_ops vmd_msi_domain_ops = {
	.get_hwirq	= vmd_get_hwirq,
	.msi_init	= vmd_msi_init,
	.msi_free	= vmd_msi_free,
	.msi_prepare	= vmd_msi_prepare,
	.set_desc	= vmd_set_desc,
};

static struct msi_domain_info vmd_msi_domain_info = {
	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
			  MSI_FLAG_PCI_MSIX,
	.ops		= &vmd_msi_domain_ops,
	.chip		= &vmd_msi_controller,
};

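/*
 * The VMCONFIG_MSI_REMAP bit in PCI_REG_VMCONFIG disables remapping when it
 * is set, so enabling remapping below means clearing the bit and bypassing
 * remapping means setting it.
 */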
static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
{
	u16 reg;

	pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, &reg);
	reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
		       (reg | VMCONFIG_MSI_REMAP);
	pci_write_config_word(vmd->dev, PCI_REG_VMCONFIG, reg);
}

static int vmd_create_irq_domain(struct vmd_dev *vmd)
{
	struct fwnode_handle *fn;

	fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
	if (!fn)
		return -ENODEV;

	vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info, NULL);
	if (!vmd->irq_domain) {
		irq_domain_free_fwnode(fn);
		return -ENODEV;
	}

	return 0;
}

static void vmd_remove_irq_domain(struct vmd_dev *vmd)
{
	/*
	 * Some production BIOS won't enable remapping between soft reboots.
	 * Ensure remapping is restored before unloading the driver.
	 */
	if (!vmd->msix_count)
		vmd_set_msi_remapping(vmd, true);

	if (vmd->irq_domain) {
		struct fwnode_handle *fn = vmd->irq_domain->fwnode;

		irq_domain_remove(vmd->irq_domain);
		irq_domain_free_fwnode(fn);
	}
}

static void __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
				  unsigned int devfn, int reg, int len)
{
	unsigned int busnr_ecam = bus->number - vmd->busn_start;
	u32 offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);

	if (offset + len >= resource_size(&vmd->dev->resource[VMD_CFGBAR]))
		return NULL;

	return vmd->cfgbar + offset;
}

/*
 * CPU may deadlock if config space is not serialized on some versions of this
 * hardware, so all config space access is done under a spinlock.
 */
static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
			int len, u32 *value)
{
	struct vmd_dev *vmd = vmd_from_bus(bus);
	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
	unsigned long flags;
	int ret = 0;

	if (!addr)
		return -EFAULT;

	spin_lock_irqsave(&vmd->cfg_lock, flags);
	switch (len) {
	case 1:
		*value = readb(addr);
		break;
	case 2:
		*value = readw(addr);
		break;
	case 4:
		*value = readl(addr);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&vmd->cfg_lock, flags);
	return ret;
}

/*
 * VMD h/w converts non-posted config writes to posted memory writes. The
 * read-back in this function forces the completion so it returns only after
 * the config space was written, as expected.
 */
static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
			 int len, u32 value)
{
	struct vmd_dev *vmd = vmd_from_bus(bus);
	void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
	unsigned long flags;
	int ret = 0;

	if (!addr)
		return -EFAULT;

	spin_lock_irqsave(&vmd->cfg_lock, flags);
	switch (len) {
	case 1:
		writeb(value, addr);
		readb(addr);
		break;
	case 2:
		writew(value, addr);
		readw(addr);
		break;
	case 4:
		writel(value, addr);
		readl(addr);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&vmd->cfg_lock, flags);
	return ret;
}

static struct pci_ops vmd_ops = {
	.read		= vmd_pci_read,
	.write		= vmd_pci_write,
};

#ifdef CONFIG_ACPI
static struct acpi_device *vmd_acpi_find_companion(struct pci_dev *pci_dev)
{
	struct pci_host_bridge *bridge;
	u32 busnr, addr;

	if (pci_dev->bus->ops != &vmd_ops)
		return NULL;

	bridge = pci_find_host_bridge(pci_dev->bus);
	busnr = pci_dev->bus->number - bridge->bus->number;
	/*
	 * The address computation below is only applicable to relative bus
	 * numbers below 32.
	 */
	if (busnr > 31)
		return NULL;

	addr = (busnr << 24) | ((u32)pci_dev->devfn << 16) | 0x8000FFFFU;

	dev_dbg(&pci_dev->dev, "Looking for ACPI companion (address 0x%x)\n",
		addr);

	return acpi_find_child_device(ACPI_COMPANION(bridge->dev.parent), addr,
				      false);
}

static bool hook_installed;

static void vmd_acpi_begin(void)
{
	if (pci_acpi_set_companion_lookup_hook(vmd_acpi_find_companion))
		return;

	hook_installed = true;
}

static void vmd_acpi_end(void)
{
	if (!hook_installed)
		return;

	pci_acpi_clear_companion_lookup_hook();
	hook_installed = false;
}
#else
static inline void vmd_acpi_begin(void) { }
static inline void vmd_acpi_end(void) { }
#endif /* CONFIG_ACPI */

static void vmd_attach_resources(struct vmd_dev *vmd)
{
	vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
	vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
}

static void vmd_detach_resources(struct vmd_dev *vmd)
{
	vmd->dev->resource[VMD_MEMBAR1].child = NULL;
	vmd->dev->resource[VMD_MEMBAR2].child = NULL;
}

/*
 * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
 * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
 * 16 bits are the PCI Segment Group (domain) number.  Other bits are
 * currently reserved.
 */
static int vmd_find_free_domain(void)
{
	int domain = 0xffff;
	struct pci_bus *bus = NULL;

	while ((bus = pci_find_next_bus(bus)) != NULL)
		domain = max_t(int, domain, pci_domain_nr(bus));
	return domain + 1;
}

static int vmd_get_phys_offsets(struct vmd_dev *vmd, bool native_hint,
				resource_size_t *offset1,
				resource_size_t *offset2)
{
	struct pci_dev *dev = vmd->dev;
	u64 phys1, phys2;

	if (native_hint) {
		u32 vmlock;
		int ret;

		ret = pci_read_config_dword(dev, PCI_REG_VMLOCK, &vmlock);
		if (ret || vmlock == ~0)
			return -ENODEV;

		if (MB2_SHADOW_EN(vmlock)) {
			void __iomem *membar2;

			membar2 = pci_iomap(dev, VMD_MEMBAR2, 0);
			if (!membar2)
				return -ENOMEM;
			phys1 = readq(membar2 + MB2_SHADOW_OFFSET);
			phys2 = readq(membar2 + MB2_SHADOW_OFFSET + 8);
			pci_iounmap(dev, membar2);
		} else
			return 0;
	} else {
		/* Hypervisor-Emulated Vendor-Specific Capability */
		int pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
		u32 reg, regu;

		pci_read_config_dword(dev, pos + 4, &reg);

		/* "SHDW" */
		if (pos && reg == 0x53484457) {
			pci_read_config_dword(dev, pos + 8, &reg);
			pci_read_config_dword(dev, pos + 12, &regu);
			phys1 = (u64) regu << 32 | reg;

			pci_read_config_dword(dev, pos + 16, &reg);
			pci_read_config_dword(dev, pos + 20, &regu);
			phys2 = (u64) regu << 32 | reg;
		} else
			return 0;
	}

	*offset1 = dev->resource[VMD_MEMBAR1].start -
			(phys1 & PCI_BASE_ADDRESS_MEM_MASK);
	*offset2 = dev->resource[VMD_MEMBAR2].start -
			(phys2 & PCI_BASE_ADDRESS_MEM_MASK);

	return 0;
}

static int vmd_get_bus_number_start(struct vmd_dev *vmd)
{
	struct pci_dev *dev = vmd->dev;
	u16 reg;

	pci_read_config_word(dev, PCI_REG_VMCAP, &reg);
	if (BUS_RESTRICT_CAP(reg)) {
		pci_read_config_word(dev, PCI_REG_VMCONFIG, &reg);

		switch (BUS_RESTRICT_CFG(reg)) {
		case 0:
			vmd->busn_start = 0;
			break;
		case 1:
			vmd->busn_start = 128;
			break;
		case 2:
			vmd->busn_start = 224;
			break;
		default:
			pci_err(dev, "Unknown Bus Offset Setting (%d)\n",
				BUS_RESTRICT_CFG(reg));
			return -ENODEV;
		}
	}

	return 0;
}

static irqreturn_t vmd_irq(int irq, void *data)
{
	struct vmd_irq_list *irqs = data;
	struct vmd_irq *vmdirq;
	int idx;

	idx = srcu_read_lock(&irqs->srcu);
	list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
		generic_handle_irq(vmdirq->virq);
	srcu_read_unlock(&irqs->srcu, idx);

	return IRQ_HANDLED;
}

static int vmd_alloc_irqs(struct vmd_dev *vmd)
{
	struct pci_dev *dev = vmd->dev;
	int i, err;

	vmd->msix_count = pci_msix_vec_count(dev);
	if (vmd->msix_count < 0)
		return -ENODEV;

	vmd->msix_count = pci_alloc_irq_vectors(dev, vmd->first_vec + 1,
						vmd->msix_count, PCI_IRQ_MSIX);
	if (vmd->msix_count < 0)
		return vmd->msix_count;

	vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
				 GFP_KERNEL);
	if (!vmd->irqs)
		return -ENOMEM;

	for (i = 0; i < vmd->msix_count; i++) {
		err = init_srcu_struct(&vmd->irqs[i].srcu);
		if (err)
			return err;

		INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
		err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
				       vmd_irq, IRQF_NO_THREAD,
				       "vmd", &vmd->irqs[i]);
		if (err)
			return err;
	}

	return 0;
}

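/*
 * vmd_enable_domain() brings up the PCI domain behind the VMD endpoint: it
 * resolves the MEMBAR shadow offsets and starting bus number when the
 * hardware advertises them, builds the CFGBAR/MEMBAR resources, sets up
 * either the VMD MSI-X remapping domain or remapping bypass, and finally
 * creates, scans and adds the root bus for the child devices.
 */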
static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
	struct pci_sysdata *sd = &vmd->sysdata;
	struct resource *res;
	u32 upper_bits;
	unsigned long flags;
	LIST_HEAD(resources);
	resource_size_t offset[2] = {0};
	resource_size_t membar2_offset = 0x2000;
	struct pci_bus *child;
	int ret;

	/*
	 * Shadow registers may exist in certain VMD device ids which allow
	 * guests to correctly assign host physical addresses to the root ports
	 * and child devices.  These registers will either return the host value
	 * or 0, depending on an enable bit in the VMD device.
	 */
	if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
		membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
		ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]);
		if (ret)
			return ret;
	} else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
		ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]);
		if (ret)
			return ret;
	}

	/*
	 * Certain VMD devices may have a root port configuration option which
	 * limits the bus range to between 0-127, 128-255, or 224-255
	 */
	if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
		ret = vmd_get_bus_number_start(vmd);
		if (ret)
			return ret;
	}

	res = &vmd->dev->resource[VMD_CFGBAR];
	vmd->resources[0] = (struct resource) {
		.name  = "VMD CFGBAR",
		.start = vmd->busn_start,
		.end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
		.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
	};

	/*
	 * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
	 * put 32-bit resources in the window.
	 *
	 * There's no hardware reason why a 64-bit window *couldn't*
	 * contain a 32-bit resource, but pbus_size_mem() computes the
	 * bridge window size assuming a 64-bit window will contain no
	 * 32-bit resources.  __pci_assign_resource() enforces that
	 * artificial restriction to make sure everything will fit.
	 *
	 * The only way we could use a 64-bit non-prefetchable MEMBAR is
	 * if its address is <4GB so that we can convert it to a 32-bit
	 * resource.  To be visible to the host OS, all VMD endpoints must
	 * be initially configured by platform BIOS, which includes setting
	 * up these resources.  We can assume the device is configured
	 * according to the platform needs.
	 */
	res = &vmd->dev->resource[VMD_MEMBAR1];
	upper_bits = upper_32_bits(res->end);
	flags = res->flags & ~IORESOURCE_SIZEALIGN;
	if (!upper_bits)
		flags &= ~IORESOURCE_MEM_64;
	vmd->resources[1] = (struct resource) {
		.name  = "VMD MEMBAR1",
		.start = res->start,
		.end   = res->end,
		.flags = flags,
		.parent = res,
	};

	res = &vmd->dev->resource[VMD_MEMBAR2];
	upper_bits = upper_32_bits(res->end);
	flags = res->flags & ~IORESOURCE_SIZEALIGN;
	if (!upper_bits)
		flags &= ~IORESOURCE_MEM_64;
	vmd->resources[2] = (struct resource) {
		.name  = "VMD MEMBAR2",
		.start = res->start + membar2_offset,
		.end   = res->end,
		.flags = flags,
		.parent = res,
	};

	sd->vmd_dev = vmd->dev;
	sd->domain = vmd_find_free_domain();
	if (sd->domain < 0)
		return sd->domain;

	sd->node = pcibus_to_node(vmd->dev->bus);

	/*
	 * Currently MSI remapping must be enabled in guest passthrough mode
	 * due to some missing interrupt remapping plumbing. This is probably
	 * acceptable because the guest is usually CPU-limited and MSI
	 * remapping doesn't become a performance bottleneck.
	 */
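	/*
	 * Nonzero shadow offsets mean the MEMBARs were translated for a
	 * guest, so remapping stays enabled in that case even when the
	 * device could otherwise bypass it.
	 */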
	if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
	    offset[0] || offset[1]) {
		ret = vmd_alloc_irqs(vmd);
		if (ret)
			return ret;

		vmd_set_msi_remapping(vmd, true);

		ret = vmd_create_irq_domain(vmd);
		if (ret)
			return ret;

		/*
		 * Override the IRQ domain bus token so the domain can be
		 * distinguished from a regular PCI/MSI domain.
		 */
		irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
	} else {
		vmd_set_msi_remapping(vmd, false);
	}

	pci_add_resource(&resources, &vmd->resources[0]);
	pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
	pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);

	vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
				       &vmd_ops, sd, &resources);
	if (!vmd->bus) {
		pci_free_resource_list(&resources);
		vmd_remove_irq_domain(vmd);
		return -ENODEV;
	}

	vmd_attach_resources(vmd);
	if (vmd->irq_domain)
		dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);

	vmd_acpi_begin();

	pci_scan_child_bus(vmd->bus);
	pci_assign_unassigned_bus_resources(vmd->bus);

	/*
	 * VMD root buses are virtual and don't return true on pci_is_pcie()
	 * and will fail pcie_bus_configure_settings() early. It can instead be
	 * run on each of the real root ports.
	 */
	list_for_each_entry(child, &vmd->bus->children, node)
		pcie_bus_configure_settings(child);

	pci_bus_add_devices(vmd->bus);

	vmd_acpi_end();

	WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
			       "domain"), "Can't create symlink to domain\n");
	return 0;
}

static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	unsigned long features = (unsigned long) id->driver_data;
	struct vmd_dev *vmd;
	int err;

	if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
		return -ENOMEM;

	vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
	if (!vmd)
		return -ENOMEM;

	vmd->dev = dev;
	err = pcim_enable_device(dev);
	if (err < 0)
		return err;

	vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
	if (!vmd->cfgbar)
		return -ENOMEM;

	pci_set_master(dev);
	if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
		return -ENODEV;

	if (features & VMD_FEAT_OFFSET_FIRST_VECTOR)
		vmd->first_vec = 1;

	spin_lock_init(&vmd->cfg_lock);
	pci_set_drvdata(dev, vmd);
	err = vmd_enable_domain(vmd, features);
	if (err)
		return err;

	dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
		 vmd->sysdata.domain);
	return 0;
}

static void vmd_cleanup_srcu(struct vmd_dev *vmd)
{
	int i;

	for (i = 0; i < vmd->msix_count; i++)
		cleanup_srcu_struct(&vmd->irqs[i].srcu);
}

static void vmd_remove(struct pci_dev *dev)
{
	struct vmd_dev *vmd = pci_get_drvdata(dev);

	sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
	pci_stop_root_bus(vmd->bus);
	pci_remove_root_bus(vmd->bus);
	vmd_cleanup_srcu(vmd);
	vmd_detach_resources(vmd);
	vmd_remove_irq_domain(vmd);
}

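/*
 * Power management only needs to deal with the per-vector demux handlers:
 * suspend frees them so the shared MSI-X vectors are quiesced, and resume
 * requests them again.
 */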
#ifdef CONFIG_PM_SLEEP
static int vmd_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct vmd_dev *vmd = pci_get_drvdata(pdev);
	int i;

	for (i = 0; i < vmd->msix_count; i++)
		devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);

	return 0;
}

static int vmd_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct vmd_dev *vmd = pci_get_drvdata(pdev);
	int err, i;

	for (i = 0; i < vmd->msix_count; i++) {
		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
				       vmd_irq, IRQF_NO_THREAD,
				       "vmd", &vmd->irqs[i]);
		if (err)
			return err;
	}

	return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);

static const struct pci_device_id vmd_ids[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
				VMD_FEAT_HAS_BUS_RESTRICTIONS |
				VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x467f),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
				VMD_FEAT_HAS_BUS_RESTRICTIONS |
				VMD_FEAT_OFFSET_FIRST_VECTOR,},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4c3d),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
				VMD_FEAT_HAS_BUS_RESTRICTIONS |
				VMD_FEAT_OFFSET_FIRST_VECTOR,},
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
		.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
				VMD_FEAT_HAS_BUS_RESTRICTIONS |
				VMD_FEAT_OFFSET_FIRST_VECTOR,},
	{0,}
};
MODULE_DEVICE_TABLE(pci, vmd_ids);

static struct pci_driver vmd_drv = {
	.name		= "vmd",
	.id_table	= vmd_ids,
	.probe		= vmd_probe,
	.remove		= vmd_remove,
	.driver		= {
		.pm	= &vmd_dev_pm_ops,
	},
};
module_pci_driver(vmd_drv);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");