1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * PCI Message Signaled Interrupt (MSI) 4 * 5 * Copyright (C) 2003-2004 Intel 6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 7 * Copyright (C) 2016 Christoph Hellwig. 8 */ 9 #include <linux/err.h> 10 #include <linux/export.h> 11 #include <linux/irq.h> 12 13 #include "../pci.h" 14 #include "msi.h" 15 16 int pci_msi_enable = 1; 17 int pci_msi_ignore_mask; 18 19 /** 20 * pci_msi_supported - check whether MSI may be enabled on a device 21 * @dev: pointer to the pci_dev data structure of MSI device function 22 * @nvec: how many MSIs have been requested? 23 * 24 * Look at global flags, the device itself, and its parent buses 25 * to determine if MSI/-X are supported for the device. If MSI/-X is 26 * supported return 1, else return 0. 27 **/ 28 static int pci_msi_supported(struct pci_dev *dev, int nvec) 29 { 30 struct pci_bus *bus; 31 32 /* MSI must be globally enabled and supported by the device */ 33 if (!pci_msi_enable) 34 return 0; 35 36 if (!dev || dev->no_msi) 37 return 0; 38 39 /* 40 * You can't ask to have 0 or less MSIs configured. 41 * a) it's stupid .. 42 * b) the list manipulation code assumes nvec >= 1. 43 */ 44 if (nvec < 1) 45 return 0; 46 47 /* 48 * Any bridge which does NOT route MSI transactions from its 49 * secondary bus to its primary bus must set NO_MSI flag on 50 * the secondary pci_bus. 51 * 52 * The NO_MSI flag can either be set directly by: 53 * - arch-specific PCI host bus controller drivers (deprecated) 54 * - quirks for specific PCI bridges 55 * 56 * or indirectly by platform-specific PCI host bridge drivers by 57 * advertising the 'msi_domain' property, which results in 58 * the NO_MSI flag when no MSI domain is found for this bridge 59 * at probe time. 60 */ 61 for (bus = dev->bus; bus; bus = bus->parent) 62 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) 63 return 0; 64 65 return 1; 66 } 67 68 static void pcim_msi_release(void *pcidev) 69 { 70 struct pci_dev *dev = pcidev; 71 72 dev->is_msi_managed = false; 73 pci_free_irq_vectors(dev); 74 } 75 76 /* 77 * Needs to be separate from pcim_release to prevent an ordering problem 78 * vs. msi_device_data_release() in the MSI core code. 79 */ 80 static int pcim_setup_msi_release(struct pci_dev *dev) 81 { 82 int ret; 83 84 if (!pci_is_managed(dev) || dev->is_msi_managed) 85 return 0; 86 87 ret = devm_add_action(&dev->dev, pcim_msi_release, dev); 88 if (!ret) 89 dev->is_msi_managed = true; 90 return ret; 91 } 92 93 /* 94 * Ordering vs. devres: msi device data has to be installed first so that 95 * pcim_msi_release() is invoked before it on device release. 96 */ 97 static int pci_setup_msi_context(struct pci_dev *dev) 98 { 99 int ret = msi_setup_device_data(&dev->dev); 100 101 if (!ret) 102 ret = pcim_setup_msi_release(dev); 103 return ret; 104 } 105 106 /* 107 * Helper functions for mask/unmask and MSI message handling 108 */ 109 110 void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) 111 { 112 raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock; 113 unsigned long flags; 114 115 if (!desc->pci.msi_attrib.can_mask) 116 return; 117 118 raw_spin_lock_irqsave(lock, flags); 119 desc->pci.msi_mask &= ~clear; 120 desc->pci.msi_mask |= set; 121 pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos, 122 desc->pci.msi_mask); 123 raw_spin_unlock_irqrestore(lock, flags); 124 } 125 126 /** 127 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts 128 * @data: pointer to irqdata associated to that interrupt 129 */ 130 void pci_msi_mask_irq(struct irq_data *data) 131 { 132 struct msi_desc *desc = irq_data_get_msi_desc(data); 133 134 __pci_msi_mask_desc(desc, BIT(data->irq - desc->irq)); 135 } 136 EXPORT_SYMBOL_GPL(pci_msi_mask_irq); 137 138 /** 139 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts 140 * @data: pointer to irqdata associated to that interrupt 141 */ 142 void pci_msi_unmask_irq(struct irq_data *data) 143 { 144 struct msi_desc *desc = irq_data_get_msi_desc(data); 145 146 __pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq)); 147 } 148 EXPORT_SYMBOL_GPL(pci_msi_unmask_irq); 149 150 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 151 { 152 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 153 154 BUG_ON(dev->current_state != PCI_D0); 155 156 if (entry->pci.msi_attrib.is_msix) { 157 void __iomem *base = pci_msix_desc_addr(entry); 158 159 if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual)) 160 return; 161 162 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 163 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 164 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); 165 } else { 166 int pos = dev->msi_cap; 167 u16 data; 168 169 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 170 &msg->address_lo); 171 if (entry->pci.msi_attrib.is_64) { 172 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 173 &msg->address_hi); 174 pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data); 175 } else { 176 msg->address_hi = 0; 177 pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data); 178 } 179 msg->data = data; 180 } 181 } 182 183 static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc, 184 struct msi_msg *msg) 185 { 186 int pos = dev->msi_cap; 187 u16 msgctl; 188 189 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 190 msgctl &= ~PCI_MSI_FLAGS_QSIZE; 191 msgctl |= desc->pci.msi_attrib.multiple << 4; 192 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); 193 194 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo); 195 if (desc->pci.msi_attrib.is_64) { 196 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi); 197 pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data); 198 } else { 199 pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); 200 } 201 /* Ensure that the writes are visible in the device */ 202 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 203 } 204 205 static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg) 206 { 207 void __iomem *base = pci_msix_desc_addr(desc); 208 u32 ctrl = desc->pci.msix_ctrl; 209 bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); 210 211 if (desc->pci.msi_attrib.is_virtual) 212 return; 213 /* 214 * The specification mandates that the entry is masked 215 * when the message is modified: 216 * 217 * "If software changes the Address or Data value of an 218 * entry while the entry is unmasked, the result is 219 * undefined." 220 */ 221 if (unmasked) 222 pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT); 223 224 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 225 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 226 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); 227 228 if (unmasked) 229 pci_msix_write_vector_ctrl(desc, ctrl); 230 231 /* Ensure that the writes are visible in the device */ 232 readl(base + PCI_MSIX_ENTRY_DATA); 233 } 234 235 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 236 { 237 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 238 239 if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) { 240 /* Don't touch the hardware now */ 241 } else if (entry->pci.msi_attrib.is_msix) { 242 pci_write_msg_msix(entry, msg); 243 } else { 244 pci_write_msg_msi(dev, entry, msg); 245 } 246 247 entry->msg = *msg; 248 249 if (entry->write_msi_msg) 250 entry->write_msi_msg(entry, entry->write_msi_msg_data); 251 } 252 253 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) 254 { 255 struct msi_desc *entry = irq_get_msi_desc(irq); 256 257 __pci_write_msi_msg(entry, msg); 258 } 259 EXPORT_SYMBOL_GPL(pci_write_msi_msg); 260 261 262 /* PCI/MSI specific functionality */ 263 264 static void pci_intx_for_msi(struct pci_dev *dev, int enable) 265 { 266 if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) 267 pci_intx(dev, enable); 268 } 269 270 static void pci_msi_set_enable(struct pci_dev *dev, int enable) 271 { 272 u16 control; 273 274 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 275 control &= ~PCI_MSI_FLAGS_ENABLE; 276 if (enable) 277 control |= PCI_MSI_FLAGS_ENABLE; 278 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 279 } 280 281 static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, 282 struct irq_affinity_desc *masks) 283 { 284 struct msi_desc desc; 285 u16 control; 286 287 /* MSI Entry Initialization */ 288 memset(&desc, 0, sizeof(desc)); 289 290 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 291 /* Lies, damned lies, and MSIs */ 292 if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) 293 control |= PCI_MSI_FLAGS_MASKBIT; 294 /* Respect XEN's mask disabling */ 295 if (pci_msi_ignore_mask) 296 control &= ~PCI_MSI_FLAGS_MASKBIT; 297 298 desc.nvec_used = nvec; 299 desc.pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 300 desc.pci.msi_attrib.can_mask = !!(control & PCI_MSI_FLAGS_MASKBIT); 301 desc.pci.msi_attrib.default_irq = dev->irq; 302 desc.pci.msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; 303 desc.pci.msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 304 desc.affinity = masks; 305 306 if (control & PCI_MSI_FLAGS_64BIT) 307 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 308 else 309 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32; 310 311 /* Save the initial mask status */ 312 if (desc.pci.msi_attrib.can_mask) 313 pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask); 314 315 return msi_insert_msi_desc(&dev->dev, &desc); 316 } 317 318 static int msi_verify_entries(struct pci_dev *dev) 319 { 320 struct msi_desc *entry; 321 322 if (!dev->no_64bit_msi) 323 return 0; 324 325 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 326 if (entry->msg.address_hi) { 327 pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n", 328 entry->msg.address_hi, entry->msg.address_lo); 329 break; 330 } 331 } 332 return !entry ? 0 : -EIO; 333 } 334 335 /** 336 * msi_capability_init - configure device's MSI capability structure 337 * @dev: pointer to the pci_dev data structure of MSI device function 338 * @nvec: number of interrupts to allocate 339 * @affd: description of automatic IRQ affinity assignments (may be %NULL) 340 * 341 * Setup the MSI capability structure of the device with the requested 342 * number of interrupts. A return value of zero indicates the successful 343 * setup of an entry with the new MSI IRQ. A negative return value indicates 344 * an error, and a positive return value indicates the number of interrupts 345 * which could have been allocated. 346 */ 347 static int msi_capability_init(struct pci_dev *dev, int nvec, 348 struct irq_affinity *affd) 349 { 350 struct irq_affinity_desc *masks = NULL; 351 struct msi_desc *entry, desc; 352 int ret; 353 354 /* Reject multi-MSI early on irq domain enabled architectures */ 355 if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) 356 return 1; 357 358 /* 359 * Disable MSI during setup in the hardware, but mark it enabled 360 * so that setup code can evaluate it. 361 */ 362 pci_msi_set_enable(dev, 0); 363 dev->msi_enabled = 1; 364 365 if (affd) 366 masks = irq_create_affinity_masks(nvec, affd); 367 368 msi_lock_descs(&dev->dev); 369 ret = msi_setup_msi_desc(dev, nvec, masks); 370 if (ret) 371 goto fail; 372 373 /* All MSIs are unmasked by default; mask them all */ 374 entry = msi_first_desc(&dev->dev, MSI_DESC_ALL); 375 pci_msi_mask(entry, msi_multi_mask(entry)); 376 /* 377 * Copy the MSI descriptor for the error path because 378 * pci_msi_setup_msi_irqs() will free it for the hierarchical 379 * interrupt domain case. 380 */ 381 memcpy(&desc, entry, sizeof(desc)); 382 383 /* Configure MSI capability structure */ 384 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 385 if (ret) 386 goto err; 387 388 ret = msi_verify_entries(dev); 389 if (ret) 390 goto err; 391 392 /* Set MSI enabled bits */ 393 pci_intx_for_msi(dev, 0); 394 pci_msi_set_enable(dev, 1); 395 396 pcibios_free_irq(dev); 397 dev->irq = entry->irq; 398 goto unlock; 399 400 err: 401 pci_msi_unmask(&desc, msi_multi_mask(&desc)); 402 pci_free_msi_irqs(dev); 403 fail: 404 dev->msi_enabled = 0; 405 unlock: 406 msi_unlock_descs(&dev->dev); 407 kfree(masks); 408 return ret; 409 } 410 411 int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, 412 struct irq_affinity *affd) 413 { 414 int nvec; 415 int rc; 416 417 if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) 418 return -EINVAL; 419 420 /* Check whether driver already requested MSI-X IRQs */ 421 if (dev->msix_enabled) { 422 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); 423 return -EINVAL; 424 } 425 426 if (maxvec < minvec) 427 return -ERANGE; 428 429 if (WARN_ON_ONCE(dev->msi_enabled)) 430 return -EINVAL; 431 432 nvec = pci_msi_vec_count(dev); 433 if (nvec < 0) 434 return nvec; 435 if (nvec < minvec) 436 return -ENOSPC; 437 438 if (nvec > maxvec) 439 nvec = maxvec; 440 441 rc = pci_setup_msi_context(dev); 442 if (rc) 443 return rc; 444 445 if (!pci_setup_msi_device_domain(dev)) 446 return -ENODEV; 447 448 for (;;) { 449 if (affd) { 450 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 451 if (nvec < minvec) 452 return -ENOSPC; 453 } 454 455 rc = msi_capability_init(dev, nvec, affd); 456 if (rc == 0) 457 return nvec; 458 459 if (rc < 0) 460 return rc; 461 if (rc < minvec) 462 return -ENOSPC; 463 464 nvec = rc; 465 } 466 } 467 468 /** 469 * pci_msi_vec_count - Return the number of MSI vectors a device can send 470 * @dev: device to report about 471 * 472 * This function returns the number of MSI vectors a device requested via 473 * Multiple Message Capable register. It returns a negative errno if the 474 * device is not capable sending MSI interrupts. Otherwise, the call succeeds 475 * and returns a power of two, up to a maximum of 2^5 (32), according to the 476 * MSI specification. 477 **/ 478 int pci_msi_vec_count(struct pci_dev *dev) 479 { 480 int ret; 481 u16 msgctl; 482 483 if (!dev->msi_cap) 484 return -EINVAL; 485 486 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); 487 ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); 488 489 return ret; 490 } 491 EXPORT_SYMBOL(pci_msi_vec_count); 492 493 /* 494 * Architecture override returns true when the PCI MSI message should be 495 * written by the generic restore function. 496 */ 497 bool __weak arch_restore_msi_irqs(struct pci_dev *dev) 498 { 499 return true; 500 } 501 502 void __pci_restore_msi_state(struct pci_dev *dev) 503 { 504 struct msi_desc *entry; 505 u16 control; 506 507 if (!dev->msi_enabled) 508 return; 509 510 entry = irq_get_msi_desc(dev->irq); 511 512 pci_intx_for_msi(dev, 0); 513 pci_msi_set_enable(dev, 0); 514 if (arch_restore_msi_irqs(dev)) 515 __pci_write_msi_msg(entry, &entry->msg); 516 517 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 518 pci_msi_update_mask(entry, 0, 0); 519 control &= ~PCI_MSI_FLAGS_QSIZE; 520 control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; 521 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 522 } 523 524 void pci_msi_shutdown(struct pci_dev *dev) 525 { 526 struct msi_desc *desc; 527 528 if (!pci_msi_enable || !dev || !dev->msi_enabled) 529 return; 530 531 pci_msi_set_enable(dev, 0); 532 pci_intx_for_msi(dev, 1); 533 dev->msi_enabled = 0; 534 535 /* Return the device with MSI unmasked as initial states */ 536 desc = msi_first_desc(&dev->dev, MSI_DESC_ALL); 537 if (!WARN_ON_ONCE(!desc)) 538 pci_msi_unmask(desc, msi_multi_mask(desc)); 539 540 /* Restore dev->irq to its default pin-assertion IRQ */ 541 dev->irq = desc->pci.msi_attrib.default_irq; 542 pcibios_alloc_irq(dev); 543 } 544 545 /* PCI/MSI-X specific functionality */ 546 547 static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) 548 { 549 u16 ctrl; 550 551 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); 552 ctrl &= ~clear; 553 ctrl |= set; 554 pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); 555 } 556 557 static void __iomem *msix_map_region(struct pci_dev *dev, 558 unsigned int nr_entries) 559 { 560 resource_size_t phys_addr; 561 u32 table_offset; 562 unsigned long flags; 563 u8 bir; 564 565 pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, 566 &table_offset); 567 bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); 568 flags = pci_resource_flags(dev, bir); 569 if (!flags || (flags & IORESOURCE_UNSET)) 570 return NULL; 571 572 table_offset &= PCI_MSIX_TABLE_OFFSET; 573 phys_addr = pci_resource_start(dev, bir) + table_offset; 574 575 return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); 576 } 577 578 /** 579 * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation 580 * @dev: The PCI device for which the descriptor is prepared 581 * @desc: The MSI descriptor for preparation 582 * 583 * This is separate from msix_setup_msi_descs() below to handle dynamic 584 * allocations for MSI-X after initial enablement. 585 * 586 * Ideally the whole MSI-X setup would work that way, but there is no way to 587 * support this for the legacy arch_setup_msi_irqs() mechanism and for the 588 * fake irq domains like the x86 XEN one. Sigh... 589 * 590 * The descriptor is zeroed and only @desc::msi_index and @desc::affinity 591 * are set. When called from msix_setup_msi_descs() then the is_virtual 592 * attribute is initialized as well. 593 * 594 * Fill in the rest. 595 */ 596 void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) 597 { 598 desc->nvec_used = 1; 599 desc->pci.msi_attrib.is_msix = 1; 600 desc->pci.msi_attrib.is_64 = 1; 601 desc->pci.msi_attrib.default_irq = dev->irq; 602 desc->pci.mask_base = dev->msix_base; 603 desc->pci.msi_attrib.can_mask = !pci_msi_ignore_mask && 604 !desc->pci.msi_attrib.is_virtual; 605 606 if (desc->pci.msi_attrib.can_mask) { 607 void __iomem *addr = pci_msix_desc_addr(desc); 608 609 desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 610 } 611 } 612 613 static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries, 614 int nvec, struct irq_affinity_desc *masks) 615 { 616 int ret = 0, i, vec_count = pci_msix_vec_count(dev); 617 struct irq_affinity_desc *curmsk; 618 struct msi_desc desc; 619 620 memset(&desc, 0, sizeof(desc)); 621 622 for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) { 623 desc.msi_index = entries ? entries[i].entry : i; 624 desc.affinity = masks ? curmsk : NULL; 625 desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count; 626 627 msix_prepare_msi_desc(dev, &desc); 628 629 ret = msi_insert_msi_desc(&dev->dev, &desc); 630 if (ret) 631 break; 632 } 633 return ret; 634 } 635 636 static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) 637 { 638 struct msi_desc *desc; 639 640 if (entries) { 641 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) { 642 entries->vector = desc->irq; 643 entries++; 644 } 645 } 646 } 647 648 static void msix_mask_all(void __iomem *base, int tsize) 649 { 650 u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; 651 int i; 652 653 if (pci_msi_ignore_mask) 654 return; 655 656 for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) 657 writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); 658 } 659 660 static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, 661 int nvec, struct irq_affinity *affd) 662 { 663 struct irq_affinity_desc *masks = NULL; 664 int ret; 665 666 if (affd) 667 masks = irq_create_affinity_masks(nvec, affd); 668 669 msi_lock_descs(&dev->dev); 670 ret = msix_setup_msi_descs(dev, entries, nvec, masks); 671 if (ret) 672 goto out_free; 673 674 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); 675 if (ret) 676 goto out_free; 677 678 /* Check if all MSI entries honor device restrictions */ 679 ret = msi_verify_entries(dev); 680 if (ret) 681 goto out_free; 682 683 msix_update_entries(dev, entries); 684 goto out_unlock; 685 686 out_free: 687 pci_free_msi_irqs(dev); 688 out_unlock: 689 msi_unlock_descs(&dev->dev); 690 kfree(masks); 691 return ret; 692 } 693 694 /** 695 * msix_capability_init - configure device's MSI-X capability 696 * @dev: pointer to the pci_dev data structure of MSI-X device function 697 * @entries: pointer to an array of struct msix_entry entries 698 * @nvec: number of @entries 699 * @affd: Optional pointer to enable automatic affinity assignment 700 * 701 * Setup the MSI-X capability structure of device function with a 702 * single MSI-X IRQ. A return of zero indicates the successful setup of 703 * requested MSI-X entries with allocated IRQs or non-zero for otherwise. 704 **/ 705 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, 706 int nvec, struct irq_affinity *affd) 707 { 708 int ret, tsize; 709 u16 control; 710 711 /* 712 * Some devices require MSI-X to be enabled before the MSI-X 713 * registers can be accessed. Mask all the vectors to prevent 714 * interrupts coming in before they're fully set up. 715 */ 716 pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | 717 PCI_MSIX_FLAGS_ENABLE); 718 719 /* Mark it enabled so setup functions can query it */ 720 dev->msix_enabled = 1; 721 722 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 723 /* Request & Map MSI-X table region */ 724 tsize = msix_table_size(control); 725 dev->msix_base = msix_map_region(dev, tsize); 726 if (!dev->msix_base) { 727 ret = -ENOMEM; 728 goto out_disable; 729 } 730 731 ret = msix_setup_interrupts(dev, entries, nvec, affd); 732 if (ret) 733 goto out_disable; 734 735 /* Disable INTX */ 736 pci_intx_for_msi(dev, 0); 737 738 /* 739 * Ensure that all table entries are masked to prevent 740 * stale entries from firing in a crash kernel. 741 * 742 * Done late to deal with a broken Marvell NVME device 743 * which takes the MSI-X mask bits into account even 744 * when MSI-X is disabled, which prevents MSI delivery. 745 */ 746 msix_mask_all(dev->msix_base, tsize); 747 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 748 749 pcibios_free_irq(dev); 750 return 0; 751 752 out_disable: 753 dev->msix_enabled = 0; 754 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); 755 756 return ret; 757 } 758 759 static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, int nvec) 760 { 761 bool nogap; 762 int i, j; 763 764 if (!entries) 765 return true; 766 767 nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY); 768 769 for (i = 0; i < nvec; i++) { 770 /* Check for duplicate entries */ 771 for (j = i + 1; j < nvec; j++) { 772 if (entries[i].entry == entries[j].entry) 773 return false; 774 } 775 /* Check for unsupported gaps */ 776 if (nogap && entries[i].entry != i) 777 return false; 778 } 779 return true; 780 } 781 782 int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, 783 int maxvec, struct irq_affinity *affd, int flags) 784 { 785 int hwsize, rc, nvec = maxvec; 786 787 if (maxvec < minvec) 788 return -ERANGE; 789 790 if (dev->msi_enabled) { 791 pci_info(dev, "can't enable MSI-X (MSI already enabled)\n"); 792 return -EINVAL; 793 } 794 795 if (WARN_ON_ONCE(dev->msix_enabled)) 796 return -EINVAL; 797 798 /* Check MSI-X early on irq domain enabled architectures */ 799 if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY)) 800 return -ENOTSUPP; 801 802 if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) 803 return -EINVAL; 804 805 hwsize = pci_msix_vec_count(dev); 806 if (hwsize < 0) 807 return hwsize; 808 809 if (!pci_msix_validate_entries(dev, entries, nvec)) 810 return -EINVAL; 811 812 if (hwsize < nvec) { 813 /* Keep the IRQ virtual hackery working */ 814 if (flags & PCI_IRQ_VIRTUAL) 815 hwsize = nvec; 816 else 817 nvec = hwsize; 818 } 819 820 if (nvec < minvec) 821 return -ENOSPC; 822 823 rc = pci_setup_msi_context(dev); 824 if (rc) 825 return rc; 826 827 if (!pci_setup_msix_device_domain(dev, hwsize)) 828 return -ENODEV; 829 830 for (;;) { 831 if (affd) { 832 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 833 if (nvec < minvec) 834 return -ENOSPC; 835 } 836 837 rc = msix_capability_init(dev, entries, nvec, affd); 838 if (rc == 0) 839 return nvec; 840 841 if (rc < 0) 842 return rc; 843 if (rc < minvec) 844 return -ENOSPC; 845 846 nvec = rc; 847 } 848 } 849 850 void __pci_restore_msix_state(struct pci_dev *dev) 851 { 852 struct msi_desc *entry; 853 bool write_msg; 854 855 if (!dev->msix_enabled) 856 return; 857 858 /* route the table */ 859 pci_intx_for_msi(dev, 0); 860 pci_msix_clear_and_set_ctrl(dev, 0, 861 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 862 863 write_msg = arch_restore_msi_irqs(dev); 864 865 msi_lock_descs(&dev->dev); 866 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 867 if (write_msg) 868 __pci_write_msi_msg(entry, &entry->msg); 869 pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); 870 } 871 msi_unlock_descs(&dev->dev); 872 873 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 874 } 875 876 void pci_msix_shutdown(struct pci_dev *dev) 877 { 878 struct msi_desc *desc; 879 880 if (!pci_msi_enable || !dev || !dev->msix_enabled) 881 return; 882 883 if (pci_dev_is_disconnected(dev)) { 884 dev->msix_enabled = 0; 885 return; 886 } 887 888 /* Return the device with MSI-X masked as initial states */ 889 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) 890 pci_msix_mask(desc); 891 892 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 893 pci_intx_for_msi(dev, 1); 894 dev->msix_enabled = 0; 895 pcibios_alloc_irq(dev); 896 } 897 898 /* Common interfaces */ 899 900 void pci_free_msi_irqs(struct pci_dev *dev) 901 { 902 pci_msi_teardown_msi_irqs(dev); 903 904 if (dev->msix_base) { 905 iounmap(dev->msix_base); 906 dev->msix_base = NULL; 907 } 908 } 909 910 /* Misc. infrastructure */ 911 912 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) 913 { 914 return to_pci_dev(desc->dev); 915 } 916 EXPORT_SYMBOL(msi_desc_to_pci_dev); 917 918 void pci_no_msi(void) 919 { 920 pci_msi_enable = 0; 921 } 922