// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 * Copyright (C) 2016 Christoph Hellwig.
 */
#include <linux/err.h>
#include <linux/export.h>
#include <linux/irq.h>

#include "../pci.h"
#include "msi.h"

static int pci_msi_enable = 1;
int pci_msi_ignore_mask;

static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
{
	raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
	unsigned long flags;

	if (!desc->pci.msi_attrib.can_mask)
		return;

	raw_spin_lock_irqsave(lock, flags);
	desc->pci.msi_mask &= ~clear;
	desc->pci.msi_mask |= set;
	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
			       desc->pci.msi_mask);
	raw_spin_unlock_irqrestore(lock, flags);
}

static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, 0, mask);
}

static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, mask, 0);
}

static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
	return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
}

/*
 * This internal function does not flush PCI writes to the device. All
 * users must ensure that they read from the device before either assuming
 * that the device state is up to date, or returning out of this file.
 * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
 */
static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
{
	void __iomem *desc_addr = pci_msix_desc_addr(desc);

	if (desc->pci.msi_attrib.can_mask)
		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

static inline void pci_msix_mask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
	/* Flush write to device */
	readl(desc->pci.mask_base);
}

static inline void pci_msix_unmask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
}

static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_mask(desc);
	else
		pci_msi_mask(desc, mask);
}

static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_unmask(desc);
	else
		pci_msi_unmask(desc, mask);
}

/**
 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
 * @data: pointer to irqdata associated with that interrupt
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);

/**
 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
 * @data: pointer to irqdata associated with that interrupt
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
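
	/*
	 * Reading the message registers back is only reliable while the
	 * device is in D0; the BUG_ON() below enforces that callers have
	 * restored the device's power state first.
	 */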
	BUG_ON(dev->current_state != PCI_D0);

	if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);

		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
			return;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);
		u32 ctrl = entry->pci.msix_ctrl;
		bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);

		if (entry->pci.msi_attrib.is_virtual)
			goto skip;

		/*
		 * The specification mandates that the entry is masked
		 * when the message is modified:
		 *
		 * "If software changes the Address or Data value of an
		 * entry while the entry is unmasked, the result is
		 * undefined."
		 */
		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl);

		/* Ensure that the writes are visible in the device */
		readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->pci.msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
		/* Ensure that the writes are visible in the device */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	}

skip:
	entry->msg = *msg;

	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);
}

void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);

static void free_msi_irqs(struct pci_dev *dev)
{
	pci_msi_teardown_msi_irqs(dev);

	if (dev->msix_base) {
		iounmap(dev->msix_base);
		dev->msix_base = NULL;
	}
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

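/* Set or clear the MSI enable bit in the capability's Message Control word */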
static void pci_msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

/*
 * Architecture override returns true when the PCI MSI message should be
 * written by the generic restore function.
 */
bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return true;
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 0);
	if (arch_restore_msi_irqs(dev))
		__pci_write_msi_msg(entry, &entry->msg);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	pci_msi_update_mask(entry, 0, 0);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
{
	u16 ctrl;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	bool write_msg;

	if (!dev->msix_enabled)
		return;

	/* route the table */
	pci_intx_for_msi(dev, 0);
	pci_msix_clear_and_set_ctrl(dev, 0,
				    PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	write_msg = arch_restore_msi_irqs(dev);

	msi_lock_descs(&dev->dev);
	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (write_msg)
			__pci_write_msi_msg(entry, &entry->msg);
		pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
	}
	msi_unlock_descs(&dev->dev);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

static void pcim_msi_release(void *pcidev)
{
	struct pci_dev *dev = pcidev;

	dev->is_msi_managed = false;
	pci_free_irq_vectors(dev);
}

/*
 * Needs to be separate from pcim_release to prevent an ordering problem
 * vs. msi_device_data_release() in the MSI core code.
 */
static int pcim_setup_msi_release(struct pci_dev *dev)
{
	int ret;

	if (!pci_is_managed(dev) || dev->is_msi_managed)
		return 0;

	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
	if (!ret)
		dev->is_msi_managed = true;
	return ret;
}

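/*
 * Illustrative driver-side usage (an assumed example, not part of this
 * file): on a managed device the release action registered above makes
 * explicit interrupt cleanup unnecessary.
 *
 *	pcim_enable_device(pdev);
 *	pci_alloc_irq_vectors(pdev, 1, 4, PCI_IRQ_MSI | PCI_IRQ_MSIX);
 *	// vectors are torn down via pcim_msi_release() on driver unbind
 */
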
/*
 * Ordering vs. devres: msi device data has to be installed first so that
 * pcim_msi_release() is invoked before it on device release.
 */
static int pci_setup_msi_context(struct pci_dev *dev)
{
	int ret = msi_setup_device_data(&dev->dev);

	if (!ret)
		ret = pcim_setup_msi_release(dev);
	return ret;
}

static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
			      struct irq_affinity_desc *masks)
{
	struct msi_desc desc;
	u16 control;

	/* MSI Entry Initialization */
	memset(&desc, 0, sizeof(desc));

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* Lies, damned lies, and MSIs */
	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
		control |= PCI_MSI_FLAGS_MASKBIT;
	/* Respect XEN's mask disabling */
	if (pci_msi_ignore_mask)
		control &= ~PCI_MSI_FLAGS_MASKBIT;

	desc.nvec_used = nvec;
	desc.pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
	desc.pci.msi_attrib.can_mask = !!(control & PCI_MSI_FLAGS_MASKBIT);
	desc.pci.msi_attrib.default_irq = dev->irq;
	desc.pci.msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
	desc.pci.msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
	desc.affinity = masks;

	if (control & PCI_MSI_FLAGS_64BIT)
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (desc.pci.msi_attrib.can_mask)
		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);

	return msi_add_msi_desc(&dev->dev, &desc);
}

static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->no_64bit_msi)
		return 0;

	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (entry->msg.address_hi) {
			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
				entry->msg.address_hi, entry->msg.address_lo);
			break;
		}
	}
	return !entry ? 0 : -EIO;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts. A return value of zero indicates the successful
 * setup of an entry with the new MSI IRQ. A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec,
			       struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	struct msi_desc *entry;
	int ret;

	/*
	 * Disable MSI during setup in the hardware, but mark it enabled
	 * so that setup code can evaluate it.
	 */
	pci_msi_set_enable(dev, 0);
	dev->msi_enabled = 1;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msi_setup_msi_desc(dev, nvec, masks);
	if (ret)
		goto fail;

	/* All MSIs are unmasked by default; mask them all */
	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	pci_msi_mask(entry, msi_multi_mask(entry));

	/* Configure MSI capability structure */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		goto err;

	ret = msi_verify_entries(dev);
	if (ret)
		goto err;

	/* Set MSI enabled bits */
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 1);

	pcibios_free_irq(dev);
	dev->irq = entry->irq;
	goto unlock;

err:
	pci_msi_unmask(entry, msi_multi_mask(entry));
	free_msi_irqs(dev);
fail:
	dev->msi_enabled = 0;
unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}

static void __iomem *msix_map_region(struct pci_dev *dev,
				     unsigned int nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	unsigned long flags;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	flags = pci_resource_flags(dev, bir);
	if (!flags || (flags & IORESOURCE_UNSET))
		return NULL;

	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base,
				struct msix_entry *entries, int nvec,
				struct irq_affinity_desc *masks)
{
	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
	struct irq_affinity_desc *curmsk;
	struct msi_desc desc;
	void __iomem *addr;

	memset(&desc, 0, sizeof(desc));

	desc.nvec_used = 1;
	desc.pci.msi_attrib.is_msix = 1;
	desc.pci.msi_attrib.is_64 = 1;
	desc.pci.msi_attrib.default_irq = dev->irq;
	desc.pci.mask_base = base;

	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
		desc.msi_index = entries ? entries[i].entry : i;
		desc.affinity = masks ? curmsk : NULL;
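
		/*
		 * Entries whose index is beyond the hardware table size
		 * are "virtual" (PCI_IRQ_VIRTUAL): they have no MSI-X
		 * table slot and therefore cannot be masked.
		 */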
		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
		desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
					       !desc.pci.msi_attrib.is_virtual;

		if (!desc.pci.msi_attrib.can_mask) {
			addr = pci_msix_desc_addr(&desc);
			desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
		}

		ret = msi_add_msi_desc(&dev->dev, &desc);
		if (ret)
			break;
	}
	return ret;
}

static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
	struct msi_desc *desc;

	if (entries) {
		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
			entries->vector = desc->irq;
			entries++;
		}
	}
}

static void msix_mask_all(void __iomem *base, int tsize)
{
	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
	int i;

	if (pci_msi_ignore_mask)
		return;

	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base,
				 struct msix_entry *entries, int nvec,
				 struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	int ret;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msix_setup_msi_descs(dev, base, entries, nvec, masks);
	if (ret)
		goto out_free;

	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto out_free;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		goto out_free;

	msix_update_entries(dev, entries);
	goto out_unlock;

out_free:
	free_msi_irqs(dev);
out_unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 * @affd: Optional pointer to enable automatic affinity assignment
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X IRQs. A return of zero indicates successful
 * setup of the requested MSI-X entries with allocated IRQs; a non-zero
 * return indicates an error.
 **/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	void __iomem *base;
	int ret, tsize;
	u16 control;

	/*
	 * Some devices require MSI-X to be enabled before the MSI-X
	 * registers can be accessed. Mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
				    PCI_MSIX_FLAGS_ENABLE);

	/* Mark it enabled so setup functions can query it */
	dev->msix_enabled = 1;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	tsize = msix_table_size(control);
	base = msix_map_region(dev, tsize);
	if (!base) {
		ret = -ENOMEM;
		goto out_disable;
	}

	dev->msix_base = base;

	ret = msix_setup_interrupts(dev, base, entries, nvec, affd);
	if (ret)
		goto out_disable;

	/* Disable INTX */
	pci_intx_for_msi(dev, 0);

	/*
	 * Ensure that all table entries are masked to prevent
	 * stale entries from firing in a crash kernel.
	 *
	 * Done late to deal with a broken Marvell NVME device
	 * which takes the MSI-X mask bits into account even
	 * when MSI-X is disabled, which prevents MSI delivery.
	 */
	msix_mask_all(base, tsize);
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	pcibios_free_irq(dev);
	return 0;

out_disable:
	dev->msix_enabled = 0;
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);

	return ret;
}

/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 1, else return 0.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi)
		return 0;

	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 *
	 * The NO_MSI flag can either be set directly by:
	 * - arch-specific PCI host bus controller drivers (deprecated)
	 * - quirks for specific PCI bridges
	 *
	 * or indirectly by platform-specific PCI host bridge drivers by
	 * advertising the 'msi_domain' property, which results in
	 * the NO_MSI flag when no MSI domain is found for this bridge
	 * at probe time.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}

/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * the Multiple Message Capable register. It returns a negative errno if the
 * device is not capable of sending MSI interrupts. Otherwise, the call
 * succeeds and returns a power of two, up to a maximum of 2^5 (32), per the
 * MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	int ret;
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	return ret;
}
EXPORT_SYMBOL(pci_msi_vec_count);

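/*
 * Illustrative use (assumed driver-side code, not part of this file):
 *
 *	int nvec = pci_msi_vec_count(pdev);
 *
 *	if (nvec > 0)
 *		nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI);
 */
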
static void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as its initial state */
	desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	if (!WARN_ON_ONCE(!desc))
		pci_msi_unmask(desc, msi_multi_mask(desc));

	/* Restore dev->irq to its default pin-assertion IRQ */
	dev->irq = desc->pci.msi_attrib.default_irq;
	pcibios_alloc_irq(dev);
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_vec_count - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 *
 * This function returns the number of device's MSI-X table entries and
 * therefore the number of MSI-X vectors the device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
{
	u16 control;

	if (!dev->msix_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	return msix_table_size(control);
}
EXPORT_SYMBOL(pci_msix_vec_count);

static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
			     int nvec, struct irq_affinity *affd, int flags)
{
	int nr_entries;
	int i, j;

	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
		return nr_entries;

	if (entries) {
		/* Check for any invalid entries */
		for (i = 0; i < nvec; i++) {
			if (entries[i].entry >= nr_entries)
				return -EINVAL;		/* invalid entry */
			for (j = i + 1; j < nvec; j++) {
				if (entries[i].entry == entries[j].entry)
					return -EINVAL;	/* duplicate entry */
			}
		}
	}

	/* Check whether the driver already requested MSI IRQs */
	if (dev->msi_enabled) {
		pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	return msix_capability_init(dev, entries, nvec, affd);
}

static void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	if (pci_dev_is_disconnected(dev)) {
		dev->msix_enabled = 0;
		return;
	}

	/* Return the device with MSI-X masked as its initial state */
	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
		pci_msix_mask(desc);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
	pcibios_alloc_irq(dev);
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msix);

static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
				  struct irq_affinity *affd)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	/* Check whether the driver already requested MSI-X IRQs */
	if (dev->msix_enabled) {
		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msi_enabled))
		return -EINVAL;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	if (nvec < minvec)
		return -ENOSPC;

	if (nvec > maxvec)
		nvec = maxvec;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = msi_capability_init(dev, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

/* deprecated, don't use */
int pci_enable_msi(struct pci_dev *dev)
{
	int rc = __pci_enable_msi_range(dev, 1, 1, NULL);

	if (rc < 0)
		return rc;
	return 0;
}
EXPORT_SYMBOL(pci_enable_msi);

static int __pci_enable_msix_range(struct pci_dev *dev,
				   struct msix_entry *entries, int minvec,
				   int maxvec, struct irq_affinity *affd,
				   int flags)
{
	int rc, nvec = maxvec;

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msix_enabled))
		return -EINVAL;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X IRQs requested
 * @maxvec: maximum number of MSI-X IRQs requested
 *
 * Setup the MSI-X capability structure of the device function with the
 * largest possible number of interrupts in the range between @minvec and
 * @maxvec. Returns a negative errno if an error occurs. On success, returns
 * the actual number of interrupts allocated, indicating that the MSI-X
 * capability structure was configured with the newly allocated MSI-X
 * interrupts.
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
			  int minvec, int maxvec)
{
	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);

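/*
 * Illustrative use (assumed driver-side code, not part of this file):
 *
 *	struct msix_entry entries[] = {
 *		{ .entry = 0 }, { .entry = 1 }, { .entry = 2 },
 *	};
 *	int nvec = pci_enable_msix_range(pdev, entries, 1, 3);
 *
 *	// On success entries[i].vector holds the Linux IRQ number
 *	// for table slot entries[i].entry.
 */
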
/**
 * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
 * @dev: PCI device to operate on
 * @min_vecs: minimum number of vectors required (must be >= 1)
 * @max_vecs: maximum (desired) number of vectors
 * @flags: flags or quirks for the allocation
 * @affd: optional description of the affinity requirements
 *
 * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
 * vectors if available, and fall back to a single legacy vector
 * if neither is available. Return the number of vectors allocated
 * (which might be smaller than @max_vecs) if successful, or a negative
 * error code on error. If less than @min_vecs interrupt vectors are
 * available for @dev the function will fail with -ENOSPC.
 *
 * To get the Linux IRQ number used for a vector that can be passed to
 * request_irq() use the pci_irq_vector() helper.
 */
int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
				   unsigned int max_vecs, unsigned int flags,
				   struct irq_affinity *affd)
{
	struct irq_affinity msi_default_affd = {0};
	int nvecs = -ENOSPC;

	if (flags & PCI_IRQ_AFFINITY) {
		if (!affd)
			affd = &msi_default_affd;
	} else {
		if (WARN_ON(affd))
			affd = NULL;
	}

	if (flags & PCI_IRQ_MSIX) {
		nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
						affd, flags);
		if (nvecs > 0)
			return nvecs;
	}

	if (flags & PCI_IRQ_MSI) {
		nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
		if (nvecs > 0)
			return nvecs;
	}

	/* use legacy IRQ if allowed */
	if (flags & PCI_IRQ_LEGACY) {
		if (min_vecs == 1 && dev->irq) {
			/*
			 * Invoke the affinity spreading logic to ensure that
			 * the device driver can adjust queue configuration
			 * for the single interrupt case.
			 */
			if (affd)
				irq_create_affinity_masks(1, affd);
			pci_intx(dev, 1);
			return 1;
		}
	}

	return nvecs;
}
EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);

/**
 * pci_free_irq_vectors - free previously allocated IRQs for a device
 * @dev: PCI device to operate on
 *
 * Undoes the allocations and enabling in pci_alloc_irq_vectors().
 */
void pci_free_irq_vectors(struct pci_dev *dev)
{
	pci_disable_msix(dev);
	pci_disable_msi(dev);
}
EXPORT_SYMBOL(pci_free_irq_vectors);

/**
 * pci_irq_vector - return Linux IRQ number of a device vector
 * @dev: PCI device to operate on
 * @nr: Interrupt vector index (0-based)
 *
 * @nr has the following meanings depending on the interrupt mode:
 *   MSI-X:	The index in the MSI-X vector table
 *   MSI:	The index of the enabled MSI vectors
 *   INTx:	Must be 0
 *
 * Return: The Linux interrupt number or -EINVAL if @nr is out of range.
 */
int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
{
	unsigned int irq;

	if (!dev->msi_enabled && !dev->msix_enabled)
		return !nr ? dev->irq : -EINVAL;

	irq = msi_get_virq(&dev->dev, nr);
	return irq ? irq : -EINVAL;
}
EXPORT_SYMBOL(pci_irq_vector);

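/*
 * Illustrative use, combining the two helpers above (assumed driver-side
 * code, not part of this file; my_handler, "mydev" and priv are
 * hypothetical driver names):
 *
 *	int i, nvecs = pci_alloc_irq_vectors(pdev, 1, 8,
 *			PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
 *	if (nvecs < 0)
 *		return nvecs;
 *	for (i = 0; i < nvecs; i++)
 *		request_irq(pci_irq_vector(pdev, i), my_handler, 0,
 *			    "mydev", priv);
 */
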
/**
 * pci_irq_get_affinity - return the affinity of a particular MSI vector
 * @dev: PCI device to operate on
 * @nr: device-relative interrupt vector index (0-based)
 *
 * @nr has the following meanings depending on the interrupt mode:
 *   MSI-X:	The index in the MSI-X vector table
 *   MSI:	The index of the enabled MSI vectors
 *   INTx:	Must be 0
 *
 * Return: A cpumask pointer or NULL if @nr is out of range
 */
const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
{
	int idx, irq = pci_irq_vector(dev, nr);
	struct msi_desc *desc;

	if (WARN_ON_ONCE(irq <= 0))
		return NULL;

	desc = irq_get_msi_desc(irq);
	/* Non-MSI does not have the information handy */
	if (!desc)
		return cpu_possible_mask;

	if (WARN_ON_ONCE(!desc->affinity))
		return NULL;

	/*
	 * MSI has a mask array in the descriptor.
	 * MSI-X has a single mask.
	 */
	idx = dev->msi_enabled ? nr : 0;
	return &desc->affinity[idx].mask;
}
EXPORT_SYMBOL(pci_irq_get_affinity);

struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
	return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);