// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2016,2017 IBM Corporation.
 */

#define pr_fmt(fmt) "xive: " fmt

#include <linux/types.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/irq.h>
#include <linux/debugfs.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/msi.h>
#include <linux/vmalloc.h>

#include <asm/debugfs.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/errno.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/xmon.h>

#include "xive-internal.h"

#undef DEBUG_FLUSH
#undef DEBUG_ALL

#ifdef DEBUG_ALL
#define DBG_VERBOSE(fmt, ...)	pr_devel("cpu %d - " fmt, \
					 smp_processor_id(), ## __VA_ARGS__)
#else
#define DBG_VERBOSE(fmt...)	do { } while(0)
#endif

bool __xive_enabled;
EXPORT_SYMBOL_GPL(__xive_enabled);
bool xive_cmdline_disabled;

/* We use only one priority for now */
static u8 xive_irq_priority;

/* TIMA exported to KVM */
void __iomem *xive_tima;
EXPORT_SYMBOL_GPL(xive_tima);
u32 xive_tima_offset;

/* Backend ops */
static const struct xive_ops *xive_ops;

/* Our global interrupt domain */
static struct irq_domain *xive_irq_domain;

#ifdef CONFIG_SMP
/* The IPIs use the same logical irq number when on the same chip */
static struct xive_ipi_desc {
	unsigned int irq;
	char name[16];
	atomic_t started;
} *xive_ipis;

/*
 * Use early_cpu_to_node() for hot-plugged CPUs
 */
static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
{
	return xive_ipis[early_cpu_to_node(cpu)].irq;
}
#endif

/* Xive state for each CPU */
static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);

/* An invalid CPU target */
#define XIVE_INVALID_TARGET	(-1)

/*
 * Read the next entry in a queue, return its content if it's valid
 * or 0 if there is no new entry.
 *
 * The queue pointer is moved forward unless "just_peek" is set
 */
static u32 xive_read_eq(struct xive_q *q, bool just_peek)
{
	u32 cur;

	if (!q->qpage)
		return 0;
	cur = be32_to_cpup(q->qpage + q->idx);

	/* Check valid bit (31) vs current toggle polarity */
	if ((cur >> 31) == q->toggle)
		return 0;

	/* If consuming from the queue ... */
	if (!just_peek) {
		/* Next entry */
		q->idx = (q->idx + 1) & q->msk;

		/* Wrap around: flip valid toggle */
		if (q->idx == 0)
			q->toggle ^= 1;
	}
	/* Mask out the valid bit (31) */
	return cur & 0x7fffffff;
}

/*
 * Scans all the queues that may have interrupts in them
 * (based on "pending_prio") in priority order until an
 * interrupt is found or all the queues are empty.
 *
 * Then updates the CPPR (Current Processor Priority
 * Register) based on the most favored interrupt found
 * (0xff if none) and returns what was found (0 if none).
 *
 * If just_peek is set, return the most favored pending
 * interrupt if any but don't update the queue pointers.
 *
 * Note: This function can operate generically on any number
 * of queues (up to 8). The current implementation of the XIVE
 * driver only uses a single queue however.
 *
 * Note2: This will also "flush" the "pending_count" of a queue
 * into the "count" when that queue is observed to be empty.
 * This is used to keep track of the number of interrupts
 * targeting a queue. When an interrupt is moved away from
 * a queue, we only decrement that queue count once the queue
 * has been observed empty to avoid races.
 */
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
	u32 irq = 0;
	u8 prio = 0;

	/* Find highest pending priority */
	while (xc->pending_prio != 0) {
		struct xive_q *q;

		prio = ffs(xc->pending_prio) - 1;
		DBG_VERBOSE("scan_irq: trying prio %d\n", prio);

		/* Try to fetch */
		irq = xive_read_eq(&xc->queue[prio], just_peek);

		/* Found something ? That's it */
		if (irq) {
			if (just_peek || irq_to_desc(irq))
				break;
			/*
			 * We should never get here; if we do then we must
			 * have failed to synchronize the interrupt properly
			 * when shutting it down.
			 */
			pr_crit("xive: got interrupt %d without descriptor, dropping\n",
				irq);
			WARN_ON(1);
			continue;
		}

		/* Clear pending bits */
		xc->pending_prio &= ~(1 << prio);

		/*
		 * Check if the queue count needs adjusting due to
		 * interrupts being moved away. See description of
		 * xive_dec_target_count()
		 */
		q = &xc->queue[prio];
		if (atomic_read(&q->pending_count)) {
			int p = atomic_xchg(&q->pending_count, 0);
			if (p) {
				WARN_ON(p > atomic_read(&q->count));
				atomic_sub(p, &q->count);
			}
		}
	}

	/* If nothing was found, set CPPR to 0xff */
	if (irq == 0)
		prio = 0xff;

	/* Update HW CPPR to match if necessary */
	if (prio != xc->cppr) {
		DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
		xc->cppr = prio;
		out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
	}

	return irq;
}

/*
 * This is used to perform the magic loads from an ESB
 * described in xive-regs.h
 */
static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
		offset |= XIVE_ESB_LD_ST_MO;

	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
	else
		val = in_be64(xd->eoi_mmio + offset);

	return (u8)val;
}

static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
{
	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
	else
		out_be64(xd->eoi_mmio + offset, data);
}

#ifdef CONFIG_XMON
static notrace void xive_dump_eq(const char *name, struct xive_q *q)
{
	u32 i0, i1, idx;

	if (!q->qpage)
		return;
	idx = q->idx;
	i0 = be32_to_cpup(q->qpage + idx);
	idx = (idx + 1) & q->msk;
	i1 = be32_to_cpup(q->qpage + idx);
	xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
		    q->idx, q->toggle, i0, i1);
}

notrace void xmon_xive_do_dump(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);

	xmon_printf("CPU %d:", cpu);
	if (xc) {
		xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);

#ifdef CONFIG_SMP
		{
			u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);

			xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
				    val & XIVE_ESB_VAL_P ? 'P' : '-',
				    val & XIVE_ESB_VAL_Q ? 'Q' : '-');
		}
#endif
		xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
	}
	xmon_printf("\n");
}

static struct irq_data *xive_get_irq_data(u32 hw_irq)
{
	unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);

	return irq ? irq_get_irq_data(irq) : NULL;
}

int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
{
	int rc;
	u32 target;
	u8 prio;
	u32 lirq;

	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
	if (rc) {
		xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
		return rc;
	}

	xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
		    hw_irq, target, prio, lirq);

	if (!d)
		d = xive_get_irq_data(hw_irq);

	if (d) {
		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
		u64 val = xive_esb_read(xd, XIVE_ESB_GET);

		xmon_printf("flags=%c%c%c PQ=%c%c",
			    xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
			    xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
			    xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
			    val & XIVE_ESB_VAL_P ? 'P' : '-',
			    val & XIVE_ESB_VAL_Q ? 'Q' : '-');
	}

	xmon_printf("\n");
	return 0;
}

void xmon_xive_get_irq_all(void)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		struct irq_data *d = irq_desc_get_irq_data(desc);
		unsigned int hwirq = (unsigned int)irqd_to_hwirq(d);

		if (d->domain == xive_irq_domain)
			xmon_xive_get_irq_config(hwirq, d);
	}
}

#endif /* CONFIG_XMON */

static unsigned int xive_get_irq(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	u32 irq;

	/*
	 * This can be called either as a result of a HW interrupt or
	 * as a "replay" because EOI decided there was still something
	 * in one of the queues.
	 *
	 * First we perform an ACK cycle in order to update our mask
	 * of pending priorities. This will also have the effect of
	 * updating the CPPR to the most favored pending interrupt.
	 *
	 * In the future, if we have a way to differentiate a first
	 * entry (on HW interrupt) from a replay triggered by EOI,
	 * we could skip this on replays unless the soft-mask tells us
	 * that a new HW interrupt occurred.
	 */
	xive_ops->update_pending(xc);

	DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);

	/* Scan our queue(s) for interrupts */
	irq = xive_scan_interrupts(xc, false);

	DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
		    irq, xc->pending_prio);

	/* Return pending interrupt if any */
	if (irq == XIVE_BAD_IRQ)
		return 0;
	return irq;
}

/*
 * After EOI'ing an interrupt, we need to re-check the queue
 * to see if another interrupt is pending since multiple
 * interrupts can coalesce into a single notification to the
 * CPU.
 *
 * If we find that there is indeed more in there, we call
 * force_external_irq_replay() to make Linux synthesize an
 * external interrupt on the next call to local_irq_restore().
 */
static void xive_do_queue_eoi(struct xive_cpu *xc)
{
	if (xive_scan_interrupts(xc, true) != 0) {
		DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
		force_external_irq_replay();
	}
}

/*
 * EOI an interrupt at the source. There are several methods
 * to do this depending on the HW version and source type
 */
static void xive_do_source_eoi(struct xive_irq_data *xd)
{
	u8 eoi_val;

	xd->stale_p = false;

	/* If the XIVE supports the new "store EOI" facility, use it */
	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) {
		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
		return;
	}

	/*
	 * For LSIs, we use the "EOI cycle" special load rather than
	 * PQ bits, as they are automatically re-triggered in HW when
	 * still pending.
	 */
	if (xd->flags & XIVE_IRQ_FLAG_LSI) {
		xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
		return;
	}

	/*
	 * Otherwise, we use the special MMIO that does a clear of
	 * both P and Q and returns the old Q. This allows us to then
	 * do a re-trigger if Q was set rather than synthesizing an
	 * interrupt in software
	 */
	eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
	DBG_VERBOSE("eoi_val=%x\n", eoi_val);

	/* Re-trigger if needed */
	if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
		out_be64(xd->trig_mmio, 0);
}

/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
		    d->irq, irqd_to_hwirq(d), xc->pending_prio);

	/*
	 * EOI the source if it hasn't been disabled and hasn't
	 * been passed-through to a KVM guest
	 */
	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
	    !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
		xive_do_source_eoi(xd);
	else
		xd->stale_p = true;

	/*
	 * Clear saved_p to indicate that it's no longer occupying
	 * a queue slot on the target queue
	 */
	xd->saved_p = false;

	/* Check for more work in the queue */
	xive_do_queue_eoi(xc);
}

/*
 * Helper used to mask and unmask an interrupt source.
 */
static void xive_do_source_set_mask(struct xive_irq_data *xd,
				    bool mask)
{
	u64 val;

	/*
	 * If the interrupt had P set, it may be in a queue.
	 *
	 * We need to make sure we don't re-enable it until it
	 * has been fetched from that queue and EOId. We keep
	 * a copy of that P state and use it to restore the
	 * ESB accordingly on unmask.
	 */
	if (mask) {
		val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
		if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
			xd->saved_p = true;
		xd->stale_p = false;
	} else if (xd->saved_p) {
		xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
		xd->saved_p = false;
	} else {
		xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
		xd->stale_p = false;
	}
}

/*
 * Try to choose "cpu" as a new interrupt target. Increments
 * the queue accounting for that target if it's not already
 * full.
 */
static bool xive_try_pick_target(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
	struct xive_q *q = &xc->queue[xive_irq_priority];
	int max;

	/*
	 * Calculate max number of interrupts in that queue.
	 *
	 * We leave a gap of 1 just in case...
	 */
	max = (q->msk + 1) - 1;
	return !!atomic_add_unless(&q->count, 1, max);
}

/*
 * Un-account an interrupt for a target CPU. We don't directly
 * decrement q->count since the interrupt might still be present
 * in the queue.
 *
 * Instead increment a separate counter "pending_count" which
 * will be subtracted from "count" later when that CPU observes
 * the queue to be empty.
 */
static void xive_dec_target_count(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
	struct xive_q *q = &xc->queue[xive_irq_priority];

	if (WARN_ON(cpu < 0 || !xc)) {
		pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
		return;
	}

	/*
	 * We increment the "pending count" which will be used
	 * to decrement the target queue count whenever it's next
	 * processed and found empty. This ensures that we don't
	 * decrement while we still have the interrupt there
	 * occupying a slot.
	 */
	atomic_inc(&q->pending_count);
}

/* Find a tentative CPU target in a CPU mask */
static int xive_find_target_in_mask(const struct cpumask *mask,
				    unsigned int fuzz)
{
	int cpu, first, num, i;

	/* Pick up a starting point CPU in the mask based on fuzz */
	num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
	first = fuzz % num;

	/* Locate it */
	cpu = cpumask_first(mask);
	for (i = 0; i < first && cpu < nr_cpu_ids; i++)
		cpu = cpumask_next(cpu, mask);

	/* Sanity check */
	if (WARN_ON(cpu >= nr_cpu_ids))
		cpu = cpumask_first(cpu_online_mask);

	/* Remember first one to handle wrap-around */
	first = cpu;

	/*
	 * Now go through the entire mask until we find a valid
	 * target.
	 */
	do {
		/*
		 * We re-check online as the fallback case passes us
		 * an untested affinity mask
		 */
		if (cpu_online(cpu) && xive_try_pick_target(cpu))
			return cpu;
		cpu = cpumask_next(cpu, mask);
		/* Wrap around */
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(mask);
	} while (cpu != first);

	return -1;
}

/*
 * Pick a target CPU for an interrupt. This is done at
 * startup or if the affinity is changed in a way that
 * invalidates the current target.
 */
static int xive_pick_irq_target(struct irq_data *d,
				const struct cpumask *affinity)
{
	static unsigned int fuzz;
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	cpumask_var_t mask;
	int cpu = -1;

	/*
	 * If we have chip IDs, first we try to build a mask of
	 * CPUs matching the source chip and find a target in there
	 */
	if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
	    zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
		/* Build a mask of matching chip IDs */
		for_each_cpu_and(cpu, affinity, cpu_online_mask) {
			struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
			if (xc->chip_id == xd->src_chip)
				cpumask_set_cpu(cpu, mask);
		}
		/* Try to find a target */
		if (cpumask_empty(mask))
			cpu = -1;
		else
			cpu = xive_find_target_in_mask(mask, fuzz++);
		free_cpumask_var(mask);
		if (cpu >= 0)
			return cpu;
		fuzz--;
	}

	/* No chip IDs, fallback to using the affinity mask */
	return xive_find_target_in_mask(affinity, fuzz++);
}

static unsigned int xive_irq_startup(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int target, rc;

	xd->saved_p = false;
	xd->stale_p = false;
	pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
		 d->irq, hw_irq, d);

#ifdef CONFIG_PCI_MSI
	/*
	 * The generic MSI code returns with the interrupt disabled on the
	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
	 * at that level, so we do it here by hand.
	 */
	if (irq_data_get_msi_desc(d))
		pci_msi_unmask_irq(d);
#endif

	/* Pick a target */
	target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
	if (target == XIVE_INVALID_TARGET) {
		/* Try again breaking affinity */
		target = xive_pick_irq_target(d, cpu_online_mask);
		if (target == XIVE_INVALID_TARGET)
			return -ENXIO;
		pr_warn("irq %d started with broken affinity\n", d->irq);
	}

	/* Sanity check */
	if (WARN_ON(target == XIVE_INVALID_TARGET ||
		    target >= nr_cpu_ids))
		target = smp_processor_id();

	xd->target = target;

	/*
	 * Configure the logical number to be the Linux IRQ number
	 * and set the target queue
	 */
	rc = xive_ops->configure_irq(hw_irq,
				     get_hard_smp_processor_id(target),
				     xive_irq_priority, d->irq);
	if (rc)
		return rc;

	/* Unmask the ESB */
	xive_do_source_set_mask(xd, false);

	return 0;
}

/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);

	pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
		 d->irq, hw_irq, d);

	if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
		return;

	/* Mask the interrupt at the source */
	xive_do_source_set_mask(xd, true);

	/*
	 * Mask the interrupt in HW in the IVT/EAS and set the number
	 * to be the "bad" IRQ number
	 */
	xive_ops->configure_irq(hw_irq,
				get_hard_smp_processor_id(xd->target),
				0xff, XIVE_BAD_IRQ);

	xive_dec_target_count(xd->target);
	xd->target = XIVE_INVALID_TARGET;
}

static void xive_irq_unmask(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);

	xive_do_source_set_mask(xd, false);
}

static void xive_irq_mask(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);

	xive_do_source_set_mask(xd, true);
}

static int xive_irq_set_affinity(struct irq_data *d,
				 const struct cpumask *cpumask,
				 bool force)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	u32 target, old_target;
	int rc = 0;

	pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);

	/* Is this valid ? */
	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
		return -EINVAL;

	/* Don't do anything if the interrupt isn't started */
	if (!irqd_is_started(d))
		return IRQ_SET_MASK_OK;

	/*
	 * If the existing target is already in the new mask and is
	 * online, then do nothing.
	 */
	if (xd->target != XIVE_INVALID_TARGET &&
	    cpu_online(xd->target) &&
	    cpumask_test_cpu(xd->target, cpumask))
		return IRQ_SET_MASK_OK;

	/* Pick a new target */
	target = xive_pick_irq_target(d, cpumask);

	/* No target found */
	if (target == XIVE_INVALID_TARGET)
		return -ENXIO;

	/* Sanity check */
	if (WARN_ON(target >= nr_cpu_ids))
		target = smp_processor_id();

	old_target = xd->target;

	/*
	 * Only configure the irq if it's not currently passed-through to
	 * a KVM guest
	 */
	if (!irqd_is_forwarded_to_vcpu(d))
		rc = xive_ops->configure_irq(hw_irq,
					     get_hard_smp_processor_id(target),
					     xive_irq_priority, d->irq);
	if (rc < 0) {
		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
		return rc;
	}

	pr_devel(" target: 0x%x\n", target);
	xd->target = target;

	/* Give up previous target */
	if (old_target != XIVE_INVALID_TARGET)
		xive_dec_target_count(old_target);

	return IRQ_SET_MASK_OK;
}

static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	/*
	 * We only support these. This has really no effect other than setting
	 * the corresponding descriptor bits, mind you, but those will in turn
	 * affect the resend function when re-enabling an edge interrupt.
	 *
	 * We set the default to edge as explained in map().
	 */
	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
		flow_type = IRQ_TYPE_EDGE_RISING;

	if (flow_type != IRQ_TYPE_EDGE_RISING &&
	    flow_type != IRQ_TYPE_LEVEL_LOW)
		return -EINVAL;

	irqd_set_trigger_type(d, flow_type);

	/*
	 * Double check it matches what the FW thinks
	 *
	 * NOTE: We don't know yet if the PAPR interface will provide
	 * the LSI vs MSI information apart from the device-tree so
	 * this check might have to move into an optional backend call
	 * that is specific to the native backend
	 */
	if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
	    !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
		pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
			d->irq, (u32)irqd_to_hwirq(d),
			(flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
			(xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
	}

	return IRQ_SET_MASK_OK_NOCOPY;
}

static int xive_irq_retrigger(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	/* This should be only for MSIs */
	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
		return 0;

	/*
	 * To perform a retrigger, we first set the PQ bits to
	 * 11, then perform an EOI.
	 */
	xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
	xive_do_source_eoi(xd);

	return 1;
}

/*
 * Caller holds the irq descriptor lock, so this won't be called
 * concurrently with xive_get_irqchip_state on the same interrupt.
 */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int rc;
	u8 pq;

	/*
	 * This is called by KVM with state non-NULL for enabling
	 * pass-through or NULL for disabling it
	 */
	if (state) {
		irqd_set_forwarded_to_vcpu(d);

		/* Set it to PQ=10 state to prevent further sends */
		pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
		if (!xd->stale_p) {
			xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
			xd->stale_p = !xd->saved_p;
		}

		/* No target ? nothing to do */
		if (xd->target == XIVE_INVALID_TARGET) {
			/*
			 * An untargeted interrupt should also have been
			 * masked at the source
			 */
			WARN_ON(xd->saved_p);

			return 0;
		}

		/*
		 * If P was set, adjust state to PQ=11 to indicate
		 * that a resend is needed for the interrupt to reach
		 * the guest. Also remember the value of P.
		 *
		 * This also tells us that it's in flight to a host queue
		 * or has already been fetched but hasn't been EOIed yet
		 * by the host. Thus it's potentially using up a host
		 * queue slot. This is important to know because as long
		 * as this is the case, we must not hard-unmask it when
		 * "returning" that interrupt to the host.
		 *
		 * This saved_p is cleared by the host EOI, when we know
		 * for sure the queue slot is no longer in use.
		 */
		if (xd->saved_p) {
			xive_esb_read(xd, XIVE_ESB_SET_PQ_11);

			/*
			 * Sync the XIVE source HW to ensure the interrupt
			 * has gone through the EAS before we change its
			 * target to the guest. That should guarantee us
			 * that we *will* eventually get an EOI for it on
			 * the host. Otherwise there would be a small window
			 * for P to be seen here but the interrupt going
			 * to the guest queue.
			 */
			if (xive_ops->sync_source)
				xive_ops->sync_source(hw_irq);
		}
	} else {
		irqd_clr_forwarded_to_vcpu(d);

		/* No host target ? hard mask and return */
		if (xd->target == XIVE_INVALID_TARGET) {
			xive_do_source_set_mask(xd, true);
			return 0;
		}

		/*
		 * Sync the XIVE source HW to ensure the interrupt
		 * has gone through the EAS before we change its
		 * target to the host.
		 */
		if (xive_ops->sync_source)
			xive_ops->sync_source(hw_irq);

		/*
		 * By convention we are called with the interrupt in
		 * a PQ=10 or PQ=11 state, i.e., it won't fire and will
		 * have latched in Q whether there's a pending HW
		 * interrupt or not.
		 *
		 * First reconfigure the target.
		 */
		rc = xive_ops->configure_irq(hw_irq,
					     get_hard_smp_processor_id(xd->target),
					     xive_irq_priority, d->irq);
		if (rc)
			return rc;

		/*
		 * Then if saved_p is not set, effectively re-enable the
		 * interrupt with an EOI. If it is set, we know there is
		 * still a message in a host queue somewhere that will be
		 * EOId eventually.
		 *
		 * Note: We don't check irqd_irq_disabled(). Effectively,
		 * we *will* let the irq get through even if masked if the
		 * HW is still firing it in order to deal with the whole
		 * saved_p business properly. If the interrupt triggers
		 * while masked, the generic code will re-mask it anyway.
		 */
		if (!xd->saved_p)
			xive_do_source_eoi(xd);

	}
	return 0;
}

/* Called with irq descriptor lock held. */
static int xive_get_irqchip_state(struct irq_data *data,
				  enum irqchip_irq_state which, bool *state)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
	u8 pq;

	switch (which) {
	case IRQCHIP_STATE_ACTIVE:
		pq = xive_esb_read(xd, XIVE_ESB_GET);

		/*
		 * The ESB value being all 1's means we couldn't get
		 * the PQ state of the interrupt through mmio. It may
		 * happen, for example when querying a PHB interrupt
		 * while the PHB is in an error state. We consider the
		 * interrupt to be inactive in that case.
		 */
		*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
			 (xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
		return 0;
	default:
		return -EINVAL;
	}
}

static struct irq_chip xive_irq_chip = {
	.name = "XIVE-IRQ",
	.irq_startup = xive_irq_startup,
	.irq_shutdown = xive_irq_shutdown,
	.irq_eoi = xive_irq_eoi,
	.irq_mask = xive_irq_mask,
	.irq_unmask = xive_irq_unmask,
	.irq_set_affinity = xive_irq_set_affinity,
	.irq_set_type = xive_irq_set_type,
	.irq_retrigger = xive_irq_retrigger,
	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
	.irq_get_irqchip_state = xive_get_irqchip_state,
};

bool is_xive_irq(struct irq_chip *chip)
{
	return chip == &xive_irq_chip;
}
EXPORT_SYMBOL_GPL(is_xive_irq);

void xive_cleanup_irq_data(struct xive_irq_data *xd)
{
	if (xd->eoi_mmio) {
		iounmap(xd->eoi_mmio);
		if (xd->eoi_mmio == xd->trig_mmio)
			xd->trig_mmio = NULL;
		xd->eoi_mmio = NULL;
	}
	if (xd->trig_mmio) {
		iounmap(xd->trig_mmio);
		xd->trig_mmio = NULL;
	}
}
EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);

static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
{
	struct xive_irq_data *xd;
	int rc;

	xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
	if (!xd)
		return -ENOMEM;
	rc = xive_ops->populate_irq_data(hw, xd);
	if (rc) {
		kfree(xd);
		return rc;
	}
	xd->target = XIVE_INVALID_TARGET;
	irq_set_handler_data(virq, xd);

	/*
	 * Turn OFF by default the interrupt being mapped. A side
	 * effect of this check is that it maps the ESB page of the
	 * interrupt in the Linux address space. This prevents page
	 * fault issues in the crash handler, which masks all
	 * interrupts.
	 */
	xive_esb_read(xd, XIVE_ESB_SET_PQ_01);

	return 0;
}

static void xive_irq_free_data(unsigned int virq)
{
	struct xive_irq_data *xd = irq_get_handler_data(virq);

	if (!xd)
		return;
	irq_set_handler_data(virq, NULL);
	xive_cleanup_irq_data(xd);
	kfree(xd);
}

#ifdef CONFIG_SMP

static void xive_cause_ipi(int cpu)
{
	struct xive_cpu *xc;
	struct xive_irq_data *xd;

	xc = per_cpu(xive_cpu, cpu);

	DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
		    smp_processor_id(), cpu, xc->hw_ipi);

	xd = &xc->ipi_data;
	if (WARN_ON(!xd->trig_mmio))
		return;
	out_be64(xd->trig_mmio, 0);
}

static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
{
	return smp_ipi_demux();
}

static void xive_ipi_eoi(struct irq_data *d)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	/* Handle possible race with unplug and drop stale IPIs */
	if (!xc)
		return;

	DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
		    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);

	xive_do_source_eoi(&xc->ipi_data);
	xive_do_queue_eoi(xc);
}

static void xive_ipi_do_nothing(struct irq_data *d)
{
	/*
	 * Nothing to do, we never mask/unmask IPIs, but the callback
	 * has to exist for the struct irq_chip.
	 */
}

static struct irq_chip xive_ipi_chip = {
	.name = "XIVE-IPI",
	.irq_eoi = xive_ipi_eoi,
	.irq_mask = xive_ipi_do_nothing,
	.irq_unmask = xive_ipi_do_nothing,
};

/*
 * IPIs are marked per-cpu. We use separate HW interrupts under the
 * hood but associated with the same "linux" interrupt
 */
struct xive_ipi_alloc_info {
	irq_hw_number_t hwirq;
};

static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				     unsigned int nr_irqs, void *arg)
{
	struct xive_ipi_alloc_info *info = arg;
	int i;

	for (i = 0; i < nr_irqs; i++) {
		irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip,
				    domain->host_data, handle_percpu_irq,
				    NULL, NULL);
	}
	return 0;
}

static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
	.alloc = xive_ipi_irq_domain_alloc,
};

static int __init xive_init_ipis(void)
{
	struct fwnode_handle *fwnode;
	struct irq_domain *ipi_domain;
	unsigned int node;
	int ret = -ENOMEM;

	fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
	if (!fwnode)
		goto out;

	ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
					      &xive_ipi_irq_domain_ops, NULL);
	if (!ipi_domain)
		goto out_free_fwnode;

	xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
	if (!xive_ipis)
		goto out_free_domain;

	for_each_node(node) {
		struct xive_ipi_desc *xid = &xive_ipis[node];
		struct xive_ipi_alloc_info info = { node };

		/*
		 * Map one IPI interrupt per node for all cpus of that node.
		 * Since the HW interrupt number doesn't have any meaning,
		 * simply use the node number.
		 */
		ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
		if (ret < 0)
			goto out_free_xive_ipis;
		xid->irq = ret;

		snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
	}

	return ret;

out_free_xive_ipis:
	kfree(xive_ipis);
out_free_domain:
	irq_domain_remove(ipi_domain);
out_free_fwnode:
	irq_domain_free_fwnode(fwnode);
out:
	return ret;
}

static int __init xive_request_ipi(unsigned int cpu)
{
	struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
	int ret;

	if (atomic_inc_return(&xid->started) > 1)
		return 0;

	ret = request_irq(xid->irq, xive_muxed_ipi_action,
			  IRQF_PERCPU | IRQF_NO_THREAD,
			  xid->name, NULL);

	WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
	return ret;
}

static int xive_setup_cpu_ipi(unsigned int cpu)
{
	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
	struct xive_cpu *xc;
	int rc;

	pr_debug("Setting up IPI for CPU %d\n", cpu);

	xc = per_cpu(xive_cpu, cpu);

	/* Check if we are already setup */
	if (xc->hw_ipi != XIVE_BAD_IRQ)
		return 0;

	/* Register the IPI */
	xive_request_ipi(cpu);

	/* Grab an IPI from the backend, this will populate xc->hw_ipi */
	if (xive_ops->get_ipi(cpu, xc))
		return -EIO;

	/*
	 * Populate the IRQ data in the xive_cpu structure and
	 * configure the HW / enable the IPIs.
	 */
	rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
	if (rc) {
		pr_err("Failed to populate IPI data on CPU %d\n", cpu);
		return -EIO;
	}
	rc = xive_ops->configure_irq(xc->hw_ipi,
				     get_hard_smp_processor_id(cpu),
				     xive_irq_priority, xive_ipi_irq);
	if (rc) {
		pr_err("Failed to map IPI CPU %d\n", cpu);
		return -EIO;
	}
	pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
		 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);

	/* Unmask it */
	xive_do_source_set_mask(&xc->ipi_data, false);

	return 0;
}

static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
{
	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);

	/* Disable the IPI and free the IRQ data */

	/* Already cleaned up ? */
	if (xc->hw_ipi == XIVE_BAD_IRQ)
		return;

	/* TODO: clear IPI mapping */

	/* Mask the IPI */
	xive_do_source_set_mask(&xc->ipi_data, true);

	/*
	 * Note: We don't call xive_cleanup_irq_data() to free
	 * the mappings as this is called from an IPI on kexec
	 * which is not a safe environment to call iounmap()
	 */

	/* Deconfigure/mask in the backend */
	xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
				0xff, xive_ipi_irq);

	/* Free the IPIs in the backend */
	xive_ops->put_ipi(cpu, xc);
}

void __init xive_smp_probe(void)
{
	smp_ops->cause_ipi = xive_cause_ipi;

	/* Register the IPI */
	xive_init_ipis();

	/* Allocate and setup IPI for the boot CPU */
	xive_setup_cpu_ipi(smp_processor_id());
}

#endif /* CONFIG_SMP */

static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
			       irq_hw_number_t hw)
{
	int rc;

	/*
	 * Mark interrupts as edge sensitive by default so that resend
	 * actually works. Will fix that up below if needed.
	 */
	irq_clear_status_flags(virq, IRQ_LEVEL);

	rc = xive_irq_alloc_data(virq, hw);
	if (rc)
		return rc;

	irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);

	return 0;
}

static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
{
	xive_irq_free_data(virq);
}

static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
				 const u32 *intspec, unsigned int intsize,
				 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
	*out_hwirq = intspec[0];

	/*
	 * If intsize is at least 2, we look for the type in the second cell;
	 * we assume the LSB indicates a level interrupt.
	 */
	if (intsize > 1) {
		if (intspec[1] & 1)
			*out_flags = IRQ_TYPE_LEVEL_LOW;
		else
			*out_flags = IRQ_TYPE_EDGE_RISING;
	} else
		*out_flags = IRQ_TYPE_LEVEL_LOW;

	return 0;
}

static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
				 enum irq_domain_bus_token bus_token)
{
	return xive_ops->match(node);
}

#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" };

static const struct {
	u64  mask;
	char *name;
} xive_irq_flags[] = {
	{ XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" },
	{ XIVE_IRQ_FLAG_LSI, "LSI" },
	{ XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" },
	{ XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" },
};

static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d,
				       struct irq_data *irqd, int ind)
{
	struct xive_irq_data *xd;
	u64 val;
	int i;

	/* No IRQ domain level information. To be done */
	if (!irqd)
		return;

	if (!is_xive_irq(irq_data_get_irq_chip(irqd)))
		return;

	seq_printf(m, "%*sXIVE:\n", ind, "");
	ind++;

	xd = irq_data_get_irq_handler_data(irqd);
	if (!xd) {
		seq_printf(m, "%*snot assigned\n", ind, "");
		return;
	}

	val = xive_esb_read(xd, XIVE_ESB_GET);
	seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]);
	seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "",
		   xd->saved_p ? "saved" : "");
	seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target);
	seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip);
	seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page);
	seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page);
	seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags);
	for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) {
		if (xd->flags & xive_irq_flags[i].mask)
			seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name);
	}
}
#endif

static const struct irq_domain_ops xive_irq_domain_ops = {
	.match = xive_irq_domain_match,
	.map = xive_irq_domain_map,
	.unmap = xive_irq_domain_unmap,
	.xlate = xive_irq_domain_xlate,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	.debug_show = xive_irq_domain_debug_show,
#endif
};

static void __init xive_init_host(struct device_node *np)
{
	xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
					       &xive_irq_domain_ops, NULL);
	if (WARN_ON(xive_irq_domain == NULL))
		return;
	irq_set_default_host(xive_irq_domain);
}

static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
{
	if (xc->queue[xive_irq_priority].qpage)
		xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
}

static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
{
	int rc = 0;

	/* We set up one queue for now, with a 64k page */
	if (!xc->queue[xive_irq_priority].qpage)
		rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);

	return rc;
}

static int xive_prepare_cpu(unsigned int cpu)
{
	struct xive_cpu *xc;

	xc = per_cpu(xive_cpu, cpu);
	if (!xc) {
		xc = kzalloc_node(sizeof(struct xive_cpu),
				  GFP_KERNEL, cpu_to_node(cpu));
		if (!xc)
			return -ENOMEM;
		xc->hw_ipi = XIVE_BAD_IRQ;
		xc->chip_id = XIVE_INVALID_CHIP_ID;
		if (xive_ops->prepare_cpu)
			xive_ops->prepare_cpu(cpu, xc);

		per_cpu(xive_cpu, cpu) = xc;
	}

	/* Setup EQs if not already done */
	return xive_setup_cpu_queues(cpu, xc);
}

static void xive_setup_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	/* The backend might have additional things to do */
	if (xive_ops->setup_cpu)
		xive_ops->setup_cpu(smp_processor_id(), xc);

	/* Set CPPR to 0xff to enable flow of interrupts */
	xc->cppr = 0xff;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
}

#ifdef CONFIG_SMP
void xive_smp_setup_cpu(void)
{
	pr_devel("SMP setup CPU %d\n", smp_processor_id());

	/* This will have already been done on the boot CPU */
	if (smp_processor_id() != boot_cpuid)
		xive_setup_cpu();

}

int xive_smp_prepare_cpu(unsigned int cpu)
{
	int rc;

	/* Allocate per-CPU data and queues */
	rc = xive_prepare_cpu(cpu);
	if (rc)
		return rc;

	/* Allocate and setup IPI for the new CPU */
	return xive_setup_cpu_ipi(cpu);
}

#ifdef CONFIG_HOTPLUG_CPU
static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
{
	u32 irq;

	/* We assume local irqs are disabled */
	WARN_ON(!irqs_disabled());

	/* Check what's already in the CPU queue */
	while ((irq = xive_scan_interrupts(xc, false)) != 0) {
		/*
		 * We need to re-route that interrupt to its new destination.
		 * First get and lock the descriptor
		 */
		struct irq_desc *desc = irq_to_desc(irq);
		struct irq_data *d = irq_desc_get_irq_data(desc);
		struct xive_irq_data *xd;

		/*
		 * Ignore anything that isn't a XIVE irq. This also catches
		 * the IPIs, which can simply be dropped.
		 */
		if (d->domain != xive_irq_domain)
			continue;

		/*
		 * The IRQ should have already been re-routed, it's just a
		 * stale entry in the old queue, so re-trigger it in order to
		 * make it reach its new destination.
		 */
#ifdef DEBUG_FLUSH
		pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
			cpu, irq);
#endif
		raw_spin_lock(&desc->lock);
		xd = irq_desc_get_handler_data(desc);

		/*
		 * Clear saved_p to indicate that it's no longer pending
		 */
		xd->saved_p = false;

		/*
		 * For LSIs, we EOI, this will cause a resend if it's
		 * still asserted. Otherwise do an MSI retrigger.
		 */
		if (xd->flags & XIVE_IRQ_FLAG_LSI)
			xive_do_source_eoi(xd);
		else
			xive_irq_retrigger(d);

		raw_spin_unlock(&desc->lock);
	}
}

void xive_smp_disable_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Migrate interrupts away from the CPU */
	irq_migrate_all_off_this_cpu();

	/* Set CPPR to 0 to disable flow of interrupts */
	xc->cppr = 0;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);

	/* Flush everything still in the queue */
	xive_flush_cpu_queue(cpu, xc);

	/* Re-enable CPPR */
	xc->cppr = 0xff;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
}

void xive_flush_interrupt(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Called if an interrupt occurs while the CPU is hot unplugged */
	xive_flush_cpu_queue(cpu, xc);
}

#endif /* CONFIG_HOTPLUG_CPU */

#endif /* CONFIG_SMP */

void xive_teardown_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Set CPPR to 0 to disable flow of interrupts */
	xc->cppr = 0;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);

	if (xive_ops->teardown_cpu)
		xive_ops->teardown_cpu(cpu, xc);

#ifdef CONFIG_SMP
	/* Get rid of IPI */
	xive_cleanup_cpu_ipi(cpu, xc);
#endif

	/* Disable and free the queues */
	xive_cleanup_cpu_queues(cpu, xc);
}

void xive_shutdown(void)
{
	xive_ops->shutdown();
}

bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
			   void __iomem *area, u32 offset, u8 max_prio)
{
	xive_tima = area;
	xive_tima_offset = offset;
	xive_ops = ops;
	xive_irq_priority = max_prio;

	ppc_md.get_irq = xive_get_irq;
	__xive_enabled = true;

	pr_devel("Initializing host..\n");
	xive_init_host(np);

	pr_devel("Initializing boot CPU..\n");

	/* Allocate per-CPU data and queues */
	xive_prepare_cpu(smp_processor_id());

	/* Get ready for interrupts */
	xive_setup_cpu();

	pr_info("Interrupt handling initialized with %s backend\n",
		xive_ops->name);
	pr_info("Using priority %d for all interrupts\n", max_prio);

	return true;
}

__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
{
	unsigned int alloc_order;
	struct page *pages;
	__be32 *qpage;

	alloc_order = xive_alloc_order(queue_shift);
	pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
	if (!pages)
		return ERR_PTR(-ENOMEM);
	qpage = (__be32 *)page_address(pages);
	memset(qpage, 0, 1 << queue_shift);

	return qpage;
}

static int __init xive_off(char *arg)
{
	xive_cmdline_disabled = true;
	return 0;
}
__setup("xive=off", xive_off);

static void xive_debug_show_cpu(struct seq_file *m, int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);

	seq_printf(m, "CPU %d:", cpu);
	if (xc) {
		seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);

#ifdef CONFIG_SMP
		{
			u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);

			seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
				   val & XIVE_ESB_VAL_P ? 'P' : '-',
				   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
		}
#endif
		{
			struct xive_q *q = &xc->queue[xive_irq_priority];
			u32 i0, i1, idx;

			if (q->qpage) {
				idx = q->idx;
				i0 = be32_to_cpup(q->qpage + idx);
				idx = (idx + 1) & q->msk;
				i1 = be32_to_cpup(q->qpage + idx);
				seq_printf(m, "EQ idx=%d T=%d %08x %08x ...",
					   q->idx, q->toggle, i0, i1);
			}
		}
	}
	seq_puts(m, "\n");
}

static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int rc;
	u32 target;
	u8 prio;
	u32 lirq;
	struct xive_irq_data *xd;
	u64 val;

	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
	if (rc) {
		seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
		return;
	}

	seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
		   hw_irq, target, prio, lirq);

	xd = irq_data_get_irq_handler_data(d);
	val = xive_esb_read(xd, XIVE_ESB_GET);
	seq_printf(m, "flags=%c%c%c PQ=%c%c",
		   xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
		   xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
		   xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
		   val & XIVE_ESB_VAL_P ? 'P' : '-',
		   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
	seq_puts(m, "\n");
}

static int xive_core_debug_show(struct seq_file *m, void *private)
{
	unsigned int i;
	struct irq_desc *desc;
	int cpu;

	if (xive_ops->debug_show)
		xive_ops->debug_show(m, private);

	for_each_possible_cpu(cpu)
		xive_debug_show_cpu(m, cpu);

	for_each_irq_desc(i, desc) {
		struct irq_data *d = irq_desc_get_irq_data(desc);

		if (d->domain == xive_irq_domain)
			xive_debug_show_irq(m, d);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(xive_core_debug);

int xive_core_debug_init(void)
{
	if (xive_enabled())
		debugfs_create_file("xive", 0400, powerpc_debugfs_root,
				    NULL, &xive_core_debug_fops);
	return 0;
}