// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2016,2017 IBM Corporation.
 */

#define pr_fmt(fmt) "xive: " fmt

#include <linux/types.h>
#include <linux/threads.h>
#include <linux/kernel.h>
#include <linux/irq.h>
#include <linux/debugfs.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/msi.h>

#include <asm/debugfs.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/errno.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/xmon.h>

#include "xive-internal.h"

#undef DEBUG_FLUSH
#undef DEBUG_ALL

#ifdef DEBUG_ALL
#define DBG_VERBOSE(fmt, ...)	pr_devel("cpu %d - " fmt, \
					 smp_processor_id(), ## __VA_ARGS__)
#else
#define DBG_VERBOSE(fmt...)	do { } while (0)
#endif

bool __xive_enabled;
EXPORT_SYMBOL_GPL(__xive_enabled);
bool xive_cmdline_disabled;

/* We use only one priority for now */
static u8 xive_irq_priority;

/* TIMA exported to KVM */
void __iomem *xive_tima;
EXPORT_SYMBOL_GPL(xive_tima);
u32 xive_tima_offset;

/* Backend ops */
static const struct xive_ops *xive_ops;

/* Our global interrupt domain */
static struct irq_domain *xive_irq_domain;

#ifdef CONFIG_SMP
/* The IPIs all use the same logical irq number */
static u32 xive_ipi_irq;
#endif

/* Xive state for each CPU */
static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);

/* An invalid CPU target */
#define XIVE_INVALID_TARGET	(-1)

/*
 * Read the next entry in a queue, return its content if it's valid
 * or 0 if there is no new entry.
 *
 * The queue pointer is moved forward unless "just_peek" is set
 */
static u32 xive_read_eq(struct xive_q *q, bool just_peek)
{
	u32 cur;

	if (!q->qpage)
		return 0;
	cur = be32_to_cpup(q->qpage + q->idx);

	/* Check valid bit (31) vs current toggle polarity */
	if ((cur >> 31) == q->toggle)
		return 0;

	/* If consuming from the queue ... */
	if (!just_peek) {
		/* Next entry */
		q->idx = (q->idx + 1) & q->msk;

		/* Wrap around: flip valid toggle */
		if (q->idx == 0)
			q->toggle ^= 1;
	}
	/* Mask out the valid bit (31) */
	return cur & 0x7fffffff;
}

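/*
 * A brief illustration of the generation/toggle scheme used by
 * xive_read_eq() above (restating the code, not the HW docs): an
 * entry is considered new when its top bit differs from q->toggle.
 * Each time the index wraps back to 0 the toggle flips, so the
 * previous lap's entries now compare equal to the toggle and read
 * as "empty", while entries written for the next lap (with the
 * opposite generation bit) compare as new again.
 */
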
/*
 * Scans all the queues that may have interrupts in them
 * (based on "pending_prio") in priority order until an
 * interrupt is found or all the queues are empty.
 *
 * Then updates the CPPR (Current Processor Priority
 * Register) based on the most favored interrupt found
 * (0xff if none) and returns what was found (0 if none).
 *
 * If just_peek is set, return the most favored pending
 * interrupt if any but don't update the queue pointers.
 *
 * Note: This function can operate generically on any number
 * of queues (up to 8). The current implementation of the XIVE
 * driver only uses a single queue however.
 *
 * Note2: This will also "flush" the "pending_count" of a queue
 * into the "count" when that queue is observed to be empty.
 * This is used to keep track of the amount of interrupts
 * targeting a queue. When an interrupt is moved away from
 * a queue, we only decrement that queue count once the queue
 * has been observed empty to avoid races.
 */
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
	u32 irq = 0;
	u8 prio = 0;

	/* Find highest pending priority */
	while (xc->pending_prio != 0) {
		struct xive_q *q;

		prio = ffs(xc->pending_prio) - 1;
		DBG_VERBOSE("scan_irq: trying prio %d\n", prio);

		/* Try to fetch */
		irq = xive_read_eq(&xc->queue[prio], just_peek);

		/* Found something ? That's it */
		if (irq) {
			if (just_peek || irq_to_desc(irq))
				break;
			/*
			 * We should never get here; if we do then we must
			 * have failed to synchronize the interrupt properly
			 * when shutting it down.
			 */
			pr_crit("xive: got interrupt %d without descriptor, dropping\n",
				irq);
			WARN_ON(1);
			continue;
		}

		/* Clear pending bits */
		xc->pending_prio &= ~(1 << prio);

		/*
		 * Check if the queue count needs adjusting due to
		 * interrupts being moved away. See description of
		 * xive_dec_target_count()
		 */
		q = &xc->queue[prio];
		if (atomic_read(&q->pending_count)) {
			int p = atomic_xchg(&q->pending_count, 0);
			if (p) {
				WARN_ON(p > atomic_read(&q->count));
				atomic_sub(p, &q->count);
			}
		}
	}

	/* If nothing was found, set CPPR to 0xff */
	if (irq == 0)
		prio = 0xff;

	/* Update HW CPPR to match if necessary */
	if (prio != xc->cppr) {
		DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
		xc->cppr = prio;
		out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
	}

	return irq;
}

/*
 * This is used to perform the magic loads from an ESB
 * described in xive-regs.h
 */
static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/* Handle HW errata */
	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
	else
		val = in_be64(xd->eoi_mmio + offset);

	return (u8)val;
}

static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
{
	/* Handle HW errata */
	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
	else
		out_be64(xd->eoi_mmio + offset, data);
}

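/*
 * Rough summary of the ESB "PQ" bits manipulated by the two helpers
 * above (see xive-regs.h and the comments throughout this file for
 * the authoritative description):
 *
 *   P - an event notification has been sent, ie the interrupt is
 *       (or was) occupying a slot in an event queue
 *   Q - a further trigger arrived while P was set and has been
 *       coalesced, so a re-trigger may be needed after EOI
 *
 * PQ=00 is the "enabled" state, PQ=01 is used to mask a source at
 * the ESB level, and PQ=10/11 are used while an interrupt is passed
 * through to a KVM guest.
 */
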
#ifdef CONFIG_XMON
static notrace void xive_dump_eq(const char *name, struct xive_q *q)
{
	u32 i0, i1, idx;

	if (!q->qpage)
		return;
	idx = q->idx;
	i0 = be32_to_cpup(q->qpage + idx);
	idx = (idx + 1) & q->msk;
	i1 = be32_to_cpup(q->qpage + idx);
	xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
		    q->idx, q->toggle, i0, i1);
}

notrace void xmon_xive_do_dump(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);

	xmon_printf("CPU %d:", cpu);
	if (xc) {
		xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);

#ifdef CONFIG_SMP
		{
			u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);

			xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
				    val & XIVE_ESB_VAL_P ? 'P' : '-',
				    val & XIVE_ESB_VAL_Q ? 'Q' : '-');
		}
#endif
		xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
	}
	xmon_printf("\n");
}

int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
{
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	int rc;
	u32 target;
	u8 prio;
	u32 lirq;

	if (!is_xive_irq(chip))
		return -EINVAL;

	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
	if (rc) {
		xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
		return rc;
	}

	xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
		    hw_irq, target, prio, lirq);

	if (d) {
		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
		u64 val = xive_esb_read(xd, XIVE_ESB_GET);

		xmon_printf("flags=%c%c%c PQ=%c%c",
			    xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
			    xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
			    xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
			    val & XIVE_ESB_VAL_P ? 'P' : '-',
			    val & XIVE_ESB_VAL_Q ? 'Q' : '-');
	}

	xmon_printf("\n");
	return 0;
}

#endif /* CONFIG_XMON */

static unsigned int xive_get_irq(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	u32 irq;

	/*
	 * This can be called either as a result of a HW interrupt or
	 * as a "replay" because EOI decided there was still something
	 * in one of the queues.
	 *
	 * First we perform an ACK cycle in order to update our mask
	 * of pending priorities. This will also have the effect of
	 * updating the CPPR to the most favored pending interrupts.
	 *
	 * In the future, if we have a way to differentiate a first
	 * entry (on HW interrupt) from a replay triggered by EOI,
	 * we could skip this on replays unless the soft-mask state
	 * tells us that a new HW interrupt occurred.
	 */
	xive_ops->update_pending(xc);

	DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);

	/* Scan our queue(s) for interrupts */
	irq = xive_scan_interrupts(xc, false);

	DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
		    irq, xc->pending_prio);

	/* Return pending interrupt if any */
	if (irq == XIVE_BAD_IRQ)
		return 0;
	return irq;
}

/*
 * After EOI'ing an interrupt, we need to re-check the queue
 * to see if another interrupt is pending since multiple
 * interrupts can coalesce into a single notification to the
 * CPU.
 *
 * If we find that there is indeed more in there, we call
 * force_external_irq_replay() to make Linux synthesize an
 * external interrupt on the next call to local_irq_restore().
 */
static void xive_do_queue_eoi(struct xive_cpu *xc)
{
	if (xive_scan_interrupts(xc, true) != 0) {
		DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
		force_external_irq_replay();
	}
}

/*
 * EOI an interrupt at the source. There are several methods
 * to do this depending on the HW version and source type
 */
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
	xd->stale_p = false;
	/* If the XIVE supports the new "store EOI" facility, use it */
	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
		/*
		 * The FW told us to call it. This happens for some
		 * interrupt sources that need additional HW whacking
		 * beyond the ESB manipulation. For example LPC interrupts
		 * on P9 DD1.0 needed a latch to be cleared in the LPC bridge
		 * itself. The firmware will take care of it.
		 */
		if (WARN_ON_ONCE(!xive_ops->eoi))
			return;
		xive_ops->eoi(hw_irq);
	} else {
		u8 eoi_val;

		/*
		 * Otherwise for EOI, we use the special MMIO that does
		 * a clear of both P and Q and returns the old Q,
		 * except for LSIs where we use the "EOI cycle" special
		 * load.
		 *
		 * This allows us to then do a re-trigger if Q was set
		 * rather than synthesizing an interrupt in software
		 *
		 * For LSIs the HW EOI cycle is used rather than PQ bits,
		 * as they are automatically re-triggered in HW when still
		 * pending.
		 */
		if (xd->flags & XIVE_IRQ_FLAG_LSI)
			xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
		else {
			eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
			DBG_VERBOSE("eoi_val=%x\n", eoi_val);

			/* Re-trigger if needed */
			if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
				out_be64(xd->trig_mmio, 0);
		}
	}
}

/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
		    d->irq, irqd_to_hwirq(d), xc->pending_prio);

	/*
	 * EOI the source if it hasn't been disabled and hasn't
	 * been passed-through to a KVM guest
	 */
	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
	    !(xd->flags & XIVE_IRQ_NO_EOI))
		xive_do_source_eoi(irqd_to_hwirq(d), xd);
	else
		xd->stale_p = true;

	/*
	 * Clear saved_p to indicate that it's no longer occupying
	 * a queue slot on the target queue
	 */
	xd->saved_p = false;

	/* Check for more work in the queue */
	xive_do_queue_eoi(xc);
}

/*
 * Helper used to mask and unmask an interrupt source. This
 * is only called for normal interrupts that do not require
 * masking/unmasking via firmware.
 */
static void xive_do_source_set_mask(struct xive_irq_data *xd,
				    bool mask)
{
	u64 val;

	/*
	 * If the interrupt had P set, it may be in a queue.
	 *
	 * We need to make sure we don't re-enable it until it
	 * has been fetched from that queue and EOId. We keep
	 * a copy of that P state and use it to restore the
	 * ESB accordingly on unmask.
	 */
	if (mask) {
		val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
		if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
			xd->saved_p = true;
		xd->stale_p = false;
	} else if (xd->saved_p) {
		xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
		xd->saved_p = false;
	} else {
		xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
		xd->stale_p = false;
	}
}

/*
 * Try to choose "cpu" as a new interrupt target. Increments
 * the queue accounting for that target if it's not already
 * full.
 */
static bool xive_try_pick_target(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
	struct xive_q *q = &xc->queue[xive_irq_priority];
	int max;

	/*
	 * Calculate max number of interrupts in that queue.
	 *
	 * We leave a gap of 1 just in case...
	 */
	max = (q->msk + 1) - 1;
	return !!atomic_add_unless(&q->count, 1, max);
}

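/*
 * Summary of the queue accounting used by xive_try_pick_target()
 * above and xive_dec_target_count() below: when an interrupt moves
 * from CPU A to CPU B, B's q->count is bumped immediately, while
 * A's is only decremented (via pending_count) once A next observes
 * its queue empty in xive_scan_interrupts(), so we never decrement
 * while the interrupt may still occupy a slot in A's queue.
 */
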
/*
 * Un-account an interrupt for a target CPU. We don't directly
 * decrement q->count since the interrupt might still be present
 * in the queue.
 *
 * Instead increment a separate counter "pending_count" which
 * will be subtracted from "count" later when that CPU observes
 * the queue to be empty.
 */
static void xive_dec_target_count(int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
	struct xive_q *q = &xc->queue[xive_irq_priority];

	if (WARN_ON(cpu < 0 || !xc)) {
		pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
		return;
	}

	/*
	 * We increment the "pending count" which will be used
	 * to decrement the target queue count whenever it's next
	 * processed and found empty. This ensures that we don't
	 * decrement while we still have the interrupt there
	 * occupying a slot.
	 */
	atomic_inc(&q->pending_count);
}

/* Find a tentative CPU target in a CPU mask */
static int xive_find_target_in_mask(const struct cpumask *mask,
				    unsigned int fuzz)
{
	int cpu, first, num, i;

	/* Pick up a starting point CPU in the mask based on fuzz */
	num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
	first = fuzz % num;

	/* Locate it */
	cpu = cpumask_first(mask);
	for (i = 0; i < first && cpu < nr_cpu_ids; i++)
		cpu = cpumask_next(cpu, mask);

	/* Sanity check */
	if (WARN_ON(cpu >= nr_cpu_ids))
		cpu = cpumask_first(cpu_online_mask);

	/* Remember first one to handle wrap-around */
	first = cpu;

	/*
	 * Now go through the entire mask until we find a valid
	 * target.
	 */
	do {
		/*
		 * We re-check online as the fallback case passes us
		 * an untested affinity mask
		 */
		if (cpu_online(cpu) && xive_try_pick_target(cpu))
			return cpu;
		cpu = cpumask_next(cpu, mask);
		/* Wrap around */
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(mask);
	} while (cpu != first);

	return -1;
}

/*
 * Pick a target CPU for an interrupt. This is done at
 * startup or if the affinity is changed in a way that
 * invalidates the current target.
 */
static int xive_pick_irq_target(struct irq_data *d,
				const struct cpumask *affinity)
{
	static unsigned int fuzz;
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	cpumask_var_t mask;
	int cpu = -1;

	/*
	 * If we have chip IDs, first we try to build a mask of
	 * CPUs matching the CPU and find a target in there
	 */
	if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
	    zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
		/* Build a mask of matching chip IDs */
		for_each_cpu_and(cpu, affinity, cpu_online_mask) {
			struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
			if (xc->chip_id == xd->src_chip)
				cpumask_set_cpu(cpu, mask);
		}
		/* Try to find a target */
		if (cpumask_empty(mask))
			cpu = -1;
		else
			cpu = xive_find_target_in_mask(mask, fuzz++);
		free_cpumask_var(mask);
		if (cpu >= 0)
			return cpu;
		fuzz--;
	}

	/* No chip IDs, fallback to using the affinity mask */
	return xive_find_target_in_mask(affinity, fuzz++);
}

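/*
 * Note: the static "fuzz" counter in xive_pick_irq_target() above is
 * simply bumped on each call and used as a starting offset into the
 * mask, giving a cheap round-robin spread of targets instead of
 * always piling interrupts onto the first CPU of the affinity mask.
 */
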
static unsigned int xive_irq_startup(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int target, rc;

	xd->saved_p = false;
	xd->stale_p = false;
	pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
		 d->irq, hw_irq, d);

#ifdef CONFIG_PCI_MSI
	/*
	 * The generic MSI code returns with the interrupt disabled on the
	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
	 * at that level, so we do it here by hand.
	 */
	if (irq_data_get_msi_desc(d))
		pci_msi_unmask_irq(d);
#endif

	/* Pick a target */
	target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
	if (target == XIVE_INVALID_TARGET) {
		/* Try again breaking affinity */
		target = xive_pick_irq_target(d, cpu_online_mask);
		if (target == XIVE_INVALID_TARGET)
			return -ENXIO;
		pr_warn("irq %d started with broken affinity\n", d->irq);
	}

	/* Sanity check */
	if (WARN_ON(target == XIVE_INVALID_TARGET ||
		    target >= nr_cpu_ids))
		target = smp_processor_id();

	xd->target = target;

	/*
	 * Configure the logical number to be the Linux IRQ number
	 * and set the target queue
	 */
	rc = xive_ops->configure_irq(hw_irq,
				     get_hard_smp_processor_id(target),
				     xive_irq_priority, d->irq);
	if (rc)
		return rc;

	/* Unmask the ESB */
	xive_do_source_set_mask(xd, false);

	return 0;
}

/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);

	pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
		 d->irq, hw_irq, d);

	if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
		return;

	/* Mask the interrupt at the source */
	xive_do_source_set_mask(xd, true);

	/*
	 * Mask the interrupt in HW in the IVT/EAS and set the number
	 * to be the "bad" IRQ number
	 */
	xive_ops->configure_irq(hw_irq,
				get_hard_smp_processor_id(xd->target),
				0xff, XIVE_BAD_IRQ);

	xive_dec_target_count(xd->target);
	xd->target = XIVE_INVALID_TARGET;
}

static void xive_irq_unmask(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);

	/*
	 * This is a workaround for PCI LSI problems on P9, for
	 * these, we call FW to set the mask. The problems might
	 * be fixed by P9 DD2.0, if that is the case, firmware
	 * will no longer set that flag.
	 */
	if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
		unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
		xive_ops->configure_irq(hw_irq,
					get_hard_smp_processor_id(xd->target),
					xive_irq_priority, d->irq);
		return;
	}

	xive_do_source_set_mask(xd, false);
}

static void xive_irq_mask(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);

	/*
	 * This is a workaround for PCI LSI problems on P9, for
	 * these, we call OPAL to set the mask. The problems might
	 * be fixed by P9 DD2.0, if that is the case, firmware
	 * will no longer set that flag.
	 */
	if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
		unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
		xive_ops->configure_irq(hw_irq,
					get_hard_smp_processor_id(xd->target),
					0xff, d->irq);
		return;
	}

	xive_do_source_set_mask(xd, true);
}

static int xive_irq_set_affinity(struct irq_data *d,
				 const struct cpumask *cpumask,
				 bool force)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	u32 target, old_target;
	int rc = 0;

	pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);

	/* Is this valid ? */
	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
		return -EINVAL;

	/* Don't do anything if the interrupt isn't started */
	if (!irqd_is_started(d))
		return IRQ_SET_MASK_OK;

	/*
	 * If existing target is already in the new mask, and is
	 * online then do nothing.
	 */
	if (xd->target != XIVE_INVALID_TARGET &&
	    cpu_online(xd->target) &&
	    cpumask_test_cpu(xd->target, cpumask))
		return IRQ_SET_MASK_OK;

	/* Pick a new target */
	target = xive_pick_irq_target(d, cpumask);

	/* No target found */
	if (target == XIVE_INVALID_TARGET)
		return -ENXIO;

	/* Sanity check */
	if (WARN_ON(target >= nr_cpu_ids))
		target = smp_processor_id();

	old_target = xd->target;

	/*
	 * Only configure the irq if it's not currently passed-through to
	 * a KVM guest
	 */
	if (!irqd_is_forwarded_to_vcpu(d))
		rc = xive_ops->configure_irq(hw_irq,
					     get_hard_smp_processor_id(target),
					     xive_irq_priority, d->irq);
	if (rc < 0) {
		pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
		return rc;
	}

	pr_devel(" target: 0x%x\n", target);
	xd->target = target;

	/* Give up previous target */
	if (old_target != XIVE_INVALID_TARGET)
		xive_dec_target_count(old_target);

	return IRQ_SET_MASK_OK;
}

static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	/*
	 * We only support these. This has really no effect other than setting
	 * the corresponding descriptor bits mind you but those will in turn
	 * affect the resend function when re-enabling an edge interrupt.
	 *
	 * Set the default to edge as explained in map().
	 */
	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
		flow_type = IRQ_TYPE_EDGE_RISING;

	if (flow_type != IRQ_TYPE_EDGE_RISING &&
	    flow_type != IRQ_TYPE_LEVEL_LOW)
		return -EINVAL;

	irqd_set_trigger_type(d, flow_type);

	/*
	 * Double check it matches what the FW thinks
	 *
	 * NOTE: We don't know yet if the PAPR interface will provide
	 * the LSI vs MSI information apart from the device-tree so
	 * this check might have to move into an optional backend call
	 * that is specific to the native backend
	 */
	if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
	    !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
		pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
			d->irq, (u32)irqd_to_hwirq(d),
			(flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
			(xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
	}

	return IRQ_SET_MASK_OK_NOCOPY;
}

static int xive_irq_retrigger(struct irq_data *d)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);

	/* This should be only for MSIs */
	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
		return 0;

	/*
	 * To perform a retrigger, we first set the PQ bits to
	 * 11, then perform an EOI.
	 */
	xive_esb_read(xd, XIVE_ESB_SET_PQ_11);

	/*
	 * Note: We pass "0" to the hw_irq argument in order to
	 * avoid calling into the backend EOI code which we don't
	 * want to do in the case of a re-trigger. Backends typically
	 * only do EOI for LSIs anyway.
	 */
	xive_do_source_eoi(0, xd);

	return 1;
}

/*
 * Caller holds the irq descriptor lock, so this won't be called
 * concurrently with xive_get_irqchip_state on the same interrupt.
 */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int rc;
	u8 pq;

	/*
	 * We only support this on interrupts that do not require
	 * firmware calls for masking and unmasking
	 */
	if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
		return -EIO;

	/*
	 * This is called by KVM with state non-NULL for enabling
	 * pass-through or NULL for disabling it
	 */
	if (state) {
		irqd_set_forwarded_to_vcpu(d);

		/* Set it to PQ=10 state to prevent further sends */
		pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
		if (!xd->stale_p) {
			xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
			xd->stale_p = !xd->saved_p;
		}

		/* No target ? nothing to do */
		if (xd->target == XIVE_INVALID_TARGET) {
			/*
			 * An untargeted interrupt should have been
			 * also masked at the source
			 */
			WARN_ON(xd->saved_p);

			return 0;
		}

		/*
		 * If P was set, adjust state to PQ=11 to indicate
		 * that a resend is needed for the interrupt to reach
		 * the guest. Also remember the value of P.
		 *
		 * This also tells us that it's in flight to a host queue
		 * or has already been fetched but hasn't been EOIed yet
		 * by the host. Thus it's potentially using up a host
		 * queue slot. This is important to know because as long
		 * as this is the case, we must not hard-unmask it when
		 * "returning" that interrupt to the host.
		 *
		 * This saved_p is cleared by the host EOI, when we know
		 * for sure the queue slot is no longer in use.
		 */
		if (xd->saved_p) {
			xive_esb_read(xd, XIVE_ESB_SET_PQ_11);

			/*
			 * Sync the XIVE source HW to ensure the interrupt
			 * has gone through the EAS before we change its
			 * target to the guest. That should guarantee us
			 * that we *will* eventually get an EOI for it on
			 * the host. Otherwise there would be a small window
			 * for P to be seen here but the interrupt going
			 * to the guest queue.
			 */
			if (xive_ops->sync_source)
				xive_ops->sync_source(hw_irq);
		}
	} else {
		irqd_clr_forwarded_to_vcpu(d);

		/* No host target ? hard mask and return */
		if (xd->target == XIVE_INVALID_TARGET) {
			xive_do_source_set_mask(xd, true);
			return 0;
		}

		/*
		 * Sync the XIVE source HW to ensure the interrupt
		 * has gone through the EAS before we change its
		 * target to the host.
		 */
		if (xive_ops->sync_source)
			xive_ops->sync_source(hw_irq);

		/*
		 * By convention we are called with the interrupt in
		 * a PQ=10 or PQ=11 state, ie, it won't fire and will
		 * have latched in Q whether there's a pending HW
		 * interrupt or not.
		 *
		 * First reconfigure the target.
		 */
		rc = xive_ops->configure_irq(hw_irq,
					     get_hard_smp_processor_id(xd->target),
					     xive_irq_priority, d->irq);
		if (rc)
			return rc;

		/*
		 * Then if saved_p is not set, effectively re-enable the
		 * interrupt with an EOI. If it is set, we know there is
		 * still a message in a host queue somewhere that will be
		 * EOId eventually.
		 *
		 * Note: We don't check irqd_irq_disabled(). Effectively,
		 * we *will* let the irq get through even if masked if the
		 * HW is still firing it in order to deal with the whole
		 * saved_p business properly. If the interrupt triggers
		 * while masked, the generic code will re-mask it anyway.
		 */
		if (!xd->saved_p)
			xive_do_source_eoi(hw_irq, xd);

	}
	return 0;
}

/* Called with irq descriptor lock held. */
static int xive_get_irqchip_state(struct irq_data *data,
				  enum irqchip_irq_state which, bool *state)
{
	struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
	u8 pq;

	switch (which) {
	case IRQCHIP_STATE_ACTIVE:
		pq = xive_esb_read(xd, XIVE_ESB_GET);

		/*
		 * The ESB value being all 1's means we couldn't get
		 * the PQ state of the interrupt through mmio. It may
		 * happen, for example when querying a PHB interrupt
		 * while the PHB is in an error state. We consider the
		 * interrupt to be inactive in that case.
		 */
		*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
			(xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
		return 0;
	default:
		return -EINVAL;
	}
}

static struct irq_chip xive_irq_chip = {
	.name = "XIVE-IRQ",
	.irq_startup = xive_irq_startup,
	.irq_shutdown = xive_irq_shutdown,
	.irq_eoi = xive_irq_eoi,
	.irq_mask = xive_irq_mask,
	.irq_unmask = xive_irq_unmask,
	.irq_set_affinity = xive_irq_set_affinity,
	.irq_set_type = xive_irq_set_type,
	.irq_retrigger = xive_irq_retrigger,
	.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
	.irq_get_irqchip_state = xive_get_irqchip_state,
};

bool is_xive_irq(struct irq_chip *chip)
{
	return chip == &xive_irq_chip;
}
EXPORT_SYMBOL_GPL(is_xive_irq);

void xive_cleanup_irq_data(struct xive_irq_data *xd)
{
	if (xd->eoi_mmio) {
		iounmap(xd->eoi_mmio);
		if (xd->eoi_mmio == xd->trig_mmio)
			xd->trig_mmio = NULL;
		xd->eoi_mmio = NULL;
	}
	if (xd->trig_mmio) {
		iounmap(xd->trig_mmio);
		xd->trig_mmio = NULL;
	}
}
EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);

static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
{
	struct xive_irq_data *xd;
	int rc;

	xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
	if (!xd)
		return -ENOMEM;
	rc = xive_ops->populate_irq_data(hw, xd);
	if (rc) {
		kfree(xd);
		return rc;
	}
	xd->target = XIVE_INVALID_TARGET;
	irq_set_handler_data(virq, xd);

	/*
	 * Turn OFF by default the interrupt being mapped. A side
	 * effect of this check is the mapping of the ESB page of the
	 * interrupt in the Linux address space. This prevents page
	 * fault issues in the crash handler which masks all
	 * interrupts.
	 */
	xive_esb_read(xd, XIVE_ESB_SET_PQ_01);

	return 0;
}

static void xive_irq_free_data(unsigned int virq)
{
	struct xive_irq_data *xd = irq_get_handler_data(virq);

	if (!xd)
		return;
	irq_set_handler_data(virq, NULL);
	xive_cleanup_irq_data(xd);
	kfree(xd);
}

#ifdef CONFIG_SMP

static void xive_cause_ipi(int cpu)
{
	struct xive_cpu *xc;
	struct xive_irq_data *xd;

	xc = per_cpu(xive_cpu, cpu);

	DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
		    smp_processor_id(), cpu, xc->hw_ipi);

	xd = &xc->ipi_data;
	if (WARN_ON(!xd->trig_mmio))
		return;
	out_be64(xd->trig_mmio, 0);
}

static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
{
	return smp_ipi_demux();
}

static void xive_ipi_eoi(struct irq_data *d)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	/* Handle possible race with unplug and drop stale IPIs */
	if (!xc)
		return;

	DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
		    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);

	xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
	xive_do_queue_eoi(xc);
}

static void xive_ipi_do_nothing(struct irq_data *d)
{
	/*
	 * Nothing to do, we never mask/unmask IPIs, but the callback
	 * has to exist for the struct irq_chip.
	 */
}

static struct irq_chip xive_ipi_chip = {
	.name = "XIVE-IPI",
	.irq_eoi = xive_ipi_eoi,
	.irq_mask = xive_ipi_do_nothing,
	.irq_unmask = xive_ipi_do_nothing,
};

static void __init xive_request_ipi(void)
{
	unsigned int virq;

	/*
	 * Initialization failed, move on, we might manage to
	 * reach the point where we display our errors before
	 * the system falls apart
	 */
	if (!xive_irq_domain)
		return;

	/* Initialize it */
	virq = irq_create_mapping(xive_irq_domain, 0);
	xive_ipi_irq = virq;

	WARN_ON(request_irq(virq, xive_muxed_ipi_action,
			    IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
}

static int xive_setup_cpu_ipi(unsigned int cpu)
{
	struct xive_cpu *xc;
	int rc;

	pr_debug("Setting up IPI for CPU %d\n", cpu);

	xc = per_cpu(xive_cpu, cpu);

	/* Check if we are already setup */
	if (xc->hw_ipi != XIVE_BAD_IRQ)
		return 0;

	/* Grab an IPI from the backend, this will populate xc->hw_ipi */
	if (xive_ops->get_ipi(cpu, xc))
		return -EIO;

	/*
	 * Populate the IRQ data in the xive_cpu structure and
	 * configure the HW / enable the IPIs.
	 */
	rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
	if (rc) {
		pr_err("Failed to populate IPI data on CPU %d\n", cpu);
		return -EIO;
	}
	rc = xive_ops->configure_irq(xc->hw_ipi,
				     get_hard_smp_processor_id(cpu),
				     xive_irq_priority, xive_ipi_irq);
	if (rc) {
		pr_err("Failed to map IPI CPU %d\n", cpu);
		return -EIO;
	}
	pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
		 xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);

	/* Unmask it */
	xive_do_source_set_mask(&xc->ipi_data, false);

	return 0;
}

static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
{
	/* Disable the IPI and free the IRQ data */

	/* Already cleaned up ? */
	if (xc->hw_ipi == XIVE_BAD_IRQ)
		return;

	/* Mask the IPI */
	xive_do_source_set_mask(&xc->ipi_data, true);

	/*
	 * Note: We don't call xive_cleanup_irq_data() to free
	 * the mappings as this is called from an IPI on kexec
	 * which is not a safe environment to call iounmap()
	 */

	/* Deconfigure/mask in the backend */
	xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
				0xff, xive_ipi_irq);

	/* Free the IPIs in the backend */
	xive_ops->put_ipi(cpu, xc);
}

void __init xive_smp_probe(void)
{
	smp_ops->cause_ipi = xive_cause_ipi;

	/* Register the IPI */
	xive_request_ipi();

	/* Allocate and setup IPI for the boot CPU */
	xive_setup_cpu_ipi(smp_processor_id());
}

#endif /* CONFIG_SMP */

static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
			       irq_hw_number_t hw)
{
	int rc;

	/*
	 * Mark interrupts as edge sensitive by default so that resend
	 * actually works. Will fix that up below if needed.
	 */
	irq_clear_status_flags(virq, IRQ_LEVEL);

#ifdef CONFIG_SMP
	/* IPIs are special and come up with HW number 0 */
	if (hw == 0) {
		/*
		 * IPIs are marked per-cpu. We use separate HW interrupts under
		 * the hood but associated with the same "linux" interrupt
		 */
		irq_set_chip_and_handler(virq, &xive_ipi_chip,
					 handle_percpu_irq);
		return 0;
	}
#endif

	rc = xive_irq_alloc_data(virq, hw);
	if (rc)
		return rc;

	irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);

	return 0;
}

static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
{
	struct irq_data *data = irq_get_irq_data(virq);
	unsigned int hw_irq;

	/* XXX Assign BAD number */
	if (!data)
		return;
	hw_irq = (unsigned int)irqd_to_hwirq(data);
	if (hw_irq)
		xive_irq_free_data(virq);
}

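/*
 * Hypothetical device-tree encoding handled by the xlate callback
 * below, for illustration only:
 *
 *	interrupts = <0x1234 1>;	// hw irq 0x1234, level sensitive
 *	interrupts = <0x1235 0>;	// hw irq 0x1235, edge sensitive
 *
 * A single-cell specifier is treated as a level interrupt.
 */
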
static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
				 const u32 *intspec, unsigned int intsize,
				 irq_hw_number_t *out_hwirq, unsigned int *out_flags)

{
	*out_hwirq = intspec[0];

	/*
	 * If intsize is at least 2, we look for the type in the second cell,
	 * we assume the LSB indicates a level interrupt.
	 */
	if (intsize > 1) {
		if (intspec[1] & 1)
			*out_flags = IRQ_TYPE_LEVEL_LOW;
		else
			*out_flags = IRQ_TYPE_EDGE_RISING;
	} else
		*out_flags = IRQ_TYPE_LEVEL_LOW;

	return 0;
}

static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
				 enum irq_domain_bus_token bus_token)
{
	return xive_ops->match(node);
}

static const struct irq_domain_ops xive_irq_domain_ops = {
	.match = xive_irq_domain_match,
	.map = xive_irq_domain_map,
	.unmap = xive_irq_domain_unmap,
	.xlate = xive_irq_domain_xlate,
};

static void __init xive_init_host(void)
{
	xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ,
					       &xive_irq_domain_ops, NULL);
	if (WARN_ON(xive_irq_domain == NULL))
		return;
	irq_set_default_host(xive_irq_domain);
}

static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
{
	if (xc->queue[xive_irq_priority].qpage)
		xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
}

static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
{
	int rc = 0;

	/* We set up one queue for now, with a 64k page */
	if (!xc->queue[xive_irq_priority].qpage)
		rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);

	return rc;
}

static int xive_prepare_cpu(unsigned int cpu)
{
	struct xive_cpu *xc;

	xc = per_cpu(xive_cpu, cpu);
	if (!xc) {
		struct device_node *np;

		xc = kzalloc_node(sizeof(struct xive_cpu),
				  GFP_KERNEL, cpu_to_node(cpu));
		if (!xc)
			return -ENOMEM;
		np = of_get_cpu_node(cpu, NULL);
		if (np)
			xc->chip_id = of_get_ibm_chip_id(np);
		of_node_put(np);
		xc->hw_ipi = XIVE_BAD_IRQ;

		per_cpu(xive_cpu, cpu) = xc;
	}

	/* Setup EQs if not already */
	return xive_setup_cpu_queues(cpu, xc);
}

static void xive_setup_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);

	/* The backend might have additional things to do */
	if (xive_ops->setup_cpu)
		xive_ops->setup_cpu(smp_processor_id(), xc);

	/* Set CPPR to 0xff to enable flow of interrupts */
	xc->cppr = 0xff;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
}

#ifdef CONFIG_SMP
void xive_smp_setup_cpu(void)
{
	pr_devel("SMP setup CPU %d\n", smp_processor_id());

	/* This will have already been done on the boot CPU */
	if (smp_processor_id() != boot_cpuid)
		xive_setup_cpu();

}

int xive_smp_prepare_cpu(unsigned int cpu)
{
	int rc;

	/* Allocate per-CPU data and queues */
	rc = xive_prepare_cpu(cpu);
	if (rc)
		return rc;

	/* Allocate and setup IPI for the new CPU */
	return xive_setup_cpu_ipi(cpu);
}

#ifdef CONFIG_HOTPLUG_CPU
static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
{
	u32 irq;

	/* We assume local irqs are disabled */
	WARN_ON(!irqs_disabled());

	/* Check what's already in the CPU queue */
	while ((irq = xive_scan_interrupts(xc, false)) != 0) {
		/*
		 * We need to re-route that interrupt to its new destination.
		 * First get and lock the descriptor
		 */
		struct irq_desc *desc = irq_to_desc(irq);
		struct irq_data *d = irq_desc_get_irq_data(desc);
		struct xive_irq_data *xd;
		unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);

		/*
		 * Ignore anything that isn't a XIVE irq, and also ignore
		 * IPIs, which can simply be dropped.
		 */
		if (d->domain != xive_irq_domain || hw_irq == 0)
			continue;

		/*
		 * The IRQ should have already been re-routed, it's just a
		 * stale entry in the old queue, so re-trigger it in order to
		 * make it reach its new destination.
		 */
#ifdef DEBUG_FLUSH
		pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
			cpu, irq);
#endif
		raw_spin_lock(&desc->lock);
		xd = irq_desc_get_handler_data(desc);

		/*
		 * Clear saved_p to indicate that it's no longer pending
		 */
		xd->saved_p = false;

		/*
		 * For LSIs, we EOI, this will cause a resend if it's
		 * still asserted. Otherwise do an MSI retrigger.
		 */
		if (xd->flags & XIVE_IRQ_FLAG_LSI)
			xive_do_source_eoi(irqd_to_hwirq(d), xd);
		else
			xive_irq_retrigger(d);

		raw_spin_unlock(&desc->lock);
	}
}

void xive_smp_disable_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Migrate interrupts away from the CPU */
	irq_migrate_all_off_this_cpu();

	/* Set CPPR to 0 to disable flow of interrupts */
	xc->cppr = 0;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);

	/* Flush everything still in the queue */
	xive_flush_cpu_queue(cpu, xc);

	/* Re-enable CPPR */
	xc->cppr = 0xff;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
}

void xive_flush_interrupt(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Called if an interrupt occurs while the CPU is hot unplugged */
	xive_flush_cpu_queue(cpu, xc);
}

#endif /* CONFIG_HOTPLUG_CPU */

#endif /* CONFIG_SMP */

void xive_teardown_cpu(void)
{
	struct xive_cpu *xc = __this_cpu_read(xive_cpu);
	unsigned int cpu = smp_processor_id();

	/* Set CPPR to 0 to disable flow of interrupts */
	xc->cppr = 0;
	out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);

	if (xive_ops->teardown_cpu)
		xive_ops->teardown_cpu(cpu, xc);

#ifdef CONFIG_SMP
	/* Get rid of IPI */
	xive_cleanup_cpu_ipi(cpu, xc);
#endif

	/* Disable and free the queues */
	xive_cleanup_cpu_queues(cpu, xc);
}

void xive_shutdown(void)
{
	xive_ops->shutdown();
}

bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
			   u8 max_prio)
{
	xive_tima = area;
	xive_tima_offset = offset;
	xive_ops = ops;
	xive_irq_priority = max_prio;

	ppc_md.get_irq = xive_get_irq;
	__xive_enabled = true;

	pr_devel("Initializing host..\n");
	xive_init_host();

	pr_devel("Initializing boot CPU..\n");

	/* Allocate per-CPU data and queues */
	xive_prepare_cpu(smp_processor_id());

	/* Get ready for interrupts */
	xive_setup_cpu();

	pr_info("Interrupt handling initialized with %s backend\n",
		xive_ops->name);
	pr_info("Using priority %d for all interrupts\n", max_prio);

	return true;
}

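/*
 * Allocate (and zero) the backing page(s) for an event queue of
 * 1 << queue_shift bytes on the memory node of "cpu". The allocation
 * order is derived from the queue size relative to PAGE_SIZE
 * (see xive_alloc_order()).
 */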
__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
{
	unsigned int alloc_order;
	struct page *pages;
	__be32 *qpage;

	alloc_order = xive_alloc_order(queue_shift);
	pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
	if (!pages)
		return ERR_PTR(-ENOMEM);
	qpage = (__be32 *)page_address(pages);
	memset(qpage, 0, 1 << queue_shift);

	return qpage;
}

static int __init xive_off(char *arg)
{
	xive_cmdline_disabled = true;
	return 0;
}
__setup("xive=off", xive_off);

void xive_debug_show_cpu(struct seq_file *m, int cpu)
{
	struct xive_cpu *xc = per_cpu(xive_cpu, cpu);

	seq_printf(m, "CPU %d:", cpu);
	if (xc) {
		seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);

#ifdef CONFIG_SMP
		{
			u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);

			seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
				   val & XIVE_ESB_VAL_P ? 'P' : '-',
				   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
		}
#endif
		{
			struct xive_q *q = &xc->queue[xive_irq_priority];
			u32 i0, i1, idx;

			if (q->qpage) {
				idx = q->idx;
				i0 = be32_to_cpup(q->qpage + idx);
				idx = (idx + 1) & q->msk;
				i1 = be32_to_cpup(q->qpage + idx);
				seq_printf(m, "EQ idx=%d T=%d %08x %08x ...",
					   q->idx, q->toggle, i0, i1);
			}
		}
	}
	seq_puts(m, "\n");
}

void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
{
	struct irq_chip *chip = irq_data_get_irq_chip(d);
	int rc;
	u32 target;
	u8 prio;
	u32 lirq;

	if (!is_xive_irq(chip))
		return;

	rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
	if (rc) {
		seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
		return;
	}

	seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
		   hw_irq, target, prio, lirq);

	if (d) {
		struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
		u64 val = xive_esb_read(xd, XIVE_ESB_GET);

		seq_printf(m, "flags=%c%c%c PQ=%c%c",
			   xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
			   xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
			   xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
			   val & XIVE_ESB_VAL_P ? 'P' : '-',
			   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
	}
	seq_puts(m, "\n");
}

static int xive_core_debug_show(struct seq_file *m, void *private)
{
	unsigned int i;
	struct irq_desc *desc;
	int cpu;

	if (xive_ops->debug_show)
		xive_ops->debug_show(m, private);

	for_each_possible_cpu(cpu)
		xive_debug_show_cpu(m, cpu);

	for_each_irq_desc(i, desc) {
		struct irq_data *d = irq_desc_get_irq_data(desc);
		unsigned int hw_irq;

		if (!d)
			continue;

		hw_irq = (unsigned int)irqd_to_hwirq(d);

		/* IPIs are special (HW number 0) */
		if (hw_irq)
			xive_debug_show_irq(m, hw_irq, d);
	}
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(xive_core_debug);

int xive_core_debug_init(void)
{
	debugfs_create_file("xive", 0400, powerpc_debugfs_root,
			    NULL, &xive_core_debug_fops);
	return 0;
}