1 /* 2 * QEMU PowerPC sPAPR IRQ interface 3 * 4 * Copyright (c) 2018, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "qapi/type-helpers.h" 15 #include "hw/irq.h" 16 #include "hw/ppc/spapr.h" 17 #include "hw/ppc/spapr_cpu_core.h" 18 #include "hw/ppc/spapr_xive.h" 19 #include "hw/ppc/xics.h" 20 #include "hw/ppc/xics_spapr.h" 21 #include "hw/qdev-properties.h" 22 #include "monitor/monitor.h" 23 #include "cpu-models.h" 24 #include "sysemu/kvm.h" 25 26 #include "trace.h" 27 28 QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE); 29 30 static const TypeInfo spapr_intc_info = { 31 .name = TYPE_SPAPR_INTC, 32 .parent = TYPE_INTERFACE, 33 .class_size = sizeof(SpaprInterruptControllerClass), 34 }; 35 36 static void spapr_irq_msi_init(SpaprMachineState *spapr) 37 { 38 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { 39 /* Legacy mode doesn't use this allocator */ 40 return; 41 } 42 43 spapr->irq_map_nr = spapr_irq_nr_msis(spapr); 44 spapr->irq_map = bitmap_new(spapr->irq_map_nr); 45 } 46 47 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, 48 Error **errp) 49 { 50 int irq; 51 52 /* 53 * The 'align_mask' parameter of bitmap_find_next_zero_area() 54 * should be one less than a power of 2; 0 means no 55 * alignment. Adapt the 'align' value of the former allocator 56 * to fit the requirements of bitmap_find_next_zero_area() 57 */ 58 align -= 1; 59 60 irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, 61 align); 62 if (irq == spapr->irq_map_nr) { 63 error_setg(errp, "can't find a free %d-IRQ block", num); 64 return -1; 65 } 66 67 bitmap_set(spapr->irq_map, irq, num); 68 69 return irq + SPAPR_IRQ_MSI; 70 } 71 72 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) 73 { 74 bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); 75 } 76 77 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, 78 SpaprInterruptController *intc, 79 uint32_t nr_servers, 80 Error **errp) 81 { 82 Error *local_err = NULL; 83 84 if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { 85 if (fn(intc, nr_servers, &local_err) < 0) { 86 if (kvm_kernel_irqchip_required()) { 87 error_prepend(&local_err, 88 "kernel_irqchip requested but unavailable: "); 89 error_propagate(errp, local_err); 90 return -1; 91 } 92 93 /* 94 * We failed to initialize the KVM device, fallback to 95 * emulated mode 96 */ 97 error_prepend(&local_err, 98 "kernel_irqchip allowed but unavailable: "); 99 error_append_hint(&local_err, 100 "Falling back to kernel-irqchip=off\n"); 101 warn_report_err(local_err); 102 } 103 } 104 105 return 0; 106 } 107 108 /* 109 * XICS IRQ backend. 110 */ 111 112 SpaprIrq spapr_irq_xics = { 113 .xics = true, 114 .xive = false, 115 }; 116 117 /* 118 * XIVE IRQ backend. 119 */ 120 121 SpaprIrq spapr_irq_xive = { 122 .xics = false, 123 .xive = true, 124 }; 125 126 /* 127 * Dual XIVE and XICS IRQ backend. 128 * 129 * Both interrupt mode, XIVE and XICS, objects are created but the 130 * machine starts in legacy interrupt mode (XICS). It can be changed 131 * by the CAS negotiation process and, in that case, the new mode is 132 * activated after an extra machine reset. 133 */ 134 135 /* 136 * Define values in sync with the XIVE and XICS backend 137 */ 138 SpaprIrq spapr_irq_dual = { 139 .xics = true, 140 .xive = true, 141 }; 142 143 144 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) 145 { 146 ERRP_GUARD(); 147 MachineState *machine = MACHINE(spapr); 148 149 /* 150 * Sanity checks on non-P9 machines. On these, XIVE is not 151 * advertised, see spapr_dt_ov5_platform_support() 152 */ 153 if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 154 0, spapr->max_compat_pvr)) { 155 /* 156 * If the 'dual' interrupt mode is selected, force XICS as CAS 157 * negotiation is useless. 158 */ 159 if (spapr->irq == &spapr_irq_dual) { 160 spapr->irq = &spapr_irq_xics; 161 return 0; 162 } 163 164 /* 165 * Non-P9 machines using only XIVE is a bogus setup. We have two 166 * scenarios to take into account because of the compat mode: 167 * 168 * 1. POWER7/8 machines should fail to init later on when creating 169 * the XIVE interrupt presenters because a POWER9 exception 170 * model is required. 171 172 * 2. POWER9 machines using the POWER8 compat mode won't fail and 173 * will let the OS boot with a partial XIVE setup : DT 174 * properties but no hcalls. 175 * 176 * To cover both and not confuse the OS, add an early failure in 177 * QEMU. 178 */ 179 if (!spapr->irq->xics) { 180 error_setg(errp, "XIVE-only machines require a POWER9 CPU"); 181 return -1; 182 } 183 } 184 185 /* 186 * On a POWER9 host, some older KVM XICS devices cannot be destroyed and 187 * re-created. Same happens with KVM nested guests. Detect that early to 188 * avoid QEMU to exit later when the guest reboots. 189 */ 190 if (kvm_enabled() && 191 spapr->irq == &spapr_irq_dual && 192 kvm_kernel_irqchip_required() && 193 xics_kvm_has_broken_disconnect()) { 194 error_setg(errp, 195 "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); 196 error_append_hint(errp, 197 "This can happen with an old KVM or in a KVM nested guest.\n"); 198 error_append_hint(errp, 199 "Try without kernel-irqchip or with kernel-irqchip=off.\n"); 200 return -1; 201 } 202 203 return 0; 204 } 205 206 /* 207 * sPAPR IRQ frontend routines for devices 208 */ 209 #define ALL_INTCS(spapr_) \ 210 { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } 211 212 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, 213 PowerPCCPU *cpu, Error **errp) 214 { 215 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 216 int i; 217 int rc; 218 219 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 220 SpaprInterruptController *intc = intcs[i]; 221 if (intc) { 222 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 223 rc = sicc->cpu_intc_create(intc, cpu, errp); 224 if (rc < 0) { 225 return rc; 226 } 227 } 228 } 229 230 return 0; 231 } 232 233 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) 234 { 235 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 236 int i; 237 238 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 239 SpaprInterruptController *intc = intcs[i]; 240 if (intc) { 241 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 242 sicc->cpu_intc_reset(intc, cpu); 243 } 244 } 245 } 246 247 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) 248 { 249 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 250 int i; 251 252 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 253 SpaprInterruptController *intc = intcs[i]; 254 if (intc) { 255 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 256 sicc->cpu_intc_destroy(intc, cpu); 257 } 258 } 259 } 260 261 static void spapr_set_irq(void *opaque, int irq, int level) 262 { 263 SpaprMachineState *spapr = SPAPR_MACHINE(opaque); 264 SpaprInterruptControllerClass *sicc 265 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 266 267 sicc->set_irq(spapr->active_intc, irq, level); 268 } 269 270 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) 271 { 272 SpaprInterruptControllerClass *sicc 273 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 274 g_autoptr(GString) buf = g_string_new(""); 275 g_autoptr(HumanReadableText) info = NULL; 276 277 sicc->print_info(spapr->active_intc, buf); 278 info = human_readable_text_from_str(buf); 279 monitor_puts(mon, info->human_readable_text); 280 } 281 282 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, 283 void *fdt, uint32_t phandle) 284 { 285 SpaprInterruptControllerClass *sicc 286 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 287 288 sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); 289 } 290 291 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) 292 { 293 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 294 295 if (smc->legacy_irq_allocation) { 296 return smc->nr_xirqs; 297 } else { 298 return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; 299 } 300 } 301 302 void spapr_irq_init(SpaprMachineState *spapr, Error **errp) 303 { 304 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 305 306 if (kvm_enabled() && kvm_kernel_irqchip_split()) { 307 error_setg(errp, "kernel_irqchip split mode not supported on pseries"); 308 return; 309 } 310 311 if (spapr_irq_check(spapr, errp) < 0) { 312 return; 313 } 314 315 /* Initialize the MSI IRQ allocator. */ 316 spapr_irq_msi_init(spapr); 317 318 if (spapr->irq->xics) { 319 Object *obj; 320 321 obj = object_new(TYPE_ICS_SPAPR); 322 323 object_property_add_child(OBJECT(spapr), "ics", obj); 324 object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), 325 &error_abort); 326 object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); 327 if (!qdev_realize(DEVICE(obj), NULL, errp)) { 328 return; 329 } 330 331 spapr->ics = ICS_SPAPR(obj); 332 } 333 334 if (spapr->irq->xive) { 335 uint32_t nr_servers = spapr_max_server_number(spapr); 336 DeviceState *dev; 337 int i; 338 339 dev = qdev_new(TYPE_SPAPR_XIVE); 340 qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); 341 /* 342 * 8 XIVE END structures per CPU. One for each available 343 * priority 344 */ 345 qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); 346 object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), 347 &error_abort); 348 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); 349 350 spapr->xive = SPAPR_XIVE(dev); 351 352 /* Enable the CPU IPIs */ 353 for (i = 0; i < nr_servers; ++i) { 354 SpaprInterruptControllerClass *sicc 355 = SPAPR_INTC_GET_CLASS(spapr->xive); 356 357 if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, 358 false, errp) < 0) { 359 return; 360 } 361 } 362 363 spapr_xive_hcall_init(spapr); 364 } 365 366 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, 367 smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); 368 369 /* 370 * Mostly we don't actually need this until reset, except that not 371 * having this set up can cause VFIO devices to issue a 372 * false-positive warning during realize(), because they don't yet 373 * have an in-kernel irq chip. 374 */ 375 spapr_irq_update_active_intc(spapr); 376 } 377 378 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) 379 { 380 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 381 int i; 382 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 383 int rc; 384 385 assert(irq >= SPAPR_XIRQ_BASE); 386 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 387 388 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 389 SpaprInterruptController *intc = intcs[i]; 390 if (intc) { 391 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 392 rc = sicc->claim_irq(intc, irq, lsi, errp); 393 if (rc < 0) { 394 return rc; 395 } 396 } 397 } 398 399 return 0; 400 } 401 402 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) 403 { 404 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 405 int i, j; 406 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 407 408 assert(irq >= SPAPR_XIRQ_BASE); 409 assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 410 411 for (i = irq; i < (irq + num); i++) { 412 for (j = 0; j < ARRAY_SIZE(intcs); j++) { 413 SpaprInterruptController *intc = intcs[j]; 414 415 if (intc) { 416 SpaprInterruptControllerClass *sicc 417 = SPAPR_INTC_GET_CLASS(intc); 418 sicc->free_irq(intc, i); 419 } 420 } 421 } 422 } 423 424 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) 425 { 426 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 427 428 /* 429 * This interface is basically for VIO and PHB devices to find the 430 * right qemu_irq to manipulate, so we only allow access to the 431 * external irqs for now. Currently anything which needs to 432 * access the IPIs most naturally gets there via the guest side 433 * interfaces, we can change this if we need to in future. 434 */ 435 assert(irq >= SPAPR_XIRQ_BASE); 436 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 437 438 if (spapr->ics) { 439 assert(ics_valid_irq(spapr->ics, irq)); 440 } 441 if (spapr->xive) { 442 assert(irq < spapr->xive->nr_irqs); 443 assert(xive_eas_is_valid(&spapr->xive->eat[irq])); 444 } 445 446 return spapr->qirqs[irq]; 447 } 448 449 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) 450 { 451 SpaprInterruptControllerClass *sicc; 452 453 spapr_irq_update_active_intc(spapr); 454 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 455 return sicc->post_load(spapr->active_intc, version_id); 456 } 457 458 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) 459 { 460 assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); 461 462 spapr_irq_update_active_intc(spapr); 463 } 464 465 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) 466 { 467 const char *nodename = "interrupt-controller"; 468 int offset, phandle; 469 470 offset = fdt_subnode_offset(fdt, 0, nodename); 471 if (offset < 0) { 472 error_setg(errp, "Can't find node \"%s\": %s", 473 nodename, fdt_strerror(offset)); 474 return -1; 475 } 476 477 phandle = fdt_get_phandle(fdt, offset); 478 if (!phandle) { 479 error_setg(errp, "Can't get phandle of node \"%s\"", nodename); 480 return -1; 481 } 482 483 return phandle; 484 } 485 486 static void set_active_intc(SpaprMachineState *spapr, 487 SpaprInterruptController *new_intc) 488 { 489 SpaprInterruptControllerClass *sicc; 490 uint32_t nr_servers = spapr_max_server_number(spapr); 491 492 assert(new_intc); 493 494 if (new_intc == spapr->active_intc) { 495 /* Nothing to do */ 496 return; 497 } 498 499 if (spapr->active_intc) { 500 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 501 if (sicc->deactivate) { 502 sicc->deactivate(spapr->active_intc); 503 } 504 } 505 506 sicc = SPAPR_INTC_GET_CLASS(new_intc); 507 if (sicc->activate) { 508 sicc->activate(new_intc, nr_servers, &error_fatal); 509 } 510 511 spapr->active_intc = new_intc; 512 513 /* 514 * We've changed the kernel irqchip, let VFIO devices know they 515 * need to readjust. 516 */ 517 kvm_irqchip_change_notify(); 518 } 519 520 void spapr_irq_update_active_intc(SpaprMachineState *spapr) 521 { 522 SpaprInterruptController *new_intc; 523 524 if (!spapr->ics) { 525 /* 526 * XXX before we run CAS, ov5_cas is initialized empty, which 527 * indicates XICS, even if we have ic-mode=xive. TODO: clean 528 * up the CAS path so that we have a clearer way of handling 529 * this. 530 */ 531 new_intc = SPAPR_INTC(spapr->xive); 532 } else if (spapr->ov5_cas 533 && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { 534 new_intc = SPAPR_INTC(spapr->xive); 535 } else { 536 new_intc = SPAPR_INTC(spapr->ics); 537 } 538 539 set_active_intc(spapr, new_intc); 540 } 541 542 /* 543 * XICS legacy routines - to deprecate one day 544 */ 545 546 static int ics_find_free_block(ICSState *ics, int num, int alignnum) 547 { 548 int first, i; 549 550 for (first = 0; first < ics->nr_irqs; first += alignnum) { 551 if (num > (ics->nr_irqs - first)) { 552 return -1; 553 } 554 for (i = first; i < first + num; ++i) { 555 if (!ics_irq_free(ics, i)) { 556 break; 557 } 558 } 559 if (i == (first + num)) { 560 return first; 561 } 562 } 563 564 return -1; 565 } 566 567 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) 568 { 569 ICSState *ics = spapr->ics; 570 int first = -1; 571 572 assert(ics); 573 574 /* 575 * MSIMesage::data is used for storing VIRQ so 576 * it has to be aligned to num to support multiple 577 * MSI vectors. MSI-X is not affected by this. 578 * The hint is used for the first IRQ, the rest should 579 * be allocated continuously. 580 */ 581 if (align) { 582 assert((num == 1) || (num == 2) || (num == 4) || 583 (num == 8) || (num == 16) || (num == 32)); 584 first = ics_find_free_block(ics, num, num); 585 } else { 586 first = ics_find_free_block(ics, num, 1); 587 } 588 589 if (first < 0) { 590 error_setg(errp, "can't find a free %d-IRQ block", num); 591 return -1; 592 } 593 594 return first + ics->offset; 595 } 596 597 SpaprIrq spapr_irq_xics_legacy = { 598 .xics = true, 599 .xive = false, 600 }; 601 602 static void spapr_irq_register_types(void) 603 { 604 type_register_static(&spapr_intc_info); 605 } 606 607 type_init(spapr_irq_register_types) 608