1 /* 2 * QEMU PowerPC sPAPR IRQ interface 3 * 4 * Copyright (c) 2018, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "hw/irq.h" 15 #include "hw/ppc/spapr.h" 16 #include "hw/ppc/spapr_cpu_core.h" 17 #include "hw/ppc/spapr_xive.h" 18 #include "hw/ppc/xics.h" 19 #include "hw/ppc/xics_spapr.h" 20 #include "hw/qdev-properties.h" 21 #include "cpu-models.h" 22 #include "sysemu/kvm.h" 23 24 #include "trace.h" 25 26 QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE); 27 28 static const TypeInfo spapr_intc_info = { 29 .name = TYPE_SPAPR_INTC, 30 .parent = TYPE_INTERFACE, 31 .class_size = sizeof(SpaprInterruptControllerClass), 32 }; 33 34 static void spapr_irq_msi_init(SpaprMachineState *spapr) 35 { 36 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { 37 /* Legacy mode doesn't use this allocator */ 38 return; 39 } 40 41 spapr->irq_map_nr = spapr_irq_nr_msis(spapr); 42 spapr->irq_map = bitmap_new(spapr->irq_map_nr); 43 } 44 45 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, 46 Error **errp) 47 { 48 int irq; 49 50 /* 51 * The 'align_mask' parameter of bitmap_find_next_zero_area() 52 * should be one less than a power of 2; 0 means no 53 * alignment. Adapt the 'align' value of the former allocator 54 * to fit the requirements of bitmap_find_next_zero_area() 55 */ 56 align -= 1; 57 58 irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, 59 align); 60 if (irq == spapr->irq_map_nr) { 61 error_setg(errp, "can't find a free %d-IRQ block", num); 62 return -1; 63 } 64 65 bitmap_set(spapr->irq_map, irq, num); 66 67 return irq + SPAPR_IRQ_MSI; 68 } 69 70 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) 71 { 72 bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); 73 } 74 75 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, 76 SpaprInterruptController *intc, 77 uint32_t nr_servers, 78 Error **errp) 79 { 80 Error *local_err = NULL; 81 82 if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { 83 if (fn(intc, nr_servers, &local_err) < 0) { 84 if (kvm_kernel_irqchip_required()) { 85 error_prepend(&local_err, 86 "kernel_irqchip requested but unavailable: "); 87 error_propagate(errp, local_err); 88 return -1; 89 } 90 91 /* 92 * We failed to initialize the KVM device, fallback to 93 * emulated mode 94 */ 95 error_prepend(&local_err, 96 "kernel_irqchip allowed but unavailable: "); 97 error_append_hint(&local_err, 98 "Falling back to kernel-irqchip=off\n"); 99 warn_report_err(local_err); 100 } 101 } 102 103 return 0; 104 } 105 106 /* 107 * XICS IRQ backend. 108 */ 109 110 SpaprIrq spapr_irq_xics = { 111 .xics = true, 112 .xive = false, 113 }; 114 115 /* 116 * XIVE IRQ backend. 117 */ 118 119 SpaprIrq spapr_irq_xive = { 120 .xics = false, 121 .xive = true, 122 }; 123 124 /* 125 * Dual XIVE and XICS IRQ backend. 126 * 127 * Both interrupt mode, XIVE and XICS, objects are created but the 128 * machine starts in legacy interrupt mode (XICS). It can be changed 129 * by the CAS negotiation process and, in that case, the new mode is 130 * activated after an extra machine reset. 131 */ 132 133 /* 134 * Define values in sync with the XIVE and XICS backend 135 */ 136 SpaprIrq spapr_irq_dual = { 137 .xics = true, 138 .xive = true, 139 }; 140 141 142 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) 143 { 144 ERRP_GUARD(); 145 MachineState *machine = MACHINE(spapr); 146 147 /* 148 * Sanity checks on non-P9 machines. On these, XIVE is not 149 * advertised, see spapr_dt_ov5_platform_support() 150 */ 151 if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 152 0, spapr->max_compat_pvr)) { 153 /* 154 * If the 'dual' interrupt mode is selected, force XICS as CAS 155 * negotiation is useless. 156 */ 157 if (spapr->irq == &spapr_irq_dual) { 158 spapr->irq = &spapr_irq_xics; 159 return 0; 160 } 161 162 /* 163 * Non-P9 machines using only XIVE is a bogus setup. We have two 164 * scenarios to take into account because of the compat mode: 165 * 166 * 1. POWER7/8 machines should fail to init later on when creating 167 * the XIVE interrupt presenters because a POWER9 exception 168 * model is required. 169 170 * 2. POWER9 machines using the POWER8 compat mode won't fail and 171 * will let the OS boot with a partial XIVE setup : DT 172 * properties but no hcalls. 173 * 174 * To cover both and not confuse the OS, add an early failure in 175 * QEMU. 176 */ 177 if (!spapr->irq->xics) { 178 error_setg(errp, "XIVE-only machines require a POWER9 CPU"); 179 return -1; 180 } 181 } 182 183 /* 184 * On a POWER9 host, some older KVM XICS devices cannot be destroyed and 185 * re-created. Same happens with KVM nested guests. Detect that early to 186 * avoid QEMU to exit later when the guest reboots. 187 */ 188 if (kvm_enabled() && 189 spapr->irq == &spapr_irq_dual && 190 kvm_kernel_irqchip_required() && 191 xics_kvm_has_broken_disconnect()) { 192 error_setg(errp, 193 "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); 194 error_append_hint(errp, 195 "This can happen with an old KVM or in a KVM nested guest.\n"); 196 error_append_hint(errp, 197 "Try without kernel-irqchip or with kernel-irqchip=off.\n"); 198 return -1; 199 } 200 201 return 0; 202 } 203 204 /* 205 * sPAPR IRQ frontend routines for devices 206 */ 207 #define ALL_INTCS(spapr_) \ 208 { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } 209 210 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, 211 PowerPCCPU *cpu, Error **errp) 212 { 213 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 214 int i; 215 int rc; 216 217 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 218 SpaprInterruptController *intc = intcs[i]; 219 if (intc) { 220 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 221 rc = sicc->cpu_intc_create(intc, cpu, errp); 222 if (rc < 0) { 223 return rc; 224 } 225 } 226 } 227 228 return 0; 229 } 230 231 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) 232 { 233 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 234 int i; 235 236 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 237 SpaprInterruptController *intc = intcs[i]; 238 if (intc) { 239 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 240 sicc->cpu_intc_reset(intc, cpu); 241 } 242 } 243 } 244 245 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) 246 { 247 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 248 int i; 249 250 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 251 SpaprInterruptController *intc = intcs[i]; 252 if (intc) { 253 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 254 sicc->cpu_intc_destroy(intc, cpu); 255 } 256 } 257 } 258 259 static void spapr_set_irq(void *opaque, int irq, int level) 260 { 261 SpaprMachineState *spapr = SPAPR_MACHINE(opaque); 262 SpaprInterruptControllerClass *sicc 263 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 264 265 sicc->set_irq(spapr->active_intc, irq, level); 266 } 267 268 void spapr_irq_print_info(SpaprMachineState *spapr, GString *buf) 269 { 270 SpaprInterruptControllerClass *sicc 271 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 272 273 sicc->print_info(spapr->active_intc, buf); 274 } 275 276 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, 277 void *fdt, uint32_t phandle) 278 { 279 SpaprInterruptControllerClass *sicc 280 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 281 282 sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); 283 } 284 285 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) 286 { 287 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 288 289 if (smc->legacy_irq_allocation) { 290 return smc->nr_xirqs; 291 } else { 292 return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; 293 } 294 } 295 296 void spapr_irq_init(SpaprMachineState *spapr, Error **errp) 297 { 298 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 299 300 if (kvm_enabled() && kvm_kernel_irqchip_split()) { 301 error_setg(errp, "kernel_irqchip split mode not supported on pseries"); 302 return; 303 } 304 305 if (spapr_irq_check(spapr, errp) < 0) { 306 return; 307 } 308 309 /* Initialize the MSI IRQ allocator. */ 310 spapr_irq_msi_init(spapr); 311 312 if (spapr->irq->xics) { 313 Object *obj; 314 315 obj = object_new(TYPE_ICS_SPAPR); 316 317 object_property_add_child(OBJECT(spapr), "ics", obj); 318 object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), 319 &error_abort); 320 object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); 321 if (!qdev_realize(DEVICE(obj), NULL, errp)) { 322 return; 323 } 324 325 spapr->ics = ICS_SPAPR(obj); 326 } 327 328 if (spapr->irq->xive) { 329 uint32_t nr_servers = spapr_max_server_number(spapr); 330 DeviceState *dev; 331 int i; 332 333 dev = qdev_new(TYPE_SPAPR_XIVE); 334 qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); 335 /* 336 * 8 XIVE END structures per CPU. One for each available 337 * priority 338 */ 339 qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); 340 object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), 341 &error_abort); 342 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); 343 344 spapr->xive = SPAPR_XIVE(dev); 345 346 /* Enable the CPU IPIs */ 347 for (i = 0; i < nr_servers; ++i) { 348 SpaprInterruptControllerClass *sicc 349 = SPAPR_INTC_GET_CLASS(spapr->xive); 350 351 if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, 352 false, errp) < 0) { 353 return; 354 } 355 } 356 357 spapr_xive_hcall_init(spapr); 358 } 359 360 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, 361 smc->nr_xirqs + SPAPR_IRQ_NR_IPIS); 362 363 /* 364 * Mostly we don't actually need this until reset, except that not 365 * having this set up can cause VFIO devices to issue a 366 * false-positive warning during realize(), because they don't yet 367 * have an in-kernel irq chip. 368 */ 369 spapr_irq_update_active_intc(spapr); 370 } 371 372 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) 373 { 374 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 375 int i; 376 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 377 int rc; 378 379 assert(irq >= SPAPR_XIRQ_BASE); 380 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 381 382 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 383 SpaprInterruptController *intc = intcs[i]; 384 if (intc) { 385 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 386 rc = sicc->claim_irq(intc, irq, lsi, errp); 387 if (rc < 0) { 388 return rc; 389 } 390 } 391 } 392 393 return 0; 394 } 395 396 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) 397 { 398 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 399 int i, j; 400 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 401 402 assert(irq >= SPAPR_XIRQ_BASE); 403 assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 404 405 for (i = irq; i < (irq + num); i++) { 406 for (j = 0; j < ARRAY_SIZE(intcs); j++) { 407 SpaprInterruptController *intc = intcs[j]; 408 409 if (intc) { 410 SpaprInterruptControllerClass *sicc 411 = SPAPR_INTC_GET_CLASS(intc); 412 sicc->free_irq(intc, i); 413 } 414 } 415 } 416 } 417 418 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) 419 { 420 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 421 422 /* 423 * This interface is basically for VIO and PHB devices to find the 424 * right qemu_irq to manipulate, so we only allow access to the 425 * external irqs for now. Currently anything which needs to 426 * access the IPIs most naturally gets there via the guest side 427 * interfaces, we can change this if we need to in future. 428 */ 429 assert(irq >= SPAPR_XIRQ_BASE); 430 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 431 432 if (spapr->ics) { 433 assert(ics_valid_irq(spapr->ics, irq)); 434 } 435 if (spapr->xive) { 436 assert(irq < spapr->xive->nr_irqs); 437 assert(xive_eas_is_valid(&spapr->xive->eat[irq])); 438 } 439 440 return spapr->qirqs[irq]; 441 } 442 443 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) 444 { 445 SpaprInterruptControllerClass *sicc; 446 447 spapr_irq_update_active_intc(spapr); 448 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 449 return sicc->post_load(spapr->active_intc, version_id); 450 } 451 452 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) 453 { 454 assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); 455 456 spapr_irq_update_active_intc(spapr); 457 } 458 459 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) 460 { 461 const char *nodename = "interrupt-controller"; 462 int offset, phandle; 463 464 offset = fdt_subnode_offset(fdt, 0, nodename); 465 if (offset < 0) { 466 error_setg(errp, "Can't find node \"%s\": %s", 467 nodename, fdt_strerror(offset)); 468 return -1; 469 } 470 471 phandle = fdt_get_phandle(fdt, offset); 472 if (!phandle) { 473 error_setg(errp, "Can't get phandle of node \"%s\"", nodename); 474 return -1; 475 } 476 477 return phandle; 478 } 479 480 static void set_active_intc(SpaprMachineState *spapr, 481 SpaprInterruptController *new_intc) 482 { 483 SpaprInterruptControllerClass *sicc; 484 uint32_t nr_servers = spapr_max_server_number(spapr); 485 486 assert(new_intc); 487 488 if (new_intc == spapr->active_intc) { 489 /* Nothing to do */ 490 return; 491 } 492 493 if (spapr->active_intc) { 494 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 495 if (sicc->deactivate) { 496 sicc->deactivate(spapr->active_intc); 497 } 498 } 499 500 sicc = SPAPR_INTC_GET_CLASS(new_intc); 501 if (sicc->activate) { 502 sicc->activate(new_intc, nr_servers, &error_fatal); 503 } 504 505 spapr->active_intc = new_intc; 506 507 /* 508 * We've changed the kernel irqchip, let VFIO devices know they 509 * need to readjust. 510 */ 511 kvm_irqchip_change_notify(); 512 } 513 514 void spapr_irq_update_active_intc(SpaprMachineState *spapr) 515 { 516 SpaprInterruptController *new_intc; 517 518 if (!spapr->ics) { 519 /* 520 * XXX before we run CAS, ov5_cas is initialized empty, which 521 * indicates XICS, even if we have ic-mode=xive. TODO: clean 522 * up the CAS path so that we have a clearer way of handling 523 * this. 524 */ 525 new_intc = SPAPR_INTC(spapr->xive); 526 } else if (spapr->ov5_cas 527 && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { 528 new_intc = SPAPR_INTC(spapr->xive); 529 } else { 530 new_intc = SPAPR_INTC(spapr->ics); 531 } 532 533 set_active_intc(spapr, new_intc); 534 } 535 536 /* 537 * XICS legacy routines - to deprecate one day 538 */ 539 540 static int ics_find_free_block(ICSState *ics, int num, int alignnum) 541 { 542 int first, i; 543 544 for (first = 0; first < ics->nr_irqs; first += alignnum) { 545 if (num > (ics->nr_irqs - first)) { 546 return -1; 547 } 548 for (i = first; i < first + num; ++i) { 549 if (!ics_irq_free(ics, i)) { 550 break; 551 } 552 } 553 if (i == (first + num)) { 554 return first; 555 } 556 } 557 558 return -1; 559 } 560 561 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) 562 { 563 ICSState *ics = spapr->ics; 564 int first = -1; 565 566 assert(ics); 567 568 /* 569 * MSIMesage::data is used for storing VIRQ so 570 * it has to be aligned to num to support multiple 571 * MSI vectors. MSI-X is not affected by this. 572 * The hint is used for the first IRQ, the rest should 573 * be allocated continuously. 574 */ 575 if (align) { 576 assert((num == 1) || (num == 2) || (num == 4) || 577 (num == 8) || (num == 16) || (num == 32)); 578 first = ics_find_free_block(ics, num, num); 579 } else { 580 first = ics_find_free_block(ics, num, 1); 581 } 582 583 if (first < 0) { 584 error_setg(errp, "can't find a free %d-IRQ block", num); 585 return -1; 586 } 587 588 return first + ics->offset; 589 } 590 591 SpaprIrq spapr_irq_xics_legacy = { 592 .xics = true, 593 .xive = false, 594 }; 595 596 static void spapr_irq_register_types(void) 597 { 598 type_register_static(&spapr_intc_info); 599 } 600 601 type_init(spapr_irq_register_types) 602