1 /* 2 * QEMU PowerPC sPAPR IRQ interface 3 * 4 * Copyright (c) 2018, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "hw/irq.h" 15 #include "hw/ppc/spapr.h" 16 #include "hw/ppc/spapr_cpu_core.h" 17 #include "hw/ppc/spapr_xive.h" 18 #include "hw/ppc/xics.h" 19 #include "hw/ppc/xics_spapr.h" 20 #include "hw/qdev-properties.h" 21 #include "cpu-models.h" 22 #include "sysemu/kvm.h" 23 24 #include "trace.h" 25 26 static const TypeInfo spapr_intc_info = { 27 .name = TYPE_SPAPR_INTC, 28 .parent = TYPE_INTERFACE, 29 .class_size = sizeof(SpaprInterruptControllerClass), 30 }; 31 32 static void spapr_irq_msi_init(SpaprMachineState *spapr) 33 { 34 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { 35 /* Legacy mode doesn't use this allocator */ 36 return; 37 } 38 39 spapr->irq_map_nr = spapr_irq_nr_msis(spapr); 40 spapr->irq_map = bitmap_new(spapr->irq_map_nr); 41 } 42 43 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, 44 Error **errp) 45 { 46 int irq; 47 48 /* 49 * The 'align_mask' parameter of bitmap_find_next_zero_area() 50 * should be one less than a power of 2; 0 means no 51 * alignment. Adapt the 'align' value of the former allocator 52 * to fit the requirements of bitmap_find_next_zero_area() 53 */ 54 align -= 1; 55 56 irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, 57 align); 58 if (irq == spapr->irq_map_nr) { 59 error_setg(errp, "can't find a free %d-IRQ block", num); 60 return -1; 61 } 62 63 bitmap_set(spapr->irq_map, irq, num); 64 65 return irq + SPAPR_IRQ_MSI; 66 } 67 68 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) 69 { 70 bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); 71 } 72 73 int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **), 74 SpaprInterruptController *intc, 75 Error **errp) 76 { 77 MachineState *machine = MACHINE(qdev_get_machine()); 78 Error *local_err = NULL; 79 80 if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { 81 if (fn(intc, &local_err) < 0) { 82 if (machine_kernel_irqchip_required(machine)) { 83 error_prepend(&local_err, 84 "kernel_irqchip requested but unavailable: "); 85 error_propagate(errp, local_err); 86 return -1; 87 } 88 89 /* 90 * We failed to initialize the KVM device, fallback to 91 * emulated mode 92 */ 93 error_prepend(&local_err, 94 "kernel_irqchip allowed but unavailable: "); 95 error_append_hint(&local_err, 96 "Falling back to kernel-irqchip=off\n"); 97 warn_report_err(local_err); 98 } 99 } 100 101 return 0; 102 } 103 104 /* 105 * XICS IRQ backend. 106 */ 107 108 SpaprIrq spapr_irq_xics = { 109 .xics = true, 110 .xive = false, 111 }; 112 113 /* 114 * XIVE IRQ backend. 115 */ 116 117 SpaprIrq spapr_irq_xive = { 118 .xics = false, 119 .xive = true, 120 }; 121 122 /* 123 * Dual XIVE and XICS IRQ backend. 124 * 125 * Both interrupt mode, XIVE and XICS, objects are created but the 126 * machine starts in legacy interrupt mode (XICS). It can be changed 127 * by the CAS negotiation process and, in that case, the new mode is 128 * activated after an extra machine reset. 129 */ 130 131 /* 132 * Define values in sync with the XIVE and XICS backend 133 */ 134 SpaprIrq spapr_irq_dual = { 135 .xics = true, 136 .xive = true, 137 }; 138 139 140 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) 141 { 142 MachineState *machine = MACHINE(spapr); 143 144 /* 145 * Sanity checks on non-P9 machines. On these, XIVE is not 146 * advertised, see spapr_dt_ov5_platform_support() 147 */ 148 if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 149 0, spapr->max_compat_pvr)) { 150 /* 151 * If the 'dual' interrupt mode is selected, force XICS as CAS 152 * negotiation is useless. 153 */ 154 if (spapr->irq == &spapr_irq_dual) { 155 spapr->irq = &spapr_irq_xics; 156 return 0; 157 } 158 159 /* 160 * Non-P9 machines using only XIVE is a bogus setup. We have two 161 * scenarios to take into account because of the compat mode: 162 * 163 * 1. POWER7/8 machines should fail to init later on when creating 164 * the XIVE interrupt presenters because a POWER9 exception 165 * model is required. 166 167 * 2. POWER9 machines using the POWER8 compat mode won't fail and 168 * will let the OS boot with a partial XIVE setup : DT 169 * properties but no hcalls. 170 * 171 * To cover both and not confuse the OS, add an early failure in 172 * QEMU. 173 */ 174 if (spapr->irq == &spapr_irq_xive) { 175 error_setg(errp, "XIVE-only machines require a POWER9 CPU"); 176 return -1; 177 } 178 } 179 180 /* 181 * On a POWER9 host, some older KVM XICS devices cannot be destroyed and 182 * re-created. Detect that early to avoid QEMU to exit later when the 183 * guest reboots. 184 */ 185 if (kvm_enabled() && 186 spapr->irq == &spapr_irq_dual && 187 machine_kernel_irqchip_required(machine) && 188 xics_kvm_has_broken_disconnect(spapr)) { 189 error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on"); 190 return -1; 191 } 192 193 return 0; 194 } 195 196 /* 197 * sPAPR IRQ frontend routines for devices 198 */ 199 #define ALL_INTCS(spapr_) \ 200 { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } 201 202 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, 203 PowerPCCPU *cpu, Error **errp) 204 { 205 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 206 int i; 207 int rc; 208 209 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 210 SpaprInterruptController *intc = intcs[i]; 211 if (intc) { 212 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 213 rc = sicc->cpu_intc_create(intc, cpu, errp); 214 if (rc < 0) { 215 return rc; 216 } 217 } 218 } 219 220 return 0; 221 } 222 223 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) 224 { 225 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 226 int i; 227 228 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 229 SpaprInterruptController *intc = intcs[i]; 230 if (intc) { 231 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 232 sicc->cpu_intc_reset(intc, cpu); 233 } 234 } 235 } 236 237 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) 238 { 239 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 240 int i; 241 242 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 243 SpaprInterruptController *intc = intcs[i]; 244 if (intc) { 245 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 246 sicc->cpu_intc_destroy(intc, cpu); 247 } 248 } 249 } 250 251 static void spapr_set_irq(void *opaque, int irq, int level) 252 { 253 SpaprMachineState *spapr = SPAPR_MACHINE(opaque); 254 SpaprInterruptControllerClass *sicc 255 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 256 257 sicc->set_irq(spapr->active_intc, irq, level); 258 } 259 260 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) 261 { 262 SpaprInterruptControllerClass *sicc 263 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 264 265 sicc->print_info(spapr->active_intc, mon); 266 } 267 268 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, 269 void *fdt, uint32_t phandle) 270 { 271 SpaprInterruptControllerClass *sicc 272 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 273 274 sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); 275 } 276 277 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) 278 { 279 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 280 281 if (smc->legacy_irq_allocation) { 282 return smc->nr_xirqs; 283 } else { 284 return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; 285 } 286 } 287 288 void spapr_irq_init(SpaprMachineState *spapr, Error **errp) 289 { 290 MachineState *machine = MACHINE(spapr); 291 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 292 293 if (machine_kernel_irqchip_split(machine)) { 294 error_setg(errp, "kernel_irqchip split mode not supported on pseries"); 295 return; 296 } 297 298 if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) { 299 error_setg(errp, 300 "kernel_irqchip requested but only available with KVM"); 301 return; 302 } 303 304 if (spapr_irq_check(spapr, errp) < 0) { 305 return; 306 } 307 308 /* Initialize the MSI IRQ allocator. */ 309 spapr_irq_msi_init(spapr); 310 311 if (spapr->irq->xics) { 312 Error *local_err = NULL; 313 Object *obj; 314 315 obj = object_new(TYPE_ICS_SPAPR); 316 object_property_add_child(OBJECT(spapr), "ics", obj, &local_err); 317 if (local_err) { 318 error_propagate(errp, local_err); 319 return; 320 } 321 322 object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr), 323 &local_err); 324 if (local_err) { 325 error_propagate(errp, local_err); 326 return; 327 } 328 329 object_property_set_int(obj, smc->nr_xirqs, "nr-irqs", &local_err); 330 if (local_err) { 331 error_propagate(errp, local_err); 332 return; 333 } 334 335 object_property_set_bool(obj, true, "realized", &local_err); 336 if (local_err) { 337 error_propagate(errp, local_err); 338 return; 339 } 340 341 spapr->ics = ICS_SPAPR(obj); 342 } 343 344 if (spapr->irq->xive) { 345 uint32_t nr_servers = spapr_max_server_number(spapr); 346 DeviceState *dev; 347 int i; 348 349 dev = qdev_create(NULL, TYPE_SPAPR_XIVE); 350 qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE); 351 /* 352 * 8 XIVE END structures per CPU. One for each available 353 * priority 354 */ 355 qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); 356 qdev_init_nofail(dev); 357 358 spapr->xive = SPAPR_XIVE(dev); 359 360 /* Enable the CPU IPIs */ 361 for (i = 0; i < nr_servers; ++i) { 362 SpaprInterruptControllerClass *sicc 363 = SPAPR_INTC_GET_CLASS(spapr->xive); 364 365 if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, 366 false, errp) < 0) { 367 return; 368 } 369 } 370 371 spapr_xive_hcall_init(spapr); 372 } 373 374 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, 375 smc->nr_xirqs + SPAPR_XIRQ_BASE); 376 377 /* 378 * Mostly we don't actually need this until reset, except that not 379 * having this set up can cause VFIO devices to issue a 380 * false-positive warning during realize(), because they don't yet 381 * have an in-kernel irq chip. 382 */ 383 spapr_irq_update_active_intc(spapr); 384 } 385 386 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) 387 { 388 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 389 int i; 390 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 391 int rc; 392 393 assert(irq >= SPAPR_XIRQ_BASE); 394 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 395 396 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 397 SpaprInterruptController *intc = intcs[i]; 398 if (intc) { 399 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 400 rc = sicc->claim_irq(intc, irq, lsi, errp); 401 if (rc < 0) { 402 return rc; 403 } 404 } 405 } 406 407 return 0; 408 } 409 410 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) 411 { 412 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 413 int i, j; 414 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 415 416 assert(irq >= SPAPR_XIRQ_BASE); 417 assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 418 419 for (i = irq; i < (irq + num); i++) { 420 for (j = 0; j < ARRAY_SIZE(intcs); j++) { 421 SpaprInterruptController *intc = intcs[j]; 422 423 if (intc) { 424 SpaprInterruptControllerClass *sicc 425 = SPAPR_INTC_GET_CLASS(intc); 426 sicc->free_irq(intc, i); 427 } 428 } 429 } 430 } 431 432 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) 433 { 434 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 435 436 /* 437 * This interface is basically for VIO and PHB devices to find the 438 * right qemu_irq to manipulate, so we only allow access to the 439 * external irqs for now. Currently anything which needs to 440 * access the IPIs most naturally gets there via the guest side 441 * interfaces, we can change this if we need to in future. 442 */ 443 assert(irq >= SPAPR_XIRQ_BASE); 444 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 445 446 if (spapr->ics) { 447 assert(ics_valid_irq(spapr->ics, irq)); 448 } 449 if (spapr->xive) { 450 assert(irq < spapr->xive->nr_irqs); 451 assert(xive_eas_is_valid(&spapr->xive->eat[irq])); 452 } 453 454 return spapr->qirqs[irq]; 455 } 456 457 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) 458 { 459 SpaprInterruptControllerClass *sicc; 460 461 spapr_irq_update_active_intc(spapr); 462 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 463 return sicc->post_load(spapr->active_intc, version_id); 464 } 465 466 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) 467 { 468 assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); 469 470 spapr_irq_update_active_intc(spapr); 471 } 472 473 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) 474 { 475 const char *nodename = "interrupt-controller"; 476 int offset, phandle; 477 478 offset = fdt_subnode_offset(fdt, 0, nodename); 479 if (offset < 0) { 480 error_setg(errp, "Can't find node \"%s\": %s", 481 nodename, fdt_strerror(offset)); 482 return -1; 483 } 484 485 phandle = fdt_get_phandle(fdt, offset); 486 if (!phandle) { 487 error_setg(errp, "Can't get phandle of node \"%s\"", nodename); 488 return -1; 489 } 490 491 return phandle; 492 } 493 494 static void set_active_intc(SpaprMachineState *spapr, 495 SpaprInterruptController *new_intc) 496 { 497 SpaprInterruptControllerClass *sicc; 498 499 assert(new_intc); 500 501 if (new_intc == spapr->active_intc) { 502 /* Nothing to do */ 503 return; 504 } 505 506 if (spapr->active_intc) { 507 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 508 if (sicc->deactivate) { 509 sicc->deactivate(spapr->active_intc); 510 } 511 } 512 513 sicc = SPAPR_INTC_GET_CLASS(new_intc); 514 if (sicc->activate) { 515 sicc->activate(new_intc, &error_fatal); 516 } 517 518 spapr->active_intc = new_intc; 519 520 /* 521 * We've changed the kernel irqchip, let VFIO devices know they 522 * need to readjust. 523 */ 524 kvm_irqchip_change_notify(); 525 } 526 527 void spapr_irq_update_active_intc(SpaprMachineState *spapr) 528 { 529 SpaprInterruptController *new_intc; 530 531 if (!spapr->ics) { 532 /* 533 * XXX before we run CAS, ov5_cas is initialized empty, which 534 * indicates XICS, even if we have ic-mode=xive. TODO: clean 535 * up the CAS path so that we have a clearer way of handling 536 * this. 537 */ 538 new_intc = SPAPR_INTC(spapr->xive); 539 } else if (spapr->ov5_cas 540 && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { 541 new_intc = SPAPR_INTC(spapr->xive); 542 } else { 543 new_intc = SPAPR_INTC(spapr->ics); 544 } 545 546 set_active_intc(spapr, new_intc); 547 } 548 549 /* 550 * XICS legacy routines - to deprecate one day 551 */ 552 553 static int ics_find_free_block(ICSState *ics, int num, int alignnum) 554 { 555 int first, i; 556 557 for (first = 0; first < ics->nr_irqs; first += alignnum) { 558 if (num > (ics->nr_irqs - first)) { 559 return -1; 560 } 561 for (i = first; i < first + num; ++i) { 562 if (!ics_irq_free(ics, i)) { 563 break; 564 } 565 } 566 if (i == (first + num)) { 567 return first; 568 } 569 } 570 571 return -1; 572 } 573 574 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) 575 { 576 ICSState *ics = spapr->ics; 577 int first = -1; 578 579 assert(ics); 580 581 /* 582 * MSIMesage::data is used for storing VIRQ so 583 * it has to be aligned to num to support multiple 584 * MSI vectors. MSI-X is not affected by this. 585 * The hint is used for the first IRQ, the rest should 586 * be allocated continuously. 587 */ 588 if (align) { 589 assert((num == 1) || (num == 2) || (num == 4) || 590 (num == 8) || (num == 16) || (num == 32)); 591 first = ics_find_free_block(ics, num, num); 592 } else { 593 first = ics_find_free_block(ics, num, 1); 594 } 595 596 if (first < 0) { 597 error_setg(errp, "can't find a free %d-IRQ block", num); 598 return -1; 599 } 600 601 return first + ics->offset; 602 } 603 604 SpaprIrq spapr_irq_xics_legacy = { 605 .xics = true, 606 .xive = false, 607 }; 608 609 static void spapr_irq_register_types(void) 610 { 611 type_register_static(&spapr_intc_info); 612 } 613 614 type_init(spapr_irq_register_types) 615