1 /* 2 * QEMU PowerPC sPAPR IRQ interface 3 * 4 * Copyright (c) 2018, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "hw/irq.h" 15 #include "hw/ppc/spapr.h" 16 #include "hw/ppc/spapr_cpu_core.h" 17 #include "hw/ppc/spapr_xive.h" 18 #include "hw/ppc/xics.h" 19 #include "hw/ppc/xics_spapr.h" 20 #include "hw/qdev-properties.h" 21 #include "cpu-models.h" 22 #include "sysemu/kvm.h" 23 24 #include "trace.h" 25 26 static const TypeInfo spapr_intc_info = { 27 .name = TYPE_SPAPR_INTC, 28 .parent = TYPE_INTERFACE, 29 .class_size = sizeof(SpaprInterruptControllerClass), 30 }; 31 32 static void spapr_irq_msi_init(SpaprMachineState *spapr) 33 { 34 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { 35 /* Legacy mode doesn't use this allocator */ 36 return; 37 } 38 39 spapr->irq_map_nr = spapr_irq_nr_msis(spapr); 40 spapr->irq_map = bitmap_new(spapr->irq_map_nr); 41 } 42 43 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, 44 Error **errp) 45 { 46 int irq; 47 48 /* 49 * The 'align_mask' parameter of bitmap_find_next_zero_area() 50 * should be one less than a power of 2; 0 means no 51 * alignment. Adapt the 'align' value of the former allocator 52 * to fit the requirements of bitmap_find_next_zero_area() 53 */ 54 align -= 1; 55 56 irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, 57 align); 58 if (irq == spapr->irq_map_nr) { 59 error_setg(errp, "can't find a free %d-IRQ block", num); 60 return -1; 61 } 62 63 bitmap_set(spapr->irq_map, irq, num); 64 65 return irq + SPAPR_IRQ_MSI; 66 } 67 68 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) 69 { 70 bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); 71 } 72 73 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, 74 SpaprInterruptController *intc, 75 uint32_t nr_servers, 76 Error **errp) 77 { 78 MachineState *machine = MACHINE(qdev_get_machine()); 79 Error *local_err = NULL; 80 81 if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { 82 if (fn(intc, nr_servers, &local_err) < 0) { 83 if (machine_kernel_irqchip_required(machine)) { 84 error_prepend(&local_err, 85 "kernel_irqchip requested but unavailable: "); 86 error_propagate(errp, local_err); 87 return -1; 88 } 89 90 /* 91 * We failed to initialize the KVM device, fallback to 92 * emulated mode 93 */ 94 error_prepend(&local_err, 95 "kernel_irqchip allowed but unavailable: "); 96 error_append_hint(&local_err, 97 "Falling back to kernel-irqchip=off\n"); 98 warn_report_err(local_err); 99 } 100 } 101 102 return 0; 103 } 104 105 /* 106 * XICS IRQ backend. 107 */ 108 109 SpaprIrq spapr_irq_xics = { 110 .xics = true, 111 .xive = false, 112 }; 113 114 /* 115 * XIVE IRQ backend. 116 */ 117 118 SpaprIrq spapr_irq_xive = { 119 .xics = false, 120 .xive = true, 121 }; 122 123 /* 124 * Dual XIVE and XICS IRQ backend. 125 * 126 * Both interrupt mode, XIVE and XICS, objects are created but the 127 * machine starts in legacy interrupt mode (XICS). It can be changed 128 * by the CAS negotiation process and, in that case, the new mode is 129 * activated after an extra machine reset. 130 */ 131 132 /* 133 * Define values in sync with the XIVE and XICS backend 134 */ 135 SpaprIrq spapr_irq_dual = { 136 .xics = true, 137 .xive = true, 138 }; 139 140 141 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) 142 { 143 MachineState *machine = MACHINE(spapr); 144 145 /* 146 * Sanity checks on non-P9 machines. On these, XIVE is not 147 * advertised, see spapr_dt_ov5_platform_support() 148 */ 149 if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 150 0, spapr->max_compat_pvr)) { 151 /* 152 * If the 'dual' interrupt mode is selected, force XICS as CAS 153 * negotiation is useless. 154 */ 155 if (spapr->irq == &spapr_irq_dual) { 156 spapr->irq = &spapr_irq_xics; 157 return 0; 158 } 159 160 /* 161 * Non-P9 machines using only XIVE is a bogus setup. We have two 162 * scenarios to take into account because of the compat mode: 163 * 164 * 1. POWER7/8 machines should fail to init later on when creating 165 * the XIVE interrupt presenters because a POWER9 exception 166 * model is required. 167 168 * 2. POWER9 machines using the POWER8 compat mode won't fail and 169 * will let the OS boot with a partial XIVE setup : DT 170 * properties but no hcalls. 171 * 172 * To cover both and not confuse the OS, add an early failure in 173 * QEMU. 174 */ 175 if (spapr->irq == &spapr_irq_xive) { 176 error_setg(errp, "XIVE-only machines require a POWER9 CPU"); 177 return -1; 178 } 179 } 180 181 /* 182 * On a POWER9 host, some older KVM XICS devices cannot be destroyed and 183 * re-created. Detect that early to avoid QEMU to exit later when the 184 * guest reboots. 185 */ 186 if (kvm_enabled() && 187 spapr->irq == &spapr_irq_dual && 188 machine_kernel_irqchip_required(machine) && 189 xics_kvm_has_broken_disconnect(spapr)) { 190 error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on"); 191 return -1; 192 } 193 194 return 0; 195 } 196 197 /* 198 * sPAPR IRQ frontend routines for devices 199 */ 200 #define ALL_INTCS(spapr_) \ 201 { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } 202 203 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, 204 PowerPCCPU *cpu, Error **errp) 205 { 206 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 207 int i; 208 int rc; 209 210 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 211 SpaprInterruptController *intc = intcs[i]; 212 if (intc) { 213 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 214 rc = sicc->cpu_intc_create(intc, cpu, errp); 215 if (rc < 0) { 216 return rc; 217 } 218 } 219 } 220 221 return 0; 222 } 223 224 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) 225 { 226 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 227 int i; 228 229 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 230 SpaprInterruptController *intc = intcs[i]; 231 if (intc) { 232 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 233 sicc->cpu_intc_reset(intc, cpu); 234 } 235 } 236 } 237 238 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) 239 { 240 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 241 int i; 242 243 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 244 SpaprInterruptController *intc = intcs[i]; 245 if (intc) { 246 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 247 sicc->cpu_intc_destroy(intc, cpu); 248 } 249 } 250 } 251 252 static void spapr_set_irq(void *opaque, int irq, int level) 253 { 254 SpaprMachineState *spapr = SPAPR_MACHINE(opaque); 255 SpaprInterruptControllerClass *sicc 256 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 257 258 sicc->set_irq(spapr->active_intc, irq, level); 259 } 260 261 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) 262 { 263 SpaprInterruptControllerClass *sicc 264 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 265 266 sicc->print_info(spapr->active_intc, mon); 267 } 268 269 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, 270 void *fdt, uint32_t phandle) 271 { 272 SpaprInterruptControllerClass *sicc 273 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 274 275 sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); 276 } 277 278 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) 279 { 280 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 281 282 if (smc->legacy_irq_allocation) { 283 return smc->nr_xirqs; 284 } else { 285 return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; 286 } 287 } 288 289 void spapr_irq_init(SpaprMachineState *spapr, Error **errp) 290 { 291 MachineState *machine = MACHINE(spapr); 292 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 293 294 if (machine_kernel_irqchip_split(machine)) { 295 error_setg(errp, "kernel_irqchip split mode not supported on pseries"); 296 return; 297 } 298 299 if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) { 300 error_setg(errp, 301 "kernel_irqchip requested but only available with KVM"); 302 return; 303 } 304 305 if (spapr_irq_check(spapr, errp) < 0) { 306 return; 307 } 308 309 /* Initialize the MSI IRQ allocator. */ 310 spapr_irq_msi_init(spapr); 311 312 if (spapr->irq->xics) { 313 Error *local_err = NULL; 314 Object *obj; 315 316 obj = object_new(TYPE_ICS_SPAPR); 317 318 object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); 319 object_property_set_link(obj, OBJECT(spapr), ICS_PROP_XICS, 320 &error_abort); 321 object_property_set_int(obj, smc->nr_xirqs, "nr-irqs", &error_abort); 322 object_property_set_bool(obj, true, "realized", &local_err); 323 if (local_err) { 324 error_propagate(errp, local_err); 325 return; 326 } 327 328 spapr->ics = ICS_SPAPR(obj); 329 } 330 331 if (spapr->irq->xive) { 332 uint32_t nr_servers = spapr_max_server_number(spapr); 333 DeviceState *dev; 334 int i; 335 336 dev = qdev_create(NULL, TYPE_SPAPR_XIVE); 337 qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE); 338 /* 339 * 8 XIVE END structures per CPU. One for each available 340 * priority 341 */ 342 qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); 343 qdev_init_nofail(dev); 344 345 spapr->xive = SPAPR_XIVE(dev); 346 347 /* Enable the CPU IPIs */ 348 for (i = 0; i < nr_servers; ++i) { 349 SpaprInterruptControllerClass *sicc 350 = SPAPR_INTC_GET_CLASS(spapr->xive); 351 352 if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, 353 false, errp) < 0) { 354 return; 355 } 356 } 357 358 spapr_xive_hcall_init(spapr); 359 } 360 361 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, 362 smc->nr_xirqs + SPAPR_XIRQ_BASE); 363 364 /* 365 * Mostly we don't actually need this until reset, except that not 366 * having this set up can cause VFIO devices to issue a 367 * false-positive warning during realize(), because they don't yet 368 * have an in-kernel irq chip. 369 */ 370 spapr_irq_update_active_intc(spapr); 371 } 372 373 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) 374 { 375 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 376 int i; 377 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 378 int rc; 379 380 assert(irq >= SPAPR_XIRQ_BASE); 381 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 382 383 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 384 SpaprInterruptController *intc = intcs[i]; 385 if (intc) { 386 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 387 rc = sicc->claim_irq(intc, irq, lsi, errp); 388 if (rc < 0) { 389 return rc; 390 } 391 } 392 } 393 394 return 0; 395 } 396 397 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) 398 { 399 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 400 int i, j; 401 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 402 403 assert(irq >= SPAPR_XIRQ_BASE); 404 assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 405 406 for (i = irq; i < (irq + num); i++) { 407 for (j = 0; j < ARRAY_SIZE(intcs); j++) { 408 SpaprInterruptController *intc = intcs[j]; 409 410 if (intc) { 411 SpaprInterruptControllerClass *sicc 412 = SPAPR_INTC_GET_CLASS(intc); 413 sicc->free_irq(intc, i); 414 } 415 } 416 } 417 } 418 419 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) 420 { 421 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 422 423 /* 424 * This interface is basically for VIO and PHB devices to find the 425 * right qemu_irq to manipulate, so we only allow access to the 426 * external irqs for now. Currently anything which needs to 427 * access the IPIs most naturally gets there via the guest side 428 * interfaces, we can change this if we need to in future. 429 */ 430 assert(irq >= SPAPR_XIRQ_BASE); 431 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 432 433 if (spapr->ics) { 434 assert(ics_valid_irq(spapr->ics, irq)); 435 } 436 if (spapr->xive) { 437 assert(irq < spapr->xive->nr_irqs); 438 assert(xive_eas_is_valid(&spapr->xive->eat[irq])); 439 } 440 441 return spapr->qirqs[irq]; 442 } 443 444 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) 445 { 446 SpaprInterruptControllerClass *sicc; 447 448 spapr_irq_update_active_intc(spapr); 449 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 450 return sicc->post_load(spapr->active_intc, version_id); 451 } 452 453 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) 454 { 455 assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); 456 457 spapr_irq_update_active_intc(spapr); 458 } 459 460 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) 461 { 462 const char *nodename = "interrupt-controller"; 463 int offset, phandle; 464 465 offset = fdt_subnode_offset(fdt, 0, nodename); 466 if (offset < 0) { 467 error_setg(errp, "Can't find node \"%s\": %s", 468 nodename, fdt_strerror(offset)); 469 return -1; 470 } 471 472 phandle = fdt_get_phandle(fdt, offset); 473 if (!phandle) { 474 error_setg(errp, "Can't get phandle of node \"%s\"", nodename); 475 return -1; 476 } 477 478 return phandle; 479 } 480 481 static void set_active_intc(SpaprMachineState *spapr, 482 SpaprInterruptController *new_intc) 483 { 484 SpaprInterruptControllerClass *sicc; 485 uint32_t nr_servers = spapr_max_server_number(spapr); 486 487 assert(new_intc); 488 489 if (new_intc == spapr->active_intc) { 490 /* Nothing to do */ 491 return; 492 } 493 494 if (spapr->active_intc) { 495 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 496 if (sicc->deactivate) { 497 sicc->deactivate(spapr->active_intc); 498 } 499 } 500 501 sicc = SPAPR_INTC_GET_CLASS(new_intc); 502 if (sicc->activate) { 503 sicc->activate(new_intc, nr_servers, &error_fatal); 504 } 505 506 spapr->active_intc = new_intc; 507 508 /* 509 * We've changed the kernel irqchip, let VFIO devices know they 510 * need to readjust. 511 */ 512 kvm_irqchip_change_notify(); 513 } 514 515 void spapr_irq_update_active_intc(SpaprMachineState *spapr) 516 { 517 SpaprInterruptController *new_intc; 518 519 if (!spapr->ics) { 520 /* 521 * XXX before we run CAS, ov5_cas is initialized empty, which 522 * indicates XICS, even if we have ic-mode=xive. TODO: clean 523 * up the CAS path so that we have a clearer way of handling 524 * this. 525 */ 526 new_intc = SPAPR_INTC(spapr->xive); 527 } else if (spapr->ov5_cas 528 && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { 529 new_intc = SPAPR_INTC(spapr->xive); 530 } else { 531 new_intc = SPAPR_INTC(spapr->ics); 532 } 533 534 set_active_intc(spapr, new_intc); 535 } 536 537 /* 538 * XICS legacy routines - to deprecate one day 539 */ 540 541 static int ics_find_free_block(ICSState *ics, int num, int alignnum) 542 { 543 int first, i; 544 545 for (first = 0; first < ics->nr_irqs; first += alignnum) { 546 if (num > (ics->nr_irqs - first)) { 547 return -1; 548 } 549 for (i = first; i < first + num; ++i) { 550 if (!ics_irq_free(ics, i)) { 551 break; 552 } 553 } 554 if (i == (first + num)) { 555 return first; 556 } 557 } 558 559 return -1; 560 } 561 562 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) 563 { 564 ICSState *ics = spapr->ics; 565 int first = -1; 566 567 assert(ics); 568 569 /* 570 * MSIMesage::data is used for storing VIRQ so 571 * it has to be aligned to num to support multiple 572 * MSI vectors. MSI-X is not affected by this. 573 * The hint is used for the first IRQ, the rest should 574 * be allocated continuously. 575 */ 576 if (align) { 577 assert((num == 1) || (num == 2) || (num == 4) || 578 (num == 8) || (num == 16) || (num == 32)); 579 first = ics_find_free_block(ics, num, num); 580 } else { 581 first = ics_find_free_block(ics, num, 1); 582 } 583 584 if (first < 0) { 585 error_setg(errp, "can't find a free %d-IRQ block", num); 586 return -1; 587 } 588 589 return first + ics->offset; 590 } 591 592 SpaprIrq spapr_irq_xics_legacy = { 593 .xics = true, 594 .xive = false, 595 }; 596 597 static void spapr_irq_register_types(void) 598 { 599 type_register_static(&spapr_intc_info); 600 } 601 602 type_init(spapr_irq_register_types) 603