1 /* 2 * QEMU PowerPC sPAPR IRQ interface 3 * 4 * Copyright (c) 2018, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "hw/irq.h" 15 #include "hw/ppc/spapr.h" 16 #include "hw/ppc/spapr_cpu_core.h" 17 #include "hw/ppc/spapr_xive.h" 18 #include "hw/ppc/xics.h" 19 #include "hw/ppc/xics_spapr.h" 20 #include "hw/qdev-properties.h" 21 #include "cpu-models.h" 22 #include "sysemu/kvm.h" 23 24 #include "trace.h" 25 26 static const TypeInfo spapr_intc_info = { 27 .name = TYPE_SPAPR_INTC, 28 .parent = TYPE_INTERFACE, 29 .class_size = sizeof(SpaprInterruptControllerClass), 30 }; 31 32 static void spapr_irq_msi_init(SpaprMachineState *spapr) 33 { 34 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { 35 /* Legacy mode doesn't use this allocator */ 36 return; 37 } 38 39 spapr->irq_map_nr = spapr_irq_nr_msis(spapr); 40 spapr->irq_map = bitmap_new(spapr->irq_map_nr); 41 } 42 43 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, 44 Error **errp) 45 { 46 int irq; 47 48 /* 49 * The 'align_mask' parameter of bitmap_find_next_zero_area() 50 * should be one less than a power of 2; 0 means no 51 * alignment. Adapt the 'align' value of the former allocator 52 * to fit the requirements of bitmap_find_next_zero_area() 53 */ 54 align -= 1; 55 56 irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, 57 align); 58 if (irq == spapr->irq_map_nr) { 59 error_setg(errp, "can't find a free %d-IRQ block", num); 60 return -1; 61 } 62 63 bitmap_set(spapr->irq_map, irq, num); 64 65 return irq + SPAPR_IRQ_MSI; 66 } 67 68 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) 69 { 70 bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); 71 } 72 73 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, 74 SpaprInterruptController *intc, 75 uint32_t nr_servers, 76 Error **errp) 77 { 78 Error *local_err = NULL; 79 80 if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { 81 if (fn(intc, nr_servers, &local_err) < 0) { 82 if (kvm_kernel_irqchip_required()) { 83 error_prepend(&local_err, 84 "kernel_irqchip requested but unavailable: "); 85 error_propagate(errp, local_err); 86 return -1; 87 } 88 89 /* 90 * We failed to initialize the KVM device, fallback to 91 * emulated mode 92 */ 93 error_prepend(&local_err, 94 "kernel_irqchip allowed but unavailable: "); 95 error_append_hint(&local_err, 96 "Falling back to kernel-irqchip=off\n"); 97 warn_report_err(local_err); 98 } 99 } 100 101 return 0; 102 } 103 104 /* 105 * XICS IRQ backend. 106 */ 107 108 SpaprIrq spapr_irq_xics = { 109 .xics = true, 110 .xive = false, 111 }; 112 113 /* 114 * XIVE IRQ backend. 115 */ 116 117 SpaprIrq spapr_irq_xive = { 118 .xics = false, 119 .xive = true, 120 }; 121 122 /* 123 * Dual XIVE and XICS IRQ backend. 124 * 125 * Both interrupt mode, XIVE and XICS, objects are created but the 126 * machine starts in legacy interrupt mode (XICS). It can be changed 127 * by the CAS negotiation process and, in that case, the new mode is 128 * activated after an extra machine reset. 129 */ 130 131 /* 132 * Define values in sync with the XIVE and XICS backend 133 */ 134 SpaprIrq spapr_irq_dual = { 135 .xics = true, 136 .xive = true, 137 }; 138 139 140 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) 141 { 142 ERRP_GUARD(); 143 MachineState *machine = MACHINE(spapr); 144 145 /* 146 * Sanity checks on non-P9 machines. On these, XIVE is not 147 * advertised, see spapr_dt_ov5_platform_support() 148 */ 149 if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 150 0, spapr->max_compat_pvr)) { 151 /* 152 * If the 'dual' interrupt mode is selected, force XICS as CAS 153 * negotiation is useless. 154 */ 155 if (spapr->irq == &spapr_irq_dual) { 156 spapr->irq = &spapr_irq_xics; 157 return 0; 158 } 159 160 /* 161 * Non-P9 machines using only XIVE is a bogus setup. We have two 162 * scenarios to take into account because of the compat mode: 163 * 164 * 1. POWER7/8 machines should fail to init later on when creating 165 * the XIVE interrupt presenters because a POWER9 exception 166 * model is required. 167 168 * 2. POWER9 machines using the POWER8 compat mode won't fail and 169 * will let the OS boot with a partial XIVE setup : DT 170 * properties but no hcalls. 171 * 172 * To cover both and not confuse the OS, add an early failure in 173 * QEMU. 174 */ 175 if (spapr->irq == &spapr_irq_xive) { 176 error_setg(errp, "XIVE-only machines require a POWER9 CPU"); 177 return -1; 178 } 179 } 180 181 /* 182 * On a POWER9 host, some older KVM XICS devices cannot be destroyed and 183 * re-created. Same happens with KVM nested guests. Detect that early to 184 * avoid QEMU to exit later when the guest reboots. 185 */ 186 if (kvm_enabled() && 187 spapr->irq == &spapr_irq_dual && 188 kvm_kernel_irqchip_required() && 189 xics_kvm_has_broken_disconnect(spapr)) { 190 error_setg(errp, 191 "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); 192 error_append_hint(errp, 193 "This can happen with an old KVM or in a KVM nested guest.\n"); 194 error_append_hint(errp, 195 "Try without kernel-irqchip or with kernel-irqchip=off.\n"); 196 return -1; 197 } 198 199 return 0; 200 } 201 202 /* 203 * sPAPR IRQ frontend routines for devices 204 */ 205 #define ALL_INTCS(spapr_) \ 206 { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } 207 208 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, 209 PowerPCCPU *cpu, Error **errp) 210 { 211 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 212 int i; 213 int rc; 214 215 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 216 SpaprInterruptController *intc = intcs[i]; 217 if (intc) { 218 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 219 rc = sicc->cpu_intc_create(intc, cpu, errp); 220 if (rc < 0) { 221 return rc; 222 } 223 } 224 } 225 226 return 0; 227 } 228 229 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) 230 { 231 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 232 int i; 233 234 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 235 SpaprInterruptController *intc = intcs[i]; 236 if (intc) { 237 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 238 sicc->cpu_intc_reset(intc, cpu); 239 } 240 } 241 } 242 243 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) 244 { 245 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 246 int i; 247 248 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 249 SpaprInterruptController *intc = intcs[i]; 250 if (intc) { 251 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 252 sicc->cpu_intc_destroy(intc, cpu); 253 } 254 } 255 } 256 257 static void spapr_set_irq(void *opaque, int irq, int level) 258 { 259 SpaprMachineState *spapr = SPAPR_MACHINE(opaque); 260 SpaprInterruptControllerClass *sicc 261 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 262 263 sicc->set_irq(spapr->active_intc, irq, level); 264 } 265 266 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) 267 { 268 SpaprInterruptControllerClass *sicc 269 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 270 271 sicc->print_info(spapr->active_intc, mon); 272 } 273 274 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, 275 void *fdt, uint32_t phandle) 276 { 277 SpaprInterruptControllerClass *sicc 278 = SPAPR_INTC_GET_CLASS(spapr->active_intc); 279 280 sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); 281 } 282 283 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) 284 { 285 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 286 287 if (smc->legacy_irq_allocation) { 288 return smc->nr_xirqs; 289 } else { 290 return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; 291 } 292 } 293 294 void spapr_irq_init(SpaprMachineState *spapr, Error **errp) 295 { 296 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 297 298 if (kvm_enabled() && kvm_kernel_irqchip_split()) { 299 error_setg(errp, "kernel_irqchip split mode not supported on pseries"); 300 return; 301 } 302 303 if (spapr_irq_check(spapr, errp) < 0) { 304 return; 305 } 306 307 /* Initialize the MSI IRQ allocator. */ 308 spapr_irq_msi_init(spapr); 309 310 if (spapr->irq->xics) { 311 Object *obj; 312 313 obj = object_new(TYPE_ICS_SPAPR); 314 315 object_property_add_child(OBJECT(spapr), "ics", obj); 316 object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), 317 &error_abort); 318 object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); 319 if (!qdev_realize(DEVICE(obj), NULL, errp)) { 320 return; 321 } 322 323 spapr->ics = ICS_SPAPR(obj); 324 } 325 326 if (spapr->irq->xive) { 327 uint32_t nr_servers = spapr_max_server_number(spapr); 328 DeviceState *dev; 329 int i; 330 331 dev = qdev_new(TYPE_SPAPR_XIVE); 332 qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE); 333 /* 334 * 8 XIVE END structures per CPU. One for each available 335 * priority 336 */ 337 qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); 338 object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), 339 &error_abort); 340 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); 341 342 spapr->xive = SPAPR_XIVE(dev); 343 344 /* Enable the CPU IPIs */ 345 for (i = 0; i < nr_servers; ++i) { 346 SpaprInterruptControllerClass *sicc 347 = SPAPR_INTC_GET_CLASS(spapr->xive); 348 349 if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, 350 false, errp) < 0) { 351 return; 352 } 353 } 354 355 spapr_xive_hcall_init(spapr); 356 } 357 358 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, 359 smc->nr_xirqs + SPAPR_XIRQ_BASE); 360 361 /* 362 * Mostly we don't actually need this until reset, except that not 363 * having this set up can cause VFIO devices to issue a 364 * false-positive warning during realize(), because they don't yet 365 * have an in-kernel irq chip. 366 */ 367 spapr_irq_update_active_intc(spapr); 368 } 369 370 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) 371 { 372 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 373 int i; 374 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 375 int rc; 376 377 assert(irq >= SPAPR_XIRQ_BASE); 378 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 379 380 for (i = 0; i < ARRAY_SIZE(intcs); i++) { 381 SpaprInterruptController *intc = intcs[i]; 382 if (intc) { 383 SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); 384 rc = sicc->claim_irq(intc, irq, lsi, errp); 385 if (rc < 0) { 386 return rc; 387 } 388 } 389 } 390 391 return 0; 392 } 393 394 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) 395 { 396 SpaprInterruptController *intcs[] = ALL_INTCS(spapr); 397 int i, j; 398 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 399 400 assert(irq >= SPAPR_XIRQ_BASE); 401 assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 402 403 for (i = irq; i < (irq + num); i++) { 404 for (j = 0; j < ARRAY_SIZE(intcs); j++) { 405 SpaprInterruptController *intc = intcs[j]; 406 407 if (intc) { 408 SpaprInterruptControllerClass *sicc 409 = SPAPR_INTC_GET_CLASS(intc); 410 sicc->free_irq(intc, i); 411 } 412 } 413 } 414 } 415 416 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) 417 { 418 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 419 420 /* 421 * This interface is basically for VIO and PHB devices to find the 422 * right qemu_irq to manipulate, so we only allow access to the 423 * external irqs for now. Currently anything which needs to 424 * access the IPIs most naturally gets there via the guest side 425 * interfaces, we can change this if we need to in future. 426 */ 427 assert(irq >= SPAPR_XIRQ_BASE); 428 assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); 429 430 if (spapr->ics) { 431 assert(ics_valid_irq(spapr->ics, irq)); 432 } 433 if (spapr->xive) { 434 assert(irq < spapr->xive->nr_irqs); 435 assert(xive_eas_is_valid(&spapr->xive->eat[irq])); 436 } 437 438 return spapr->qirqs[irq]; 439 } 440 441 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) 442 { 443 SpaprInterruptControllerClass *sicc; 444 445 spapr_irq_update_active_intc(spapr); 446 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 447 return sicc->post_load(spapr->active_intc, version_id); 448 } 449 450 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) 451 { 452 assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); 453 454 spapr_irq_update_active_intc(spapr); 455 } 456 457 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) 458 { 459 const char *nodename = "interrupt-controller"; 460 int offset, phandle; 461 462 offset = fdt_subnode_offset(fdt, 0, nodename); 463 if (offset < 0) { 464 error_setg(errp, "Can't find node \"%s\": %s", 465 nodename, fdt_strerror(offset)); 466 return -1; 467 } 468 469 phandle = fdt_get_phandle(fdt, offset); 470 if (!phandle) { 471 error_setg(errp, "Can't get phandle of node \"%s\"", nodename); 472 return -1; 473 } 474 475 return phandle; 476 } 477 478 static void set_active_intc(SpaprMachineState *spapr, 479 SpaprInterruptController *new_intc) 480 { 481 SpaprInterruptControllerClass *sicc; 482 uint32_t nr_servers = spapr_max_server_number(spapr); 483 484 assert(new_intc); 485 486 if (new_intc == spapr->active_intc) { 487 /* Nothing to do */ 488 return; 489 } 490 491 if (spapr->active_intc) { 492 sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); 493 if (sicc->deactivate) { 494 sicc->deactivate(spapr->active_intc); 495 } 496 } 497 498 sicc = SPAPR_INTC_GET_CLASS(new_intc); 499 if (sicc->activate) { 500 sicc->activate(new_intc, nr_servers, &error_fatal); 501 } 502 503 spapr->active_intc = new_intc; 504 505 /* 506 * We've changed the kernel irqchip, let VFIO devices know they 507 * need to readjust. 508 */ 509 kvm_irqchip_change_notify(); 510 } 511 512 void spapr_irq_update_active_intc(SpaprMachineState *spapr) 513 { 514 SpaprInterruptController *new_intc; 515 516 if (!spapr->ics) { 517 /* 518 * XXX before we run CAS, ov5_cas is initialized empty, which 519 * indicates XICS, even if we have ic-mode=xive. TODO: clean 520 * up the CAS path so that we have a clearer way of handling 521 * this. 522 */ 523 new_intc = SPAPR_INTC(spapr->xive); 524 } else if (spapr->ov5_cas 525 && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { 526 new_intc = SPAPR_INTC(spapr->xive); 527 } else { 528 new_intc = SPAPR_INTC(spapr->ics); 529 } 530 531 set_active_intc(spapr, new_intc); 532 } 533 534 /* 535 * XICS legacy routines - to deprecate one day 536 */ 537 538 static int ics_find_free_block(ICSState *ics, int num, int alignnum) 539 { 540 int first, i; 541 542 for (first = 0; first < ics->nr_irqs; first += alignnum) { 543 if (num > (ics->nr_irqs - first)) { 544 return -1; 545 } 546 for (i = first; i < first + num; ++i) { 547 if (!ics_irq_free(ics, i)) { 548 break; 549 } 550 } 551 if (i == (first + num)) { 552 return first; 553 } 554 } 555 556 return -1; 557 } 558 559 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) 560 { 561 ICSState *ics = spapr->ics; 562 int first = -1; 563 564 assert(ics); 565 566 /* 567 * MSIMesage::data is used for storing VIRQ so 568 * it has to be aligned to num to support multiple 569 * MSI vectors. MSI-X is not affected by this. 570 * The hint is used for the first IRQ, the rest should 571 * be allocated continuously. 572 */ 573 if (align) { 574 assert((num == 1) || (num == 2) || (num == 4) || 575 (num == 8) || (num == 16) || (num == 32)); 576 first = ics_find_free_block(ics, num, num); 577 } else { 578 first = ics_find_free_block(ics, num, 1); 579 } 580 581 if (first < 0) { 582 error_setg(errp, "can't find a free %d-IRQ block", num); 583 return -1; 584 } 585 586 return first + ics->offset; 587 } 588 589 SpaprIrq spapr_irq_xics_legacy = { 590 .xics = true, 591 .xive = false, 592 }; 593 594 static void spapr_irq_register_types(void) 595 { 596 type_register_static(&spapr_intc_info); 597 } 598 599 type_init(spapr_irq_register_types) 600