xref: /openbmc/qemu/hw/ppc/spapr_irq.c (revision 4abeadf65161edf45989b5ada81be5e002106342)
1 /*
2  * QEMU PowerPC sPAPR IRQ interface
3  *
4  * Copyright (c) 2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
14 #include "qapi/type-helpers.h"
15 #include "hw/irq.h"
16 #include "hw/ppc/spapr.h"
17 #include "hw/ppc/spapr_cpu_core.h"
18 #include "hw/ppc/spapr_xive.h"
19 #include "hw/ppc/xics.h"
20 #include "hw/ppc/xics_spapr.h"
21 #include "hw/qdev-properties.h"
22 #include "monitor/monitor.h"
23 #include "cpu-models.h"
24 #include "sysemu/kvm.h"
25 
26 #include "trace.h"
27 
/*
 * Compile-time check: the number of IPIs must not exceed the first
 * external IRQ number.
 * NOTE(review): presumably this keeps the IPI range from overlapping the
 * external IRQ range starting at SPAPR_XIRQ_BASE - confirm against the
 * definitions of both constants.
 */
QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE);
29 
30 static const TypeInfo spapr_intc_info = {
31     .name = TYPE_SPAPR_INTC,
32     .parent = TYPE_INTERFACE,
33     .class_size = sizeof(SpaprInterruptControllerClass),
34 };
35 
36 static void spapr_irq_msi_init(SpaprMachineState *spapr)
37 {
38     if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
39         /* Legacy mode doesn't use this allocator */
40         return;
41     }
42 
43     spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
44     spapr->irq_map = bitmap_new(spapr->irq_map_nr);
45 }
46 
47 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
48                         Error **errp)
49 {
50     int irq;
51 
52     /*
53      * The 'align_mask' parameter of bitmap_find_next_zero_area()
54      * should be one less than a power of 2; 0 means no
55      * alignment. Adapt the 'align' value of the former allocator
56      * to fit the requirements of bitmap_find_next_zero_area()
57      */
58     align -= 1;
59 
60     irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
61                                      align);
62     if (irq == spapr->irq_map_nr) {
63         error_setg(errp, "can't find a free %d-IRQ block", num);
64         return -1;
65     }
66 
67     bitmap_set(spapr->irq_map, irq, num);
68 
69     return irq + SPAPR_IRQ_MSI;
70 }
71 
72 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
73 {
74     bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
75 }
76 
77 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
78                        SpaprInterruptController *intc,
79                        uint32_t nr_servers,
80                        Error **errp)
81 {
82     Error *local_err = NULL;
83 
84     if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
85         if (fn(intc, nr_servers, &local_err) < 0) {
86             if (kvm_kernel_irqchip_required()) {
87                 error_prepend(&local_err,
88                               "kernel_irqchip requested but unavailable: ");
89                 error_propagate(errp, local_err);
90                 return -1;
91             }
92 
93             /*
94              * We failed to initialize the KVM device, fallback to
95              * emulated mode
96              */
97             error_prepend(&local_err,
98                           "kernel_irqchip allowed but unavailable: ");
99             error_append_hint(&local_err,
100                               "Falling back to kernel-irqchip=off\n");
101             warn_report_err(local_err);
102         }
103     }
104 
105     return 0;
106 }
107 
108 /*
109  * XICS IRQ backend.
110  */
111 
112 SpaprIrq spapr_irq_xics = {
113     .xics        = true,
114     .xive        = false,
115 };
116 
117 /*
118  * XIVE IRQ backend.
119  */
120 
121 SpaprIrq spapr_irq_xive = {
122     .xics        = false,
123     .xive        = true,
124 };
125 
126 /*
127  * Dual XIVE and XICS IRQ backend.
128  *
129  * Both interrupt mode, XIVE and XICS, objects are created but the
130  * machine starts in legacy interrupt mode (XICS). It can be changed
131  * by the CAS negotiation process and, in that case, the new mode is
132  * activated after an extra machine reset.
133  */
134 
135 /*
136  * Define values in sync with the XIVE and XICS backend
137  */
138 SpaprIrq spapr_irq_dual = {
139     .xics        = true,
140     .xive        = true,
141 };
142 
143 
144 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
145 {
146     ERRP_GUARD();
147     MachineState *machine = MACHINE(spapr);
148 
149     /*
150      * Sanity checks on non-P9 machines. On these, XIVE is not
151      * advertised, see spapr_dt_ov5_platform_support()
152      */
153     if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
154                                0, spapr->max_compat_pvr)) {
155         /*
156          * If the 'dual' interrupt mode is selected, force XICS as CAS
157          * negotiation is useless.
158          */
159         if (spapr->irq == &spapr_irq_dual) {
160             spapr->irq = &spapr_irq_xics;
161             return 0;
162         }
163 
164         /*
165          * Non-P9 machines using only XIVE is a bogus setup. We have two
166          * scenarios to take into account because of the compat mode:
167          *
168          * 1. POWER7/8 machines should fail to init later on when creating
169          *    the XIVE interrupt presenters because a POWER9 exception
170          *    model is required.
171 
172          * 2. POWER9 machines using the POWER8 compat mode won't fail and
173          *    will let the OS boot with a partial XIVE setup : DT
174          *    properties but no hcalls.
175          *
176          * To cover both and not confuse the OS, add an early failure in
177          * QEMU.
178          */
179         if (!spapr->irq->xics) {
180             error_setg(errp, "XIVE-only machines require a POWER9 CPU");
181             return -1;
182         }
183     }
184 
185     /*
186      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
187      * re-created. Same happens with KVM nested guests. Detect that early to
188      * avoid QEMU to exit later when the guest reboots.
189      */
190     if (kvm_enabled() &&
191         spapr->irq == &spapr_irq_dual &&
192         kvm_kernel_irqchip_required() &&
193         xics_kvm_has_broken_disconnect()) {
194         error_setg(errp,
195             "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
196         error_append_hint(errp,
197             "This can happen with an old KVM or in a KVM nested guest.\n");
198         error_append_hint(errp,
199             "Try without kernel-irqchip or with kernel-irqchip=off.\n");
200         return -1;
201     }
202 
203     return 0;
204 }
205 
206 /*
207  * sPAPR IRQ frontend routines for devices
208  */
/*
 * Array initializer listing the machine's XICS ('ics') and XIVE ('xive')
 * controllers, each converted with SPAPR_INTC(). The functions using it
 * in this file skip entries that are NULL.
 */
#define ALL_INTCS(spapr_) \
    { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
211 
212 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
213                               PowerPCCPU *cpu, Error **errp)
214 {
215     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
216     int i;
217     int rc;
218 
219     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
220         SpaprInterruptController *intc = intcs[i];
221         if (intc) {
222             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
223             rc = sicc->cpu_intc_create(intc, cpu, errp);
224             if (rc < 0) {
225                 return rc;
226             }
227         }
228     }
229 
230     return 0;
231 }
232 
233 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
234 {
235     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
236     int i;
237 
238     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
239         SpaprInterruptController *intc = intcs[i];
240         if (intc) {
241             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
242             sicc->cpu_intc_reset(intc, cpu);
243         }
244     }
245 }
246 
247 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
248 {
249     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
250     int i;
251 
252     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
253         SpaprInterruptController *intc = intcs[i];
254         if (intc) {
255             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
256             sicc->cpu_intc_destroy(intc, cpu);
257         }
258     }
259 }
260 
261 static void spapr_set_irq(void *opaque, int irq, int level)
262 {
263     SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
264     SpaprInterruptControllerClass *sicc
265         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
266 
267     sicc->set_irq(spapr->active_intc, irq, level);
268 }
269 
270 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
271 {
272     SpaprInterruptControllerClass *sicc
273         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
274     g_autoptr(GString) buf = g_string_new("");
275     g_autoptr(HumanReadableText) info = NULL;
276 
277     sicc->print_info(spapr->active_intc, buf);
278     info = human_readable_text_from_str(buf);
279     monitor_puts(mon, info->human_readable_text);
280 }
281 
282 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
283                   void *fdt, uint32_t phandle)
284 {
285     SpaprInterruptControllerClass *sicc
286         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
287 
288     sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
289 }
290 
291 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
292 {
293     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
294 
295     if (smc->legacy_irq_allocation) {
296         return smc->nr_xirqs;
297     } else {
298         return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
299     }
300 }
301 
302 void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
303 {
304     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
305 
306     if (kvm_enabled() && kvm_kernel_irqchip_split()) {
307         error_setg(errp, "kernel_irqchip split mode not supported on pseries");
308         return;
309     }
310 
311     if (spapr_irq_check(spapr, errp) < 0) {
312         return;
313     }
314 
315     /* Initialize the MSI IRQ allocator. */
316     spapr_irq_msi_init(spapr);
317 
318     if (spapr->irq->xics) {
319         Object *obj;
320 
321         obj = object_new(TYPE_ICS_SPAPR);
322 
323         object_property_add_child(OBJECT(spapr), "ics", obj);
324         object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
325                                  &error_abort);
326         object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
327         if (!qdev_realize(DEVICE(obj), NULL, errp)) {
328             return;
329         }
330 
331         spapr->ics = ICS_SPAPR(obj);
332     }
333 
334     if (spapr->irq->xive) {
335         uint32_t nr_servers = spapr_max_server_number(spapr);
336         DeviceState *dev;
337         int i;
338 
339         dev = qdev_new(TYPE_SPAPR_XIVE);
340         qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
341         /*
342          * 8 XIVE END structures per CPU. One for each available
343          * priority
344          */
345         qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
346         object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
347                                  &error_abort);
348         sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
349 
350         spapr->xive = SPAPR_XIVE(dev);
351 
352         /* Enable the CPU IPIs */
353         for (i = 0; i < nr_servers; ++i) {
354             SpaprInterruptControllerClass *sicc
355                 = SPAPR_INTC_GET_CLASS(spapr->xive);
356 
357             if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
358                                 false, errp) < 0) {
359                 return;
360             }
361         }
362 
363         spapr_xive_hcall_init(spapr);
364     }
365 
366     spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
367                                       smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
368 
369     /*
370      * Mostly we don't actually need this until reset, except that not
371      * having this set up can cause VFIO devices to issue a
372      * false-positive warning during realize(), because they don't yet
373      * have an in-kernel irq chip.
374      */
375     spapr_irq_update_active_intc(spapr);
376 }
377 
378 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
379 {
380     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
381     int i;
382     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
383     int rc;
384 
385     assert(irq >= SPAPR_XIRQ_BASE);
386     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
387 
388     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
389         SpaprInterruptController *intc = intcs[i];
390         if (intc) {
391             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
392             rc = sicc->claim_irq(intc, irq, lsi, errp);
393             if (rc < 0) {
394                 return rc;
395             }
396         }
397     }
398 
399     return 0;
400 }
401 
402 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
403 {
404     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
405     int i, j;
406     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
407 
408     assert(irq >= SPAPR_XIRQ_BASE);
409     assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
410 
411     for (i = irq; i < (irq + num); i++) {
412         for (j = 0; j < ARRAY_SIZE(intcs); j++) {
413             SpaprInterruptController *intc = intcs[j];
414 
415             if (intc) {
416                 SpaprInterruptControllerClass *sicc
417                     = SPAPR_INTC_GET_CLASS(intc);
418                 sicc->free_irq(intc, i);
419             }
420         }
421     }
422 }
423 
424 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
425 {
426     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
427 
428     /*
429      * This interface is basically for VIO and PHB devices to find the
430      * right qemu_irq to manipulate, so we only allow access to the
431      * external irqs for now.  Currently anything which needs to
432      * access the IPIs most naturally gets there via the guest side
433      * interfaces, we can change this if we need to in future.
434      */
435     assert(irq >= SPAPR_XIRQ_BASE);
436     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
437 
438     if (spapr->ics) {
439         assert(ics_valid_irq(spapr->ics, irq));
440     }
441     if (spapr->xive) {
442         assert(irq < spapr->xive->nr_irqs);
443         assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
444     }
445 
446     return spapr->qirqs[irq];
447 }
448 
449 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
450 {
451     SpaprInterruptControllerClass *sicc;
452 
453     spapr_irq_update_active_intc(spapr);
454     sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
455     return sicc->post_load(spapr->active_intc, version_id);
456 }
457 
458 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
459 {
460     assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
461 
462     spapr_irq_update_active_intc(spapr);
463 }
464 
465 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
466 {
467     const char *nodename = "interrupt-controller";
468     int offset, phandle;
469 
470     offset = fdt_subnode_offset(fdt, 0, nodename);
471     if (offset < 0) {
472         error_setg(errp, "Can't find node \"%s\": %s",
473                    nodename, fdt_strerror(offset));
474         return -1;
475     }
476 
477     phandle = fdt_get_phandle(fdt, offset);
478     if (!phandle) {
479         error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
480         return -1;
481     }
482 
483     return phandle;
484 }
485 
486 static void set_active_intc(SpaprMachineState *spapr,
487                             SpaprInterruptController *new_intc)
488 {
489     SpaprInterruptControllerClass *sicc;
490     uint32_t nr_servers = spapr_max_server_number(spapr);
491 
492     assert(new_intc);
493 
494     if (new_intc == spapr->active_intc) {
495         /* Nothing to do */
496         return;
497     }
498 
499     if (spapr->active_intc) {
500         sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
501         if (sicc->deactivate) {
502             sicc->deactivate(spapr->active_intc);
503         }
504     }
505 
506     sicc = SPAPR_INTC_GET_CLASS(new_intc);
507     if (sicc->activate) {
508         sicc->activate(new_intc, nr_servers, &error_fatal);
509     }
510 
511     spapr->active_intc = new_intc;
512 
513     /*
514      * We've changed the kernel irqchip, let VFIO devices know they
515      * need to readjust.
516      */
517     kvm_irqchip_change_notify();
518 }
519 
520 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
521 {
522     SpaprInterruptController *new_intc;
523 
524     if (!spapr->ics) {
525         /*
526          * XXX before we run CAS, ov5_cas is initialized empty, which
527          * indicates XICS, even if we have ic-mode=xive.  TODO: clean
528          * up the CAS path so that we have a clearer way of handling
529          * this.
530          */
531         new_intc = SPAPR_INTC(spapr->xive);
532     } else if (spapr->ov5_cas
533                && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
534         new_intc = SPAPR_INTC(spapr->xive);
535     } else {
536         new_intc = SPAPR_INTC(spapr->ics);
537     }
538 
539     set_active_intc(spapr, new_intc);
540 }
541 
542 /*
543  * XICS legacy routines - to deprecate one day
544  */
545 
546 static int ics_find_free_block(ICSState *ics, int num, int alignnum)
547 {
548     int first, i;
549 
550     for (first = 0; first < ics->nr_irqs; first += alignnum) {
551         if (num > (ics->nr_irqs - first)) {
552             return -1;
553         }
554         for (i = first; i < first + num; ++i) {
555             if (!ics_irq_free(ics, i)) {
556                 break;
557             }
558         }
559         if (i == (first + num)) {
560             return first;
561         }
562     }
563 
564     return -1;
565 }
566 
567 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
568 {
569     ICSState *ics = spapr->ics;
570     int first = -1;
571 
572     assert(ics);
573 
574     /*
575      * MSIMesage::data is used for storing VIRQ so
576      * it has to be aligned to num to support multiple
577      * MSI vectors. MSI-X is not affected by this.
578      * The hint is used for the first IRQ, the rest should
579      * be allocated continuously.
580      */
581     if (align) {
582         assert((num == 1) || (num == 2) || (num == 4) ||
583                (num == 8) || (num == 16) || (num == 32));
584         first = ics_find_free_block(ics, num, num);
585     } else {
586         first = ics_find_free_block(ics, num, 1);
587     }
588 
589     if (first < 0) {
590         error_setg(errp, "can't find a free %d-IRQ block", num);
591         return -1;
592     }
593 
594     return first + ics->offset;
595 }
596 
597 SpaprIrq spapr_irq_xics_legacy = {
598     .xics        = true,
599     .xive        = false,
600 };
601 
602 static void spapr_irq_register_types(void)
603 {
604     type_register_static(&spapr_intc_info);
605 }
606 
607 type_init(spapr_irq_register_types)
608