xref: /openbmc/qemu/hw/ppc/spapr_irq.c (revision 9b13640da3f94c0fbacbae6d23bd91febfa44588)
1 /*
2  * QEMU PowerPC sPAPR IRQ interface
3  *
4  * Copyright (c) 2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
14 #include "hw/irq.h"
15 #include "hw/ppc/spapr.h"
16 #include "hw/ppc/spapr_cpu_core.h"
17 #include "hw/ppc/spapr_xive.h"
18 #include "hw/ppc/xics.h"
19 #include "hw/ppc/xics_spapr.h"
20 #include "hw/qdev-properties.h"
21 #include "cpu-models.h"
22 #include "sysemu/kvm.h"
23 
24 #include "trace.h"
25 
26 QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE);
27 
28 static const TypeInfo spapr_intc_info = {
29     .name = TYPE_SPAPR_INTC,
30     .parent = TYPE_INTERFACE,
31     .class_size = sizeof(SpaprInterruptControllerClass),
32 };
33 
34 static void spapr_irq_msi_init(SpaprMachineState *spapr)
35 {
36     if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
37         /* Legacy mode doesn't use this allocator */
38         return;
39     }
40 
41     spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
42     spapr->irq_map = bitmap_new(spapr->irq_map_nr);
43 }
44 
45 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
46                         Error **errp)
47 {
48     int irq;
49 
50     /*
51      * The 'align_mask' parameter of bitmap_find_next_zero_area()
52      * should be one less than a power of 2; 0 means no
53      * alignment. Adapt the 'align' value of the former allocator
54      * to fit the requirements of bitmap_find_next_zero_area()
55      */
56     align -= 1;
57 
58     irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
59                                      align);
60     if (irq == spapr->irq_map_nr) {
61         error_setg(errp, "can't find a free %d-IRQ block", num);
62         return -1;
63     }
64 
65     bitmap_set(spapr->irq_map, irq, num);
66 
67     return irq + SPAPR_IRQ_MSI;
68 }
69 
70 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
71 {
72     bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
73 }
74 
75 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
76                        SpaprInterruptController *intc,
77                        uint32_t nr_servers,
78                        Error **errp)
79 {
80     Error *local_err = NULL;
81 
82     if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
83         if (fn(intc, nr_servers, &local_err) < 0) {
84             if (kvm_kernel_irqchip_required()) {
85                 error_prepend(&local_err,
86                               "kernel_irqchip requested but unavailable: ");
87                 error_propagate(errp, local_err);
88                 return -1;
89             }
90 
91             /*
92              * We failed to initialize the KVM device, fallback to
93              * emulated mode
94              */
95             error_prepend(&local_err,
96                           "kernel_irqchip allowed but unavailable: ");
97             error_append_hint(&local_err,
98                               "Falling back to kernel-irqchip=off\n");
99             warn_report_err(local_err);
100         }
101     }
102 
103     return 0;
104 }
105 
106 /*
107  * XICS IRQ backend.
108  */
109 
110 SpaprIrq spapr_irq_xics = {
111     .xics        = true,
112     .xive        = false,
113 };
114 
115 /*
116  * XIVE IRQ backend.
117  */
118 
119 SpaprIrq spapr_irq_xive = {
120     .xics        = false,
121     .xive        = true,
122 };
123 
124 /*
125  * Dual XIVE and XICS IRQ backend.
126  *
127  * Both interrupt mode, XIVE and XICS, objects are created but the
128  * machine starts in legacy interrupt mode (XICS). It can be changed
129  * by the CAS negotiation process and, in that case, the new mode is
130  * activated after an extra machine reset.
131  */
132 
133 /*
134  * Define values in sync with the XIVE and XICS backend
135  */
136 SpaprIrq spapr_irq_dual = {
137     .xics        = true,
138     .xive        = true,
139 };
140 
141 
142 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
143 {
144     ERRP_GUARD();
145     MachineState *machine = MACHINE(spapr);
146 
147     /*
148      * Sanity checks on non-P9 machines. On these, XIVE is not
149      * advertised, see spapr_dt_ov5_platform_support()
150      */
151     if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
152                                0, spapr->max_compat_pvr)) {
153         /*
154          * If the 'dual' interrupt mode is selected, force XICS as CAS
155          * negotiation is useless.
156          */
157         if (spapr->irq == &spapr_irq_dual) {
158             spapr->irq = &spapr_irq_xics;
159             return 0;
160         }
161 
162         /*
163          * Non-P9 machines using only XIVE is a bogus setup. We have two
164          * scenarios to take into account because of the compat mode:
165          *
166          * 1. POWER7/8 machines should fail to init later on when creating
167          *    the XIVE interrupt presenters because a POWER9 exception
168          *    model is required.
169 
170          * 2. POWER9 machines using the POWER8 compat mode won't fail and
171          *    will let the OS boot with a partial XIVE setup : DT
172          *    properties but no hcalls.
173          *
174          * To cover both and not confuse the OS, add an early failure in
175          * QEMU.
176          */
177         if (!spapr->irq->xics) {
178             error_setg(errp, "XIVE-only machines require a POWER9 CPU");
179             return -1;
180         }
181     }
182 
183     /*
184      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
185      * re-created. Same happens with KVM nested guests. Detect that early to
186      * avoid QEMU to exit later when the guest reboots.
187      */
188     if (kvm_enabled() &&
189         spapr->irq == &spapr_irq_dual &&
190         kvm_kernel_irqchip_required() &&
191         xics_kvm_has_broken_disconnect()) {
192         error_setg(errp,
193             "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
194         error_append_hint(errp,
195             "This can happen with an old KVM or in a KVM nested guest.\n");
196         error_append_hint(errp,
197             "Try without kernel-irqchip or with kernel-irqchip=off.\n");
198         return -1;
199     }
200 
201     return 0;
202 }
203 
204 /*
205  * sPAPR IRQ frontend routines for devices
206  */
/*
 * Array initializer listing every interrupt controller of the machine
 * (XICS first, then XIVE). Entries may be NULL, so users must check
 * each one before dereferencing it.
 */
#define ALL_INTCS(spapr_)               \
    {                                   \
        SPAPR_INTC((spapr_)->ics),      \
        SPAPR_INTC((spapr_)->xive),     \
    }
209 
210 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
211                               PowerPCCPU *cpu, Error **errp)
212 {
213     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
214     int i;
215     int rc;
216 
217     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
218         SpaprInterruptController *intc = intcs[i];
219         if (intc) {
220             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
221             rc = sicc->cpu_intc_create(intc, cpu, errp);
222             if (rc < 0) {
223                 return rc;
224             }
225         }
226     }
227 
228     return 0;
229 }
230 
231 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
232 {
233     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
234     int i;
235 
236     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
237         SpaprInterruptController *intc = intcs[i];
238         if (intc) {
239             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
240             sicc->cpu_intc_reset(intc, cpu);
241         }
242     }
243 }
244 
245 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
246 {
247     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
248     int i;
249 
250     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
251         SpaprInterruptController *intc = intcs[i];
252         if (intc) {
253             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
254             sicc->cpu_intc_destroy(intc, cpu);
255         }
256     }
257 }
258 
259 static void spapr_set_irq(void *opaque, int irq, int level)
260 {
261     SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
262     SpaprInterruptControllerClass *sicc
263         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
264 
265     sicc->set_irq(spapr->active_intc, irq, level);
266 }
267 
268 void spapr_irq_print_info(SpaprMachineState *spapr, GString *buf)
269 {
270     SpaprInterruptControllerClass *sicc
271         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
272 
273     sicc->print_info(spapr->active_intc, buf);
274 }
275 
276 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
277                   void *fdt, uint32_t phandle)
278 {
279     SpaprInterruptControllerClass *sicc
280         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
281 
282     sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
283 }
284 
285 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
286 {
287     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
288 
289     if (smc->legacy_irq_allocation) {
290         return smc->nr_xirqs;
291     } else {
292         return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
293     }
294 }
295 
296 void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
297 {
298     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
299 
300     if (kvm_enabled() && kvm_kernel_irqchip_split()) {
301         error_setg(errp, "kernel_irqchip split mode not supported on pseries");
302         return;
303     }
304 
305     if (spapr_irq_check(spapr, errp) < 0) {
306         return;
307     }
308 
309     /* Initialize the MSI IRQ allocator. */
310     spapr_irq_msi_init(spapr);
311 
312     if (spapr->irq->xics) {
313         Object *obj;
314 
315         obj = object_new(TYPE_ICS_SPAPR);
316 
317         object_property_add_child(OBJECT(spapr), "ics", obj);
318         object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
319                                  &error_abort);
320         object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
321         if (!qdev_realize(DEVICE(obj), NULL, errp)) {
322             return;
323         }
324 
325         spapr->ics = ICS_SPAPR(obj);
326     }
327 
328     if (spapr->irq->xive) {
329         uint32_t nr_servers = spapr_max_server_number(spapr);
330         DeviceState *dev;
331         int i;
332 
333         dev = qdev_new(TYPE_SPAPR_XIVE);
334         qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
335         /*
336          * 8 XIVE END structures per CPU. One for each available
337          * priority
338          */
339         qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
340         object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
341                                  &error_abort);
342         sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
343 
344         spapr->xive = SPAPR_XIVE(dev);
345 
346         /* Enable the CPU IPIs */
347         for (i = 0; i < nr_servers; ++i) {
348             SpaprInterruptControllerClass *sicc
349                 = SPAPR_INTC_GET_CLASS(spapr->xive);
350 
351             if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
352                                 false, errp) < 0) {
353                 return;
354             }
355         }
356 
357         spapr_xive_hcall_init(spapr);
358     }
359 
360     spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
361                                       smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
362 
363     /*
364      * Mostly we don't actually need this until reset, except that not
365      * having this set up can cause VFIO devices to issue a
366      * false-positive warning during realize(), because they don't yet
367      * have an in-kernel irq chip.
368      */
369     spapr_irq_update_active_intc(spapr);
370 }
371 
372 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
373 {
374     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
375     int i;
376     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
377     int rc;
378 
379     assert(irq >= SPAPR_XIRQ_BASE);
380     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
381 
382     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
383         SpaprInterruptController *intc = intcs[i];
384         if (intc) {
385             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
386             rc = sicc->claim_irq(intc, irq, lsi, errp);
387             if (rc < 0) {
388                 return rc;
389             }
390         }
391     }
392 
393     return 0;
394 }
395 
396 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
397 {
398     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
399     int i, j;
400     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
401 
402     assert(irq >= SPAPR_XIRQ_BASE);
403     assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
404 
405     for (i = irq; i < (irq + num); i++) {
406         for (j = 0; j < ARRAY_SIZE(intcs); j++) {
407             SpaprInterruptController *intc = intcs[j];
408 
409             if (intc) {
410                 SpaprInterruptControllerClass *sicc
411                     = SPAPR_INTC_GET_CLASS(intc);
412                 sicc->free_irq(intc, i);
413             }
414         }
415     }
416 }
417 
418 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
419 {
420     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
421 
422     /*
423      * This interface is basically for VIO and PHB devices to find the
424      * right qemu_irq to manipulate, so we only allow access to the
425      * external irqs for now.  Currently anything which needs to
426      * access the IPIs most naturally gets there via the guest side
427      * interfaces, we can change this if we need to in future.
428      */
429     assert(irq >= SPAPR_XIRQ_BASE);
430     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
431 
432     if (spapr->ics) {
433         assert(ics_valid_irq(spapr->ics, irq));
434     }
435     if (spapr->xive) {
436         assert(irq < spapr->xive->nr_irqs);
437         assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
438     }
439 
440     return spapr->qirqs[irq];
441 }
442 
443 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
444 {
445     SpaprInterruptControllerClass *sicc;
446 
447     spapr_irq_update_active_intc(spapr);
448     sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
449     return sicc->post_load(spapr->active_intc, version_id);
450 }
451 
452 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
453 {
454     assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
455 
456     spapr_irq_update_active_intc(spapr);
457 }
458 
459 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
460 {
461     const char *nodename = "interrupt-controller";
462     int offset, phandle;
463 
464     offset = fdt_subnode_offset(fdt, 0, nodename);
465     if (offset < 0) {
466         error_setg(errp, "Can't find node \"%s\": %s",
467                    nodename, fdt_strerror(offset));
468         return -1;
469     }
470 
471     phandle = fdt_get_phandle(fdt, offset);
472     if (!phandle) {
473         error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
474         return -1;
475     }
476 
477     return phandle;
478 }
479 
480 static void set_active_intc(SpaprMachineState *spapr,
481                             SpaprInterruptController *new_intc)
482 {
483     SpaprInterruptControllerClass *sicc;
484     uint32_t nr_servers = spapr_max_server_number(spapr);
485 
486     assert(new_intc);
487 
488     if (new_intc == spapr->active_intc) {
489         /* Nothing to do */
490         return;
491     }
492 
493     if (spapr->active_intc) {
494         sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
495         if (sicc->deactivate) {
496             sicc->deactivate(spapr->active_intc);
497         }
498     }
499 
500     sicc = SPAPR_INTC_GET_CLASS(new_intc);
501     if (sicc->activate) {
502         sicc->activate(new_intc, nr_servers, &error_fatal);
503     }
504 
505     spapr->active_intc = new_intc;
506 
507     /*
508      * We've changed the kernel irqchip, let VFIO devices know they
509      * need to readjust.
510      */
511     kvm_irqchip_change_notify();
512 }
513 
514 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
515 {
516     SpaprInterruptController *new_intc;
517 
518     if (!spapr->ics) {
519         /*
520          * XXX before we run CAS, ov5_cas is initialized empty, which
521          * indicates XICS, even if we have ic-mode=xive.  TODO: clean
522          * up the CAS path so that we have a clearer way of handling
523          * this.
524          */
525         new_intc = SPAPR_INTC(spapr->xive);
526     } else if (spapr->ov5_cas
527                && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
528         new_intc = SPAPR_INTC(spapr->xive);
529     } else {
530         new_intc = SPAPR_INTC(spapr->ics);
531     }
532 
533     set_active_intc(spapr, new_intc);
534 }
535 
536 /*
537  * XICS legacy routines - to deprecate one day
538  */
539 
540 static int ics_find_free_block(ICSState *ics, int num, int alignnum)
541 {
542     int first, i;
543 
544     for (first = 0; first < ics->nr_irqs; first += alignnum) {
545         if (num > (ics->nr_irqs - first)) {
546             return -1;
547         }
548         for (i = first; i < first + num; ++i) {
549             if (!ics_irq_free(ics, i)) {
550                 break;
551             }
552         }
553         if (i == (first + num)) {
554             return first;
555         }
556     }
557 
558     return -1;
559 }
560 
561 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
562 {
563     ICSState *ics = spapr->ics;
564     int first = -1;
565 
566     assert(ics);
567 
568     /*
569      * MSIMesage::data is used for storing VIRQ so
570      * it has to be aligned to num to support multiple
571      * MSI vectors. MSI-X is not affected by this.
572      * The hint is used for the first IRQ, the rest should
573      * be allocated continuously.
574      */
575     if (align) {
576         assert((num == 1) || (num == 2) || (num == 4) ||
577                (num == 8) || (num == 16) || (num == 32));
578         first = ics_find_free_block(ics, num, num);
579     } else {
580         first = ics_find_free_block(ics, num, 1);
581     }
582 
583     if (first < 0) {
584         error_setg(errp, "can't find a free %d-IRQ block", num);
585         return -1;
586     }
587 
588     return first + ics->offset;
589 }
590 
591 SpaprIrq spapr_irq_xics_legacy = {
592     .xics        = true,
593     .xive        = false,
594 };
595 
596 static void spapr_irq_register_types(void)
597 {
598     type_register_static(&spapr_intc_info);
599 }
600 
601 type_init(spapr_irq_register_types)
602