xref: /openbmc/qemu/hw/intc/xive.c (revision 135b03cb)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "sysemu/cpus.h"
16 #include "sysemu/dma.h"
17 #include "sysemu/reset.h"
18 #include "hw/qdev-properties.h"
19 #include "migration/vmstate.h"
20 #include "monitor/monitor.h"
21 #include "hw/irq.h"
22 #include "hw/ppc/xive.h"
23 #include "hw/ppc/xive_regs.h"
24 
25 /*
26  * XIVE Thread Interrupt Management context
27  */
28 
29 /*
30  * Convert a priority number to an Interrupt Pending Buffer (IPB)
31  * register, which indicates a pending interrupt at the priority
32  * corresponding to the bit number
33  */
34 static uint8_t priority_to_ipb(uint8_t priority)
35 {
36     return priority > XIVE_PRIORITY_MAX ?
37         0 : 1 << (XIVE_PRIORITY_MAX - priority);
38 }
39 
/*
 * Derive the Pending Interrupt Priority Register (PIPR) value, i.e.
 * the priority of the most favored pending notification, from an
 * Interrupt Pending Buffer (IPB) value. An empty IPB gives 0xff.
 */
static uint8_t ipb_to_pipr(uint8_t ibp)
{
    if (!ibp) {
        return 0xff;
    }

    /* Place the IPB byte at the top of a 32-bit word before clz32() */
    return clz32((uint32_t)ibp << 24);
}
49 
50 static void ipb_update(uint8_t *regs, uint8_t priority)
51 {
52     regs[TM_IPB] |= priority_to_ipb(priority);
53     regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
54 }
55 
56 static uint8_t exception_mask(uint8_t ring)
57 {
58     switch (ring) {
59     case TM_QW1_OS:
60         return TM_QW1_NSR_EO;
61     case TM_QW3_HV_PHYS:
62         return TM_QW3_NSR_HE;
63     default:
64         g_assert_not_reached();
65     }
66 }
67 
68 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
69 {
70         switch (ring) {
71         case TM_QW0_USER:
72                 return 0; /* Not supported */
73         case TM_QW1_OS:
74                 return tctx->os_output;
75         case TM_QW2_HV_POOL:
76         case TM_QW3_HV_PHYS:
77                 return tctx->hv_output;
78         default:
79                 return 0;
80         }
81 }
82 
83 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
84 {
85     uint8_t *regs = &tctx->regs[ring];
86     uint8_t nsr = regs[TM_NSR];
87     uint8_t mask = exception_mask(ring);
88 
89     qemu_irq_lower(xive_tctx_output(tctx, ring));
90 
91     if (regs[TM_NSR] & mask) {
92         uint8_t cppr = regs[TM_PIPR];
93 
94         regs[TM_CPPR] = cppr;
95 
96         /* Reset the pending buffer bit */
97         regs[TM_IPB] &= ~priority_to_ipb(cppr);
98         regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
99 
100         /* Drop Exception bit */
101         regs[TM_NSR] &= ~mask;
102     }
103 
104     return (nsr << 8) | regs[TM_CPPR];
105 }
106 
107 static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
108 {
109     uint8_t *regs = &tctx->regs[ring];
110 
111     if (regs[TM_PIPR] < regs[TM_CPPR]) {
112         switch (ring) {
113         case TM_QW1_OS:
114             regs[TM_NSR] |= TM_QW1_NSR_EO;
115             break;
116         case TM_QW3_HV_PHYS:
117             regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
118             break;
119         default:
120             g_assert_not_reached();
121         }
122         qemu_irq_raise(xive_tctx_output(tctx, ring));
123     }
124 }
125 
126 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
127 {
128     if (cppr > XIVE_PRIORITY_MAX) {
129         cppr = 0xff;
130     }
131 
132     tctx->regs[ring + TM_CPPR] = cppr;
133 
134     /* CPPR has changed, check if we need to raise a pending exception */
135     xive_tctx_notify(tctx, ring);
136 }
137 
138 static inline uint32_t xive_tctx_word2(uint8_t *ring)
139 {
140     return *((uint32_t *) &ring[TM_WORD2]);
141 }
142 
143 /*
144  * XIVE Thread Interrupt Management Area (TIMA)
145  */
146 
147 static void xive_tm_set_hv_cppr(XiveTCTX *tctx, hwaddr offset,
148                                 uint64_t value, unsigned size)
149 {
150     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
151 }
152 
153 static uint64_t xive_tm_ack_hv_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
154 {
155     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
156 }
157 
158 static uint64_t xive_tm_pull_pool_ctx(XiveTCTX *tctx, hwaddr offset,
159                                       unsigned size)
160 {
161     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
162     uint32_t qw2w2;
163 
164     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
165     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
166     return qw2w2;
167 }
168 
169 static void xive_tm_vt_push(XiveTCTX *tctx, hwaddr offset,
170                             uint64_t value, unsigned size)
171 {
172     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
173 }
174 
175 static uint64_t xive_tm_vt_poll(XiveTCTX *tctx, hwaddr offset, unsigned size)
176 {
177     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
178 }
179 
180 /*
181  * Define an access map for each page of the TIMA that we will use in
182  * the memory region ops to filter values when doing loads and stores
183  * of raw registers values
184  *
185  * Registers accessibility bits :
186  *
187  *    0x0 - no access
188  *    0x1 - write only
189  *    0x2 - read only
190  *    0x3 - read/write
191  */
192 
/*
 * Access map of the TIMA page XIVE_TM_HW_PAGE: one accessibility
 * value per register byte, 16 bytes per ring, indexed by the register
 * offset within the 64-byte thread context.
 */
static const uint8_t xive_tm_hw_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   3, 3, 3, 0, /* QW-3 PHYS */
};
199 
/*
 * Access map of the TIMA page XIVE_TM_HV_PAGE: one accessibility
 * value per register byte, 16 bytes per ring, indexed by the register
 * offset within the 64-byte thread context.
 */
static const uint8_t xive_tm_hv_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 0, 0, 0,   0, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   0, 0, 0, 0, /* QW-3 PHYS */
};
206 
/*
 * Access map of the TIMA page XIVE_TM_OS_PAGE: one accessibility
 * value per register byte, 16 bytes per ring. The POOL and PHYS rings
 * are entirely inaccessible from this page.
 */
static const uint8_t xive_tm_os_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
213 
/*
 * Access map of the TIMA page XIVE_TM_USER_PAGE: one accessibility
 * value per register byte, 16 bytes per ring. Only the first byte of
 * the User ring is accessible from this page.
 */
static const uint8_t xive_tm_user_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-0 User */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
220 
/*
 * Overall TIMA access map for the thread interrupt management context
 * registers: one per-byte access map for each TIMA page, indexed by
 * the page number of the access.
 */
static const uint8_t *xive_tm_views[] = {
    [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
    [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
    [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
    [XIVE_TM_USER_PAGE] = xive_tm_user_view,
};
231 
232 /*
233  * Computes a register access mask for a given offset in the TIMA
234  */
235 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
236 {
237     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
238     uint8_t reg_offset = offset & 0x3F;
239     uint8_t reg_mask = write ? 0x1 : 0x2;
240     uint64_t mask = 0x0;
241     int i;
242 
243     for (i = 0; i < size; i++) {
244         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
245             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
246         }
247     }
248 
249     return mask;
250 }
251 
252 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
253                               unsigned size)
254 {
255     uint8_t ring_offset = offset & 0x30;
256     uint8_t reg_offset = offset & 0x3F;
257     uint64_t mask = xive_tm_mask(offset, size, true);
258     int i;
259 
260     /*
261      * Only 4 or 8 bytes stores are allowed and the User ring is
262      * excluded
263      */
264     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
265         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
266                       HWADDR_PRIx"\n", offset);
267         return;
268     }
269 
270     /*
271      * Use the register offset for the raw values and filter out
272      * reserved values
273      */
274     for (i = 0; i < size; i++) {
275         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
276         if (byte_mask) {
277             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
278                 byte_mask;
279         }
280     }
281 }
282 
283 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
284 {
285     uint8_t ring_offset = offset & 0x30;
286     uint8_t reg_offset = offset & 0x3F;
287     uint64_t mask = xive_tm_mask(offset, size, false);
288     uint64_t ret;
289     int i;
290 
291     /*
292      * Only 4 or 8 bytes loads are allowed and the User ring is
293      * excluded
294      */
295     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
296         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
297                       HWADDR_PRIx"\n", offset);
298         return -1;
299     }
300 
301     /* Use the register offset for the raw values */
302     ret = 0;
303     for (i = 0; i < size; i++) {
304         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
305     }
306 
307     /* filter out reserved values */
308     return ret & mask;
309 }
310 
311 /*
312  * The TM context is mapped twice within each page. Stores and loads
313  * to the first mapping below 2K write and read the specified values
314  * without modification. The second mapping above 2K performs specific
315  * state changes (side effects) in addition to setting/returning the
316  * interrupt management area context of the processor thread.
317  */
318 static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
319 {
320     return xive_tctx_accept(tctx, TM_QW1_OS);
321 }
322 
323 static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
324                                 uint64_t value, unsigned size)
325 {
326     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
327 }
328 
329 /*
330  * Adjust the IPB to allow a CPU to process event queues of other
331  * priorities during one physical interrupt cycle.
332  */
333 static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
334                                    uint64_t value, unsigned size)
335 {
336     ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
337     xive_tctx_notify(tctx, TM_QW1_OS);
338 }
339 
/*
 * Define a mapping of "special" operations depending on the TIMA page
 * offset and the size of the operation.
 */
typedef struct XiveTmOp {
    /* TIMA page the operation belongs to */
    uint8_t  page_offset;
    /* Offset of the operation, matched against the access offset & 0xFFF */
    uint32_t op_offset;
    /* Access size in bytes, which must match exactly */
    unsigned size;
    /* Store handler, NULL when the operation is not a store */
    void     (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
                              unsigned size);
    /* Load handler, NULL when the operation is not a load */
    uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
} XiveTmOp;
352 
/*
 * Table of the special TIMA operations, scanned in order by
 * xive_tm_find_op(). Each entry gives the page, the offset and the
 * access size of the operation along with its store or load handler.
 */
static const XiveTmOp xive_tm_operations[] = {
    /*
     * MMIOs below 2K : raw values and special operations without side
     * effects
     */
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,   1, xive_tm_set_os_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll },

    /* MMIOs above 2K : special operations with side effects */
    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,     2, NULL, xive_tm_ack_hv_reg },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  4, NULL, xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  8, NULL, xive_tm_pull_pool_ctx },
};
370 
371 static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
372 {
373     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
374     uint32_t op_offset = offset & 0xFFF;
375     int i;
376 
377     for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
378         const XiveTmOp *xto = &xive_tm_operations[i];
379 
380         /* Accesses done from a more privileged TIMA page is allowed */
381         if (xto->page_offset >= page_offset &&
382             xto->op_offset == op_offset &&
383             xto->size == size &&
384             ((write && xto->write_handler) || (!write && xto->read_handler))) {
385             return xto;
386         }
387     }
388     return NULL;
389 }
390 
391 /*
392  * TIMA MMIO handlers
393  */
394 void xive_tctx_tm_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
395                         unsigned size)
396 {
397     const XiveTmOp *xto;
398 
399     /*
400      * TODO: check V bit in Q[0-3]W2
401      */
402 
403     /*
404      * First, check for special operations in the 2K region
405      */
406     if (offset & 0x800) {
407         xto = xive_tm_find_op(offset, size, true);
408         if (!xto) {
409             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
410                           "@%"HWADDR_PRIx"\n", offset);
411         } else {
412             xto->write_handler(tctx, offset, value, size);
413         }
414         return;
415     }
416 
417     /*
418      * Then, for special operations in the region below 2K.
419      */
420     xto = xive_tm_find_op(offset, size, true);
421     if (xto) {
422         xto->write_handler(tctx, offset, value, size);
423         return;
424     }
425 
426     /*
427      * Finish with raw access to the register values
428      */
429     xive_tm_raw_write(tctx, offset, value, size);
430 }
431 
432 uint64_t xive_tctx_tm_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
433 {
434     const XiveTmOp *xto;
435 
436     /*
437      * TODO: check V bit in Q[0-3]W2
438      */
439 
440     /*
441      * First, check for special operations in the 2K region
442      */
443     if (offset & 0x800) {
444         xto = xive_tm_find_op(offset, size, false);
445         if (!xto) {
446             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
447                           "@%"HWADDR_PRIx"\n", offset);
448             return -1;
449         }
450         return xto->read_handler(tctx, offset, size);
451     }
452 
453     /*
454      * Then, for special operations in the region below 2K.
455      */
456     xto = xive_tm_find_op(offset, size, false);
457     if (xto) {
458         return xto->read_handler(tctx, offset, size);
459     }
460 
461     /*
462      * Finish with raw access to the register values
463      */
464     return xive_tm_raw_read(tctx, offset, size);
465 }
466 
467 static void xive_tm_write(void *opaque, hwaddr offset,
468                           uint64_t value, unsigned size)
469 {
470     XiveTCTX *tctx = xive_router_get_tctx(XIVE_ROUTER(opaque), current_cpu);
471 
472     xive_tctx_tm_write(tctx, offset, value, size);
473 }
474 
475 static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
476 {
477     XiveTCTX *tctx = xive_router_get_tctx(XIVE_ROUTER(opaque), current_cpu);
478 
479     return xive_tctx_tm_read(tctx, offset, size);
480 }
481 
/*
 * Memory region operations of the TIMA: a big-endian device for which
 * accesses of 1 to 8 bytes are both valid and implemented.
 */
const MemoryRegionOps xive_tm_ops = {
    .read = xive_tm_read,
    .write = xive_tm_write,
    .endianness = DEVICE_BIG_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};
495 
496 static char *xive_tctx_ring_print(uint8_t *ring)
497 {
498     uint32_t w2 = xive_tctx_word2(ring);
499 
500     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
501                    "%02x  %02x   %02x  %08x",
502                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
503                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
504                    be32_to_cpu(w2));
505 }
506 
/* Ring names, indexed by ring number, used when dumping a context */
static const char * const xive_tctx_ring_names[] = {
    "USER", "OS", "POOL", "PHYS",
};
510 
511 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
512 {
513     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
514     int i;
515 
516     if (kvm_irqchip_in_kernel()) {
517         Error *local_err = NULL;
518 
519         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
520         if (local_err) {
521             error_report_err(local_err);
522             return;
523         }
524     }
525 
526     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
527                    "  W2\n", cpu_index);
528 
529     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
530         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
531         monitor_printf(mon, "CPU[%04x]: %4s    %s\n", cpu_index,
532                        xive_tctx_ring_names[i], s);
533         g_free(s);
534     }
535 }
536 
537 static void xive_tctx_reset(void *dev)
538 {
539     XiveTCTX *tctx = XIVE_TCTX(dev);
540 
541     memset(tctx->regs, 0, sizeof(tctx->regs));
542 
543     /* Set some defaults */
544     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
545     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
546     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
547 
548     /*
549      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
550      * CPPR is first set.
551      */
552     tctx->regs[TM_QW1_OS + TM_PIPR] =
553         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
554     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
555         ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
556 }
557 
558 static void xive_tctx_realize(DeviceState *dev, Error **errp)
559 {
560     XiveTCTX *tctx = XIVE_TCTX(dev);
561     PowerPCCPU *cpu;
562     CPUPPCState *env;
563     Object *obj;
564     Error *local_err = NULL;
565 
566     obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
567     if (!obj) {
568         error_propagate(errp, local_err);
569         error_prepend(errp, "required link 'cpu' not found: ");
570         return;
571     }
572 
573     cpu = POWERPC_CPU(obj);
574     tctx->cs = CPU(obj);
575 
576     env = &cpu->env;
577     switch (PPC_INPUT(env)) {
578     case PPC_FLAGS_INPUT_POWER9:
579         tctx->hv_output = env->irq_inputs[POWER9_INPUT_HINT];
580         tctx->os_output = env->irq_inputs[POWER9_INPUT_INT];
581         break;
582 
583     default:
584         error_setg(errp, "XIVE interrupt controller does not support "
585                    "this CPU bus model");
586         return;
587     }
588 
589     /* Connect the presenter to the VCPU (required for CPU hotplug) */
590     if (kvm_irqchip_in_kernel()) {
591         kvmppc_xive_cpu_connect(tctx, &local_err);
592         if (local_err) {
593             error_propagate(errp, local_err);
594             return;
595         }
596     }
597 
598     qemu_register_reset(xive_tctx_reset, dev);
599 }
600 
601 static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
602 {
603     qemu_unregister_reset(xive_tctx_reset, dev);
604 }
605 
606 static int vmstate_xive_tctx_pre_save(void *opaque)
607 {
608     Error *local_err = NULL;
609 
610     if (kvm_irqchip_in_kernel()) {
611         kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
612         if (local_err) {
613             error_report_err(local_err);
614             return -1;
615         }
616     }
617 
618     return 0;
619 }
620 
621 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
622 {
623     Error *local_err = NULL;
624 
625     if (kvm_irqchip_in_kernel()) {
626         /*
627          * Required for hotplugged CPU, for which the state comes
628          * after all states of the machine.
629          */
630         kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err);
631         if (local_err) {
632             error_report_err(local_err);
633             return -1;
634         }
635     }
636 
637     return 0;
638 }
639 
/*
 * Migration description of a thread context: only the register
 * buffer is transferred. The pre_save and post_load hooks exchange
 * the state with KVM when the irqchip is in the kernel.
 */
static const VMStateDescription vmstate_xive_tctx = {
    .name = TYPE_XIVE_TCTX,
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmstate_xive_tctx_pre_save,
    .post_load = vmstate_xive_tctx_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_BUFFER(regs, XiveTCTX),
        VMSTATE_END_OF_LIST()
    },
};
651 
652 static void xive_tctx_class_init(ObjectClass *klass, void *data)
653 {
654     DeviceClass *dc = DEVICE_CLASS(klass);
655 
656     dc->desc = "XIVE Interrupt Thread Context";
657     dc->realize = xive_tctx_realize;
658     dc->unrealize = xive_tctx_unrealize;
659     dc->vmsd = &vmstate_xive_tctx;
660 }
661 
/* QOM type description of the thread context, a plain device */
static const TypeInfo xive_tctx_info = {
    .name          = TYPE_XIVE_TCTX,
    .parent        = TYPE_DEVICE,
    .instance_size = sizeof(XiveTCTX),
    .class_init    = xive_tctx_class_init,
};
668 
669 Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp)
670 {
671     Error *local_err = NULL;
672     Object *obj;
673 
674     obj = object_new(TYPE_XIVE_TCTX);
675     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort);
676     object_unref(obj);
677     object_property_add_const_link(obj, "cpu", cpu, &error_abort);
678     object_property_set_bool(obj, true, "realized", &local_err);
679     if (local_err) {
680         goto error;
681     }
682 
683     return obj;
684 
685 error:
686     object_unparent(obj);
687     error_propagate(errp, local_err);
688     return NULL;
689 }
690 
691 /*
692  * XIVE ESB helpers
693  */
694 
/*
 * Replace the two low bits (the PQ state) of '*pq' with the two low
 * bits of 'value', leaving the other bits of '*pq' untouched.
 * Returns the previous PQ state.
 */
static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
{
    uint8_t previous = *pq & 0x3;

    *pq = (*pq & ~0x3) | (value & 0x3);

    return previous;
}
704 
705 static bool xive_esb_trigger(uint8_t *pq)
706 {
707     uint8_t old_pq = *pq & 0x3;
708 
709     switch (old_pq) {
710     case XIVE_ESB_RESET:
711         xive_esb_set(pq, XIVE_ESB_PENDING);
712         return true;
713     case XIVE_ESB_PENDING:
714     case XIVE_ESB_QUEUED:
715         xive_esb_set(pq, XIVE_ESB_QUEUED);
716         return false;
717     case XIVE_ESB_OFF:
718         xive_esb_set(pq, XIVE_ESB_OFF);
719         return false;
720     default:
721          g_assert_not_reached();
722     }
723 }
724 
725 static bool xive_esb_eoi(uint8_t *pq)
726 {
727     uint8_t old_pq = *pq & 0x3;
728 
729     switch (old_pq) {
730     case XIVE_ESB_RESET:
731     case XIVE_ESB_PENDING:
732         xive_esb_set(pq, XIVE_ESB_RESET);
733         return false;
734     case XIVE_ESB_QUEUED:
735         xive_esb_set(pq, XIVE_ESB_PENDING);
736         return true;
737     case XIVE_ESB_OFF:
738         xive_esb_set(pq, XIVE_ESB_OFF);
739         return false;
740     default:
741          g_assert_not_reached();
742     }
743 }
744 
745 /*
746  * XIVE Interrupt Source (or IVSE)
747  */
748 
749 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
750 {
751     assert(srcno < xsrc->nr_irqs);
752 
753     return xsrc->status[srcno] & 0x3;
754 }
755 
756 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
757 {
758     assert(srcno < xsrc->nr_irqs);
759 
760     return xive_esb_set(&xsrc->status[srcno], pq);
761 }
762 
763 /*
764  * Returns whether the event notification should be forwarded.
765  */
766 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
767 {
768     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
769 
770     xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
771 
772     switch (old_pq) {
773     case XIVE_ESB_RESET:
774         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
775         return true;
776     default:
777         return false;
778     }
779 }
780 
781 /*
782  * Returns whether the event notification should be forwarded.
783  */
784 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
785 {
786     bool ret;
787 
788     assert(srcno < xsrc->nr_irqs);
789 
790     ret = xive_esb_trigger(&xsrc->status[srcno]);
791 
792     if (xive_source_irq_is_lsi(xsrc, srcno) &&
793         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
794         qemu_log_mask(LOG_GUEST_ERROR,
795                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
796     }
797 
798     return ret;
799 }
800 
801 /*
802  * Returns whether the event notification should be forwarded.
803  */
804 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
805 {
806     bool ret;
807 
808     assert(srcno < xsrc->nr_irqs);
809 
810     ret = xive_esb_eoi(&xsrc->status[srcno]);
811 
812     /*
813      * LSI sources do not set the Q bit but they can still be
814      * asserted, in which case we should forward a new event
815      * notification
816      */
817     if (xive_source_irq_is_lsi(xsrc, srcno) &&
818         xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
819         ret = xive_source_lsi_trigger(xsrc, srcno);
820     }
821 
822     return ret;
823 }
824 
825 /*
826  * Forward the source event notification to the Router
827  */
828 static void xive_source_notify(XiveSource *xsrc, int srcno)
829 {
830     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
831 
832     if (xnc->notify) {
833         xnc->notify(xsrc->xive, srcno);
834     }
835 }
836 
837 /*
838  * In a two pages ESB MMIO setting, even page is the trigger page, odd
839  * page is for management
840  */
841 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
842 {
843     return !((addr >> shift) & 1);
844 }
845 
846 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
847 {
848     return xive_source_esb_has_2page(xsrc) &&
849         addr_is_even(addr, xsrc->esb_shift - 1);
850 }
851 
852 /*
853  * ESB MMIO loads
854  *                      Trigger page    Management/EOI page
855  *
856  * ESB MMIO setting     2 pages         1 or 2 pages
857  *
858  * 0x000 .. 0x3FF       -1              EOI and return 0|1
859  * 0x400 .. 0x7FF       -1              EOI and return 0|1
860  * 0x800 .. 0xBFF       -1              return PQ
861  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
862  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
 * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
 * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
865  */
866 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
867 {
868     XiveSource *xsrc = XIVE_SOURCE(opaque);
869     uint32_t offset = addr & 0xFFF;
870     uint32_t srcno = addr >> xsrc->esb_shift;
871     uint64_t ret = -1;
872 
873     /* In a two pages ESB MMIO setting, trigger page should not be read */
874     if (xive_source_is_trigger_page(xsrc, addr)) {
875         qemu_log_mask(LOG_GUEST_ERROR,
876                       "XIVE: invalid load on IRQ %d trigger page at "
877                       "0x%"HWADDR_PRIx"\n", srcno, addr);
878         return -1;
879     }
880 
881     switch (offset) {
882     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
883         ret = xive_source_esb_eoi(xsrc, srcno);
884 
885         /* Forward the source event notification for routing */
886         if (ret) {
887             xive_source_notify(xsrc, srcno);
888         }
889         break;
890 
891     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
892         ret = xive_source_esb_get(xsrc, srcno);
893         break;
894 
895     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
896     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
897     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
898     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
899         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
900         break;
901     default:
902         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
903                       offset);
904     }
905 
906     return ret;
907 }
908 
909 /*
910  * ESB MMIO stores
911  *                      Trigger page    Management/EOI page
912  *
913  * ESB MMIO setting     2 pages         1 or 2 pages
914  *
915  * 0x000 .. 0x3FF       Trigger         Trigger
916  * 0x400 .. 0x7FF       Trigger         EOI
917  * 0x800 .. 0xBFF       Trigger         undefined
918  * 0xC00 .. 0xCFF       Trigger         PQ=00
919  * 0xD00 .. 0xDFF       Trigger         PQ=01
 * 0xE00 .. 0xEFF       Trigger         PQ=10
 * 0xF00 .. 0xFFF       Trigger         PQ=11
922  */
923 static void xive_source_esb_write(void *opaque, hwaddr addr,
924                                   uint64_t value, unsigned size)
925 {
926     XiveSource *xsrc = XIVE_SOURCE(opaque);
927     uint32_t offset = addr & 0xFFF;
928     uint32_t srcno = addr >> xsrc->esb_shift;
929     bool notify = false;
930 
931     /* In a two pages ESB MMIO setting, trigger page only triggers */
932     if (xive_source_is_trigger_page(xsrc, addr)) {
933         notify = xive_source_esb_trigger(xsrc, srcno);
934         goto out;
935     }
936 
937     switch (offset) {
938     case 0 ... 0x3FF:
939         notify = xive_source_esb_trigger(xsrc, srcno);
940         break;
941 
942     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
943         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
944             qemu_log_mask(LOG_GUEST_ERROR,
945                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
946             return;
947         }
948 
949         notify = xive_source_esb_eoi(xsrc, srcno);
950         break;
951 
952     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
953     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
954     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
955     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
956         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
957         break;
958 
959     default:
960         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
961                       offset);
962         return;
963     }
964 
965 out:
966     /* Forward the source event notification for routing */
967     if (notify) {
968         xive_source_notify(xsrc, srcno);
969     }
970 }
971 
/*
 * Memory region operations of the ESB pages: a big-endian device for
 * which only 8-byte accesses are valid and implemented.
 */
static const MemoryRegionOps xive_source_esb_ops = {
    .read = xive_source_esb_read,
    .write = xive_source_esb_write,
    .endianness = DEVICE_BIG_ENDIAN,
    .valid = {
        .min_access_size = 8,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 8,
        .max_access_size = 8,
    },
};
985 
986 void xive_source_set_irq(void *opaque, int srcno, int val)
987 {
988     XiveSource *xsrc = XIVE_SOURCE(opaque);
989     bool notify = false;
990 
991     if (xive_source_irq_is_lsi(xsrc, srcno)) {
992         if (val) {
993             notify = xive_source_lsi_trigger(xsrc, srcno);
994         } else {
995             xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
996         }
997     } else {
998         if (val) {
999             notify = xive_source_esb_trigger(xsrc, srcno);
1000         }
1001     }
1002 
1003     /* Forward the source event notification for routing */
1004     if (notify) {
1005         xive_source_notify(xsrc, srcno);
1006     }
1007 }
1008 
1009 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
1010 {
1011     int i;
1012 
1013     for (i = 0; i < xsrc->nr_irqs; i++) {
1014         uint8_t pq = xive_source_esb_get(xsrc, i);
1015 
1016         if (pq == XIVE_ESB_OFF) {
1017             continue;
1018         }
1019 
1020         monitor_printf(mon, "  %08x %s %c%c%c\n", i + offset,
1021                        xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1022                        pq & XIVE_ESB_VAL_P ? 'P' : '-',
1023                        pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1024                        xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ');
1025     }
1026 }
1027 
1028 static void xive_source_reset(void *dev)
1029 {
1030     XiveSource *xsrc = XIVE_SOURCE(dev);
1031 
1032     /* Do not clear the LSI bitmap */
1033 
1034     /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
1035     memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
1036 }
1037 
1038 static void xive_source_realize(DeviceState *dev, Error **errp)
1039 {
1040     XiveSource *xsrc = XIVE_SOURCE(dev);
1041     Object *obj;
1042     Error *local_err = NULL;
1043 
1044     obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
1045     if (!obj) {
1046         error_propagate(errp, local_err);
1047         error_prepend(errp, "required link 'xive' not found: ");
1048         return;
1049     }
1050 
1051     xsrc->xive = XIVE_NOTIFIER(obj);
1052 
1053     if (!xsrc->nr_irqs) {
1054         error_setg(errp, "Number of interrupt needs to be greater than 0");
1055         return;
1056     }
1057 
1058     if (xsrc->esb_shift != XIVE_ESB_4K &&
1059         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1060         xsrc->esb_shift != XIVE_ESB_64K &&
1061         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1062         error_setg(errp, "Invalid ESB shift setting");
1063         return;
1064     }
1065 
1066     xsrc->status = g_malloc0(xsrc->nr_irqs);
1067     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1068 
1069     if (!kvm_irqchip_in_kernel()) {
1070         memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
1071                               &xive_source_esb_ops, xsrc, "xive.esb",
1072                               (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
1073     }
1074 
1075     qemu_register_reset(xive_source_reset, dev);
1076 }
1077 
1078 static const VMStateDescription vmstate_xive_source = {
1079     .name = TYPE_XIVE_SOURCE,
1080     .version_id = 1,
1081     .minimum_version_id = 1,
1082     .fields = (VMStateField[]) {
1083         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1084         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1085         VMSTATE_END_OF_LIST()
1086     },
1087 };
1088 
1089 /*
1090  * The default XIVE interrupt source setting for the ESB MMIOs is two
1091  * 64k pages without Store EOI, to be in sync with KVM.
1092  */
1093 static Property xive_source_properties[] = {
1094     DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
1095     DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
1096     DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
1097     DEFINE_PROP_END_OF_LIST(),
1098 };
1099 
1100 static void xive_source_class_init(ObjectClass *klass, void *data)
1101 {
1102     DeviceClass *dc = DEVICE_CLASS(klass);
1103 
1104     dc->desc    = "XIVE Interrupt Source";
1105     dc->props   = xive_source_properties;
1106     dc->realize = xive_source_realize;
1107     dc->vmsd    = &vmstate_xive_source;
1108 }
1109 
1110 static const TypeInfo xive_source_info = {
1111     .name          = TYPE_XIVE_SOURCE,
1112     .parent        = TYPE_DEVICE,
1113     .instance_size = sizeof(XiveSource),
1114     .class_init    = xive_source_class_init,
1115 };
1116 
1117 /*
1118  * XiveEND helpers
1119  */
1120 
1121 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
1122 {
1123     uint64_t qaddr_base = xive_end_qaddr(end);
1124     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1125     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1126     uint32_t qentries = 1 << (qsize + 10);
1127     int i;
1128 
1129     /*
1130      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1131      */
1132     monitor_printf(mon, " [ ");
1133     qindex = (qindex - (width - 1)) & (qentries - 1);
1134     for (i = 0; i < width; i++) {
1135         uint64_t qaddr = qaddr_base + (qindex << 2);
1136         uint32_t qdata = -1;
1137 
1138         if (dma_memory_read(&address_space_memory, qaddr, &qdata,
1139                             sizeof(qdata))) {
1140             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1141                           HWADDR_PRIx "\n", qaddr);
1142             return;
1143         }
1144         monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
1145                        be32_to_cpu(qdata));
1146         qindex = (qindex + 1) & (qentries - 1);
1147     }
1148 }
1149 
1150 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
1151 {
1152     uint64_t qaddr_base = xive_end_qaddr(end);
1153     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1154     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1155     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1156     uint32_t qentries = 1 << (qsize + 10);
1157 
1158     uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1159     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1160 
1161     if (!xive_end_is_valid(end)) {
1162         return;
1163     }
1164 
1165     monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
1166                    "% 6d/%5d ^%d", end_idx,
1167                    xive_end_is_valid(end)    ? 'v' : '-',
1168                    xive_end_is_enqueue(end)  ? 'q' : '-',
1169                    xive_end_is_notify(end)   ? 'n' : '-',
1170                    xive_end_is_backlog(end)  ? 'b' : '-',
1171                    xive_end_is_escalate(end) ? 'e' : '-',
1172                    priority, nvt, qaddr_base, qindex, qentries, qgen);
1173 
1174     xive_end_queue_pic_print_info(end, 6, mon);
1175     monitor_printf(mon, "]\n");
1176 }
1177 
1178 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1179 {
1180     uint64_t qaddr_base = xive_end_qaddr(end);
1181     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1182     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1183     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1184 
1185     uint64_t qaddr = qaddr_base + (qindex << 2);
1186     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1187     uint32_t qentries = 1 << (qsize + 10);
1188 
1189     if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
1190         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1191                       HWADDR_PRIx "\n", qaddr);
1192         return;
1193     }
1194 
1195     qindex = (qindex + 1) & (qentries - 1);
1196     if (qindex == 0) {
1197         qgen ^= 1;
1198         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1199     }
1200     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1201 }
1202 
1203 /*
1204  * XIVE Router (aka. Virtualization Controller or IVRE)
1205  */
1206 
1207 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1208                         XiveEAS *eas)
1209 {
1210     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1211 
1212     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1213 }
1214 
1215 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1216                         XiveEND *end)
1217 {
1218    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1219 
1220    return xrc->get_end(xrtr, end_blk, end_idx, end);
1221 }
1222 
1223 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1224                           XiveEND *end, uint8_t word_number)
1225 {
1226    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1227 
1228    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1229 }
1230 
1231 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1232                         XiveNVT *nvt)
1233 {
1234    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1235 
1236    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1237 }
1238 
1239 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1240                         XiveNVT *nvt, uint8_t word_number)
1241 {
1242    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1243 
1244    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1245 }
1246 
1247 XiveTCTX *xive_router_get_tctx(XiveRouter *xrtr, CPUState *cs)
1248 {
1249     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1250 
1251     return xrc->get_tctx(xrtr, cs);
1252 }
1253 
1254 /*
1255  * Encode the HW CAM line in the block group mode format :
1256  *
1257  *   chip << 19 | 0000000 0 0001 thread (7Bit)
1258  */
1259 static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx)
1260 {
1261     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1262     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1263 
1264     return xive_nvt_cam_line((pir >> 8) & 0xf, 1 << 7 | (pir & 0x7f));
1265 }
1266 
1267 /*
1268  * The thread context register words are in big-endian format.
1269  */
1270 static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
1271                                      uint8_t nvt_blk, uint32_t nvt_idx,
1272                                      bool cam_ignore, uint32_t logic_serv)
1273 {
1274     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1275     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1276     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1277     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1278     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1279 
1280     /*
1281      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1282      * identifier are ignored in the "CAM" match.
1283      */
1284 
1285     if (format == 0) {
1286         if (cam_ignore == true) {
1287             /*
1288              * F=0 & i=1: Logical server notification (bits ignored at
1289              * the end of the NVT identifier)
1290              */
1291             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1292                           nvt_blk, nvt_idx);
1293              return -1;
1294         }
1295 
1296         /* F=0 & i=0: Specific NVT notification */
1297 
1298         /* PHYS ring */
1299         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1300             cam == xive_tctx_hw_cam_line(tctx)) {
1301             return TM_QW3_HV_PHYS;
1302         }
1303 
1304         /* HV POOL ring */
1305         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1306             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1307             return TM_QW2_HV_POOL;
1308         }
1309 
1310         /* OS ring */
1311         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1312             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1313             return TM_QW1_OS;
1314         }
1315     } else {
1316         /* F=1 : User level Event-Based Branch (EBB) notification */
1317 
1318         /* USER ring */
1319         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1320              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1321              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1322              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1323             return TM_QW0_USER;
1324         }
1325     }
1326     return -1;
1327 }
1328 
1329 typedef struct XiveTCTXMatch {
1330     XiveTCTX *tctx;
1331     uint8_t ring;
1332 } XiveTCTXMatch;
1333 
1334 static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
1335                                  uint8_t nvt_blk, uint32_t nvt_idx,
1336                                  bool cam_ignore, uint8_t priority,
1337                                  uint32_t logic_serv, XiveTCTXMatch *match)
1338 {
1339     CPUState *cs;
1340 
1341     /*
1342      * TODO (PowerNV): handle chip_id overwrite of block field for
1343      * hardwired CAM compares
1344      */
1345 
1346     CPU_FOREACH(cs) {
1347         XiveTCTX *tctx = xive_router_get_tctx(xrtr, cs);
1348         int ring;
1349 
1350         /*
1351          * HW checks that the CPU is enabled in the Physical Thread
1352          * Enable Register (PTER).
1353          */
1354 
1355         /*
1356          * Check the thread context CAM lines and record matches. We
1357          * will handle CPU exception delivery later
1358          */
1359         ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
1360                                          cam_ignore, logic_serv);
1361         /*
1362          * Save the context and follow on to catch duplicates, that we
1363          * don't support yet.
1364          */
1365         if (ring != -1) {
1366             if (match->tctx) {
1367                 qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
1368                               "context NVT %x/%x\n", nvt_blk, nvt_idx);
1369                 return false;
1370             }
1371 
1372             match->ring = ring;
1373             match->tctx = tctx;
1374         }
1375     }
1376 
1377     if (!match->tctx) {
1378         qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
1379                       nvt_blk, nvt_idx);
1380         return false;
1381     }
1382 
1383     return true;
1384 }
1385 
1386 /*
1387  * This is our simple Xive Presenter Engine model. It is merged in the
1388  * Router as it does not require an extra object.
1389  *
1390  * It receives notification requests sent by the IVRE to find one
1391  * matching NVT (or more) dispatched on the processor threads. In case
1392  * of a single NVT notification, the process is abreviated and the
1393  * thread is signaled if a match is found. In case of a logical server
1394  * notification (bits ignored at the end of the NVT identifier), the
1395  * IVPE and IVRE select a winning thread using different filters. This
1396  * involves 2 or 3 exchanges on the PowerBus that the model does not
1397  * support.
1398  *
1399  * The parameters represent what is sent on the PowerBus
1400  */
1401 static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
1402                                   uint8_t nvt_blk, uint32_t nvt_idx,
1403                                   bool cam_ignore, uint8_t priority,
1404                                   uint32_t logic_serv)
1405 {
1406     XiveNVT nvt;
1407     XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
1408     bool found;
1409 
1410     /* NVT cache lookup */
1411     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1412         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1413                       nvt_blk, nvt_idx);
1414         return;
1415     }
1416 
1417     if (!xive_nvt_is_valid(&nvt)) {
1418         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1419                       nvt_blk, nvt_idx);
1420         return;
1421     }
1422 
1423     found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
1424                                  priority, logic_serv, &match);
1425     if (found) {
1426         ipb_update(&match.tctx->regs[match.ring], priority);
1427         xive_tctx_notify(match.tctx, match.ring);
1428         return;
1429     }
1430 
1431     /* Record the IPB in the associated NVT structure */
1432     ipb_update((uint8_t *) &nvt.w4, priority);
1433     xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1434 
1435     /*
1436      * If no matching NVT is dispatched on a HW thread :
1437      * - update the NVT structure if backlog is activated
1438      * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
1439      *   activated
1440      */
1441 }
1442 
1443 /*
1444  * An END trigger can come from an event trigger (IPI or HW) or from
1445  * another chip. We don't model the PowerBus but the END trigger
1446  * message has the same parameters than in the function below.
1447  */
1448 static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
1449                                    uint32_t end_idx, uint32_t end_data)
1450 {
1451     XiveEND end;
1452     uint8_t priority;
1453     uint8_t format;
1454 
1455     /* END cache lookup */
1456     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1457         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1458                       end_idx);
1459         return;
1460     }
1461 
1462     if (!xive_end_is_valid(&end)) {
1463         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1464                       end_blk, end_idx);
1465         return;
1466     }
1467 
1468     if (xive_end_is_enqueue(&end)) {
1469         xive_end_enqueue(&end, end_data);
1470         /* Enqueuing event data modifies the EQ toggle and index */
1471         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1472     }
1473 
1474     /*
1475      * The W7 format depends on the F bit in W6. It defines the type
1476      * of the notification :
1477      *
1478      *   F=0 : single or multiple NVT notification
1479      *   F=1 : User level Event-Based Branch (EBB) notification, no
1480      *         priority
1481      */
1482     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1483     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1484 
1485     /* The END is masked */
1486     if (format == 0 && priority == 0xff) {
1487         return;
1488     }
1489 
1490     /*
1491      * Check the END ESn (Event State Buffer for notification) for
1492      * even futher coalescing in the Router
1493      */
1494     if (!xive_end_is_notify(&end)) {
1495         uint8_t pq = xive_get_field32(END_W1_ESn, end.w1);
1496         bool notify = xive_esb_trigger(&pq);
1497 
1498         if (pq != xive_get_field32(END_W1_ESn, end.w1)) {
1499             end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq);
1500             xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1501         }
1502 
1503         /* ESn[Q]=1 : end of notification */
1504         if (!notify) {
1505             return;
1506         }
1507     }
1508 
1509     /*
1510      * Follows IVPE notification
1511      */
1512     xive_presenter_notify(xrtr, format,
1513                           xive_get_field32(END_W6_NVT_BLOCK, end.w6),
1514                           xive_get_field32(END_W6_NVT_INDEX, end.w6),
1515                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1516                           priority,
1517                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
1518 
1519     /* TODO: Auto EOI. */
1520 }
1521 
1522 void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
1523 {
1524     XiveRouter *xrtr = XIVE_ROUTER(xn);
1525     uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn);
1526     uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn);
1527     XiveEAS eas;
1528 
1529     /* EAS cache lookup */
1530     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
1531         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
1532         return;
1533     }
1534 
1535     /*
1536      * The IVRE checks the State Bit Cache at this point. We skip the
1537      * SBC lookup because the state bits of the sources are modeled
1538      * internally in QEMU.
1539      */
1540 
1541     if (!xive_eas_is_valid(&eas)) {
1542         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
1543         return;
1544     }
1545 
1546     if (xive_eas_is_masked(&eas)) {
1547         /* Notification completed */
1548         return;
1549     }
1550 
1551     /*
1552      * The event trigger becomes an END trigger
1553      */
1554     xive_router_end_notify(xrtr,
1555                            xive_get_field64(EAS_END_BLOCK, eas.w),
1556                            xive_get_field64(EAS_END_INDEX, eas.w),
1557                            xive_get_field64(EAS_END_DATA,  eas.w));
1558 }
1559 
1560 static void xive_router_class_init(ObjectClass *klass, void *data)
1561 {
1562     DeviceClass *dc = DEVICE_CLASS(klass);
1563     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
1564 
1565     dc->desc    = "XIVE Router Engine";
1566     xnc->notify = xive_router_notify;
1567 }
1568 
1569 static const TypeInfo xive_router_info = {
1570     .name          = TYPE_XIVE_ROUTER,
1571     .parent        = TYPE_SYS_BUS_DEVICE,
1572     .abstract      = true,
1573     .class_size    = sizeof(XiveRouterClass),
1574     .class_init    = xive_router_class_init,
1575     .interfaces    = (InterfaceInfo[]) {
1576         { TYPE_XIVE_NOTIFIER },
1577         { }
1578     }
1579 };
1580 
1581 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
1582 {
1583     if (!xive_eas_is_valid(eas)) {
1584         return;
1585     }
1586 
1587     monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
1588                    lisn, xive_eas_is_masked(eas) ? "M" : " ",
1589                    (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1590                    (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1591                    (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1592 }
1593 
1594 /*
1595  * END ESB MMIO loads
1596  */
1597 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
1598 {
1599     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
1600     uint32_t offset = addr & 0xFFF;
1601     uint8_t end_blk;
1602     uint32_t end_idx;
1603     XiveEND end;
1604     uint32_t end_esmask;
1605     uint8_t pq;
1606     uint64_t ret = -1;
1607 
1608     end_blk = xsrc->block_id;
1609     end_idx = addr >> (xsrc->esb_shift + 1);
1610 
1611     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
1612         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1613                       end_idx);
1614         return -1;
1615     }
1616 
1617     if (!xive_end_is_valid(&end)) {
1618         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1619                       end_blk, end_idx);
1620         return -1;
1621     }
1622 
1623     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
1624     pq = xive_get_field32(end_esmask, end.w1);
1625 
1626     switch (offset) {
1627     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1628         ret = xive_esb_eoi(&pq);
1629 
1630         /* Forward the source event notification for routing ?? */
1631         break;
1632 
1633     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1634         ret = pq;
1635         break;
1636 
1637     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1638     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1639     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1640     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1641         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
1642         break;
1643     default:
1644         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
1645                       offset);
1646         return -1;
1647     }
1648 
1649     if (pq != xive_get_field32(end_esmask, end.w1)) {
1650         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
1651         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
1652     }
1653 
1654     return ret;
1655 }
1656 
1657 /*
1658  * END ESB MMIO stores are invalid
1659  */
1660 static void xive_end_source_write(void *opaque, hwaddr addr,
1661                                   uint64_t value, unsigned size)
1662 {
1663     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
1664                   HWADDR_PRIx"\n", addr);
1665 }
1666 
1667 static const MemoryRegionOps xive_end_source_ops = {
1668     .read = xive_end_source_read,
1669     .write = xive_end_source_write,
1670     .endianness = DEVICE_BIG_ENDIAN,
1671     .valid = {
1672         .min_access_size = 8,
1673         .max_access_size = 8,
1674     },
1675     .impl = {
1676         .min_access_size = 8,
1677         .max_access_size = 8,
1678     },
1679 };
1680 
1681 static void xive_end_source_realize(DeviceState *dev, Error **errp)
1682 {
1683     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
1684     Object *obj;
1685     Error *local_err = NULL;
1686 
1687     obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
1688     if (!obj) {
1689         error_propagate(errp, local_err);
1690         error_prepend(errp, "required link 'xive' not found: ");
1691         return;
1692     }
1693 
1694     xsrc->xrtr = XIVE_ROUTER(obj);
1695 
1696     if (!xsrc->nr_ends) {
1697         error_setg(errp, "Number of interrupt needs to be greater than 0");
1698         return;
1699     }
1700 
1701     if (xsrc->esb_shift != XIVE_ESB_4K &&
1702         xsrc->esb_shift != XIVE_ESB_64K) {
1703         error_setg(errp, "Invalid ESB shift setting");
1704         return;
1705     }
1706 
1707     /*
1708      * Each END is assigned an even/odd pair of MMIO pages, the even page
1709      * manages the ESn field while the odd page manages the ESe field.
1710      */
1711     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
1712                           &xive_end_source_ops, xsrc, "xive.end",
1713                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
1714 }
1715 
1716 static Property xive_end_source_properties[] = {
1717     DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0),
1718     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
1719     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
1720     DEFINE_PROP_END_OF_LIST(),
1721 };
1722 
1723 static void xive_end_source_class_init(ObjectClass *klass, void *data)
1724 {
1725     DeviceClass *dc = DEVICE_CLASS(klass);
1726 
1727     dc->desc    = "XIVE END Source";
1728     dc->props   = xive_end_source_properties;
1729     dc->realize = xive_end_source_realize;
1730 }
1731 
1732 static const TypeInfo xive_end_source_info = {
1733     .name          = TYPE_XIVE_END_SOURCE,
1734     .parent        = TYPE_DEVICE,
1735     .instance_size = sizeof(XiveENDSource),
1736     .class_init    = xive_end_source_class_init,
1737 };
1738 
1739 /*
1740  * XIVE Notifier
1741  */
1742 static const TypeInfo xive_notifier_info = {
1743     .name = TYPE_XIVE_NOTIFIER,
1744     .parent = TYPE_INTERFACE,
1745     .class_size = sizeof(XiveNotifierClass),
1746 };
1747 
1748 static void xive_register_types(void)
1749 {
1750     type_register_static(&xive_source_info);
1751     type_register_static(&xive_notifier_info);
1752     type_register_static(&xive_router_info);
1753     type_register_static(&xive_end_source_info);
1754     type_register_static(&xive_tctx_info);
1755 }
1756 
1757 type_init(xive_register_types)
1758