xref: /openbmc/qemu/hw/intc/xive.c (revision 14bcc5239f4d4780ec52881779161c62c46e7243)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qemu/module.h"
12 #include "qapi/error.h"
13 #include "target/ppc/cpu.h"
14 #include "system/cpus.h"
15 #include "system/dma.h"
16 #include "system/reset.h"
17 #include "hw/qdev-properties.h"
18 #include "migration/vmstate.h"
19 #include "hw/irq.h"
20 #include "hw/ppc/xive.h"
21 #include "hw/ppc/xive2.h"
22 #include "hw/ppc/xive_regs.h"
23 #include "trace.h"
24 
25 /*
26  * XIVE Thread Interrupt Management context
27  */
28 
29 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
30 {
31         switch (ring) {
32         case TM_QW0_USER:
33                 return 0; /* Not supported */
34         case TM_QW1_OS:
35                 return tctx->os_output;
36         case TM_QW2_HV_POOL:
37         case TM_QW3_HV_PHYS:
38                 return tctx->hv_output;
39         default:
40                 return 0;
41         }
42 }
43 
44 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
45 {
46     uint8_t *regs = &tctx->regs[ring];
47     uint8_t nsr = regs[TM_NSR];
48 
49     qemu_irq_lower(xive_tctx_output(tctx, ring));
50 
51     if (regs[TM_NSR] != 0) {
52         uint8_t cppr = regs[TM_PIPR];
53         uint8_t alt_ring;
54         uint8_t *alt_regs;
55 
56         /* POOL interrupt uses IPB in QW2, POOL ring */
57         if ((ring == TM_QW3_HV_PHYS) &&
58             ((nsr & TM_QW3_NSR_HE) == (TM_QW3_NSR_HE_POOL << 6))) {
59             alt_ring = TM_QW2_HV_POOL;
60         } else {
61             alt_ring = ring;
62         }
63         alt_regs = &tctx->regs[alt_ring];
64 
65         regs[TM_CPPR] = cppr;
66 
67         /*
68          * If the interrupt was for a specific VP, reset the pending
69          * buffer bit, otherwise clear the logical server indicator
70          */
71         if (regs[TM_NSR] & TM_NSR_GRP_LVL) {
72             regs[TM_NSR] &= ~TM_NSR_GRP_LVL;
73         } else {
74             alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
75         }
76 
77         /* Drop the exception bit and any group/crowd */
78         regs[TM_NSR] = 0;
79 
80         trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
81                                alt_regs[TM_IPB], regs[TM_PIPR],
82                                regs[TM_CPPR], regs[TM_NSR]);
83     }
84 
85     return ((uint64_t)nsr << 8) | regs[TM_CPPR];
86 }
87 
88 void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level)
89 {
90     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
91     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
92     uint8_t *alt_regs = &tctx->regs[alt_ring];
93     uint8_t *regs = &tctx->regs[ring];
94 
95     if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
96         switch (ring) {
97         case TM_QW1_OS:
98             regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
99             break;
100         case TM_QW2_HV_POOL:
101             alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F);
102             break;
103         case TM_QW3_HV_PHYS:
104             regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
105             break;
106         default:
107             g_assert_not_reached();
108         }
109         trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
110                                regs[TM_IPB], alt_regs[TM_PIPR],
111                                alt_regs[TM_CPPR], alt_regs[TM_NSR]);
112         qemu_irq_raise(xive_tctx_output(tctx, ring));
113     } else {
114         alt_regs[TM_NSR] = 0;
115         qemu_irq_lower(xive_tctx_output(tctx, ring));
116     }
117 }
118 
119 void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring)
120 {
121     /*
122      * Lower the External interrupt. Used when pulling a context. It is
123      * necessary to avoid catching it in the higher privilege context. It
124      * should be raised again when re-pushing the lower privilege context.
125      */
126     qemu_irq_lower(xive_tctx_output(tctx, ring));
127 }
128 
129 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
130 {
131     uint8_t *regs = &tctx->regs[ring];
132     uint8_t pipr_min;
133     uint8_t ring_min;
134 
135     trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
136                              regs[TM_IPB], regs[TM_PIPR],
137                              cppr, regs[TM_NSR]);
138 
139     if (cppr > XIVE_PRIORITY_MAX) {
140         cppr = 0xff;
141     }
142 
143     tctx->regs[ring + TM_CPPR] = cppr;
144 
145     /*
146      * Recompute the PIPR based on local pending interrupts.  The PHYS
147      * ring must take the minimum of both the PHYS and POOL PIPR values.
148      */
149     pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
150     ring_min = ring;
151 
152     /* PHYS updates also depend on POOL values */
153     if (ring == TM_QW3_HV_PHYS) {
154         uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL];
155 
156         /* POOL values only matter if POOL ctx is valid */
157         if (pool_regs[TM_WORD2] & 0x80) {
158 
159             uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
160 
161             /*
162              * Determine highest priority interrupt and
163              * remember which ring has it.
164              */
165             if (pool_pipr < pipr_min) {
166                 pipr_min = pool_pipr;
167                 ring_min = TM_QW2_HV_POOL;
168             }
169         }
170     }
171 
172     regs[TM_PIPR] = pipr_min;
173 
174     /* CPPR has changed, check if we need to raise a pending exception */
175     xive_tctx_notify(tctx, ring_min, 0);
176 }
177 
178 void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
179                            uint8_t group_level)
180  {
181     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
182     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
183     uint8_t *alt_regs = &tctx->regs[alt_ring];
184     uint8_t *regs = &tctx->regs[ring];
185 
186     if (group_level == 0) {
187         /* VP-specific */
188         regs[TM_IPB] |= xive_priority_to_ipb(priority);
189         alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]);
190     } else {
191         /* VP-group */
192         alt_regs[TM_PIPR] = xive_priority_to_pipr(priority);
193     }
194     xive_tctx_notify(tctx, ring, group_level);
195  }
196 
197 /*
198  * XIVE Thread Interrupt Management Area (TIMA)
199  */
200 
201 static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
202                                 hwaddr offset, uint64_t value, unsigned size)
203 {
204     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
205 }
206 
207 static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
208                                    hwaddr offset, unsigned size)
209 {
210     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
211 }
212 
213 static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
214                                       hwaddr offset, unsigned size)
215 {
216     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
217     uint32_t qw2w2;
218 
219     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
220     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
221     return qw2w2;
222 }
223 
224 static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
225                                       hwaddr offset, unsigned size)
226 {
227     uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
228     uint8_t qw3b8;
229 
230     qw3b8 = qw3b8_prev & ~TM_QW3B8_VT;
231     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8;
232     return qw3b8;
233 }
234 
235 static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
236                             uint64_t value, unsigned size)
237 {
238     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
239 }
240 
241 static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
242                                 hwaddr offset, unsigned size)
243 {
244     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
245 }
246 
247 /*
 * Define an access map for each page of the TIMA. The memory region
 * ops use these maps to filter values when doing loads and stores of
 * raw register values.
251  *
252  * Registers accessibility bits :
253  *
254  *    0x0 - no access
255  *    0x1 - write only
256  *    0x2 - read only
257  *    0x3 - read/write
258  */
259 
/* Per-byte access bits of the four rings as seen from the HW page */
static const uint8_t xive_tm_hw_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,   0, 0, 0, 0,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-1 OS */
    3, 3, 3, 3,   3, 3, 0, 2,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 3, 3,   0, 3, 3, 0,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-3 PHYS */
    3, 3, 3, 3,   0, 3, 0, 2,
    3, 0, 0, 3,   3, 3, 3, 0,
};
266 
/* Per-byte access bits of the four rings as seen from the HV page */
static const uint8_t xive_tm_hv_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,   0, 0, 0, 0,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-1 OS */
    3, 3, 3, 3,   3, 3, 0, 2,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 3, 3,   0, 3, 3, 0,
    0, 3, 3, 3,   0, 0, 0, 0,
    /* QW-3 PHYS */
    3, 3, 3, 3,   0, 3, 0, 2,
    3, 0, 0, 3,   0, 0, 0, 0,
};
273 
/* Per-byte access bits of the four rings as seen from the OS page */
static const uint8_t xive_tm_os_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,   0, 0, 0, 0,
    3, 3, 3, 3,   0, 0, 0, 0,
    /* QW-1 OS */
    2, 3, 2, 2,   2, 2, 0, 2,
    0, 0, 0, 0,   0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
    /* QW-3 PHYS */
    0, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
};
280 
/* Per-byte access bits of the four rings as seen from the USER page */
static const uint8_t xive_tm_user_view[] = {
    /* QW-0 User */
    3, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
    /* QW-1 OS */
    0, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
    /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
    /* QW-3 PHYS */
    0, 0, 0, 0,   0, 0, 0, 0,
    0, 0, 0, 0,   0, 0, 0, 0,
};
287 
288 /*
289  * Overall TIMA access map for the thread interrupt management context
290  * registers
291  */
292 static const uint8_t *xive_tm_views[] = {
293     [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
294     [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
295     [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
296     [XIVE_TM_USER_PAGE] = xive_tm_user_view,
297 };
298 
299 /*
300  * Computes a register access mask for a given offset in the TIMA
301  */
302 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
303 {
304     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
305     uint8_t reg_offset = offset & TM_REG_OFFSET;
306     uint8_t reg_mask = write ? 0x1 : 0x2;
307     uint64_t mask = 0x0;
308     int i;
309 
310     for (i = 0; i < size; i++) {
311         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
312             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
313         }
314     }
315 
316     return mask;
317 }
318 
319 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
320                               unsigned size)
321 {
322     uint8_t ring_offset = offset & TM_RING_OFFSET;
323     uint8_t reg_offset = offset & TM_REG_OFFSET;
324     uint64_t mask = xive_tm_mask(offset, size, true);
325     int i;
326 
327     /*
328      * Only 4 or 8 bytes stores are allowed and the User ring is
329      * excluded
330      */
331     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
332         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
333                       HWADDR_PRIx" size %d\n", offset, size);
334         return;
335     }
336 
337     /*
338      * Use the register offset for the raw values and filter out
339      * reserved values
340      */
341     for (i = 0; i < size; i++) {
342         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
343         if (byte_mask) {
344             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
345                 byte_mask;
346         }
347     }
348 }
349 
350 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
351 {
352     uint8_t ring_offset = offset & TM_RING_OFFSET;
353     uint8_t reg_offset = offset & TM_REG_OFFSET;
354     uint64_t mask = xive_tm_mask(offset, size, false);
355     uint64_t ret;
356     int i;
357 
358     /*
359      * Only 4 or 8 bytes loads are allowed and the User ring is
360      * excluded
361      */
362     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
363         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
364                       HWADDR_PRIx" size %d\n", offset, size);
365         return -1;
366     }
367 
368     /* Use the register offset for the raw values */
369     ret = 0;
370     for (i = 0; i < size; i++) {
371         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
372     }
373 
374     /* filter out reserved values */
375     return ret & mask;
376 }
377 
378 /*
379  * The TM context is mapped twice within each page. Stores and loads
380  * to the first mapping below 2K write and read the specified values
381  * without modification. The second mapping above 2K performs specific
382  * state changes (side effects) in addition to setting/returning the
383  * interrupt management area context of the processor thread.
384  */
385 static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
386                                    hwaddr offset, unsigned size)
387 {
388     return xive_tctx_accept(tctx, TM_QW1_OS);
389 }
390 
391 static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
392                                 hwaddr offset, uint64_t value, unsigned size)
393 {
394     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
395 }
396 
397 static void xive_tctx_set_lgs(XiveTCTX *tctx, uint8_t ring, uint8_t lgs)
398 {
399     uint8_t *regs = &tctx->regs[ring];
400 
401     regs[TM_LGS] = lgs;
402 }
403 
404 static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
405                           hwaddr offset, uint64_t value, unsigned size)
406 {
407     xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff);
408 }
409 
410 /*
411  * Adjust the PIPR to allow a CPU to process event queues of other
412  * priorities during one physical interrupt cycle.
413  */
414 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
415                                    hwaddr offset, uint64_t value, unsigned size)
416 {
417     xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0);
418 }
419 
420 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
421                                uint32_t *nvt_idx, bool *vo)
422 {
423     if (nvt_blk) {
424         *nvt_blk = xive_nvt_blk(cam);
425     }
426     if (nvt_idx) {
427         *nvt_idx = xive_nvt_idx(cam);
428     }
429     if (vo) {
430         *vo = !!(cam & TM_QW1W2_VO);
431     }
432 }
433 
434 static uint32_t xive_tctx_get_os_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
435                                      uint32_t *nvt_idx, bool *vo)
436 {
437     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
438     uint32_t cam = be32_to_cpu(qw1w2);
439 
440     xive_os_cam_decode(cam, nvt_blk, nvt_idx, vo);
441     return qw1w2;
442 }
443 
444 static void xive_tctx_set_os_cam(XiveTCTX *tctx, uint32_t qw1w2)
445 {
446     memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
447 }
448 
449 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
450                                     hwaddr offset, unsigned size)
451 {
452     uint32_t qw1w2;
453     uint32_t qw1w2_new;
454     uint8_t nvt_blk;
455     uint32_t nvt_idx;
456     bool vo;
457 
458     qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
459 
460     if (!vo) {
461         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
462                       nvt_blk, nvt_idx);
463     }
464 
465     /* Invalidate CAM line */
466     qw1w2_new = xive_set_field32(TM_QW1W2_VO, qw1w2, 0);
467     xive_tctx_set_os_cam(tctx, qw1w2_new);
468 
469     xive_tctx_reset_signal(tctx, TM_QW1_OS);
470     return qw1w2;
471 }
472 
473 static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
474                                   uint8_t nvt_blk, uint32_t nvt_idx)
475 {
476     XiveNVT nvt;
477     uint8_t ipb;
478 
479     /*
480      * Grab the associated NVT to pull the pending bits, and merge
481      * them with the IPB of the thread interrupt context registers
482      */
483     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
484         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVT %x/%x\n",
485                           nvt_blk, nvt_idx);
486         return;
487     }
488 
489     ipb = xive_get_field32(NVT_W4_IPB, nvt.w4);
490 
491     if (ipb) {
492         /* Reset the NVT value */
493         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
494         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
495 
496         uint8_t *regs = &tctx->regs[TM_QW1_OS];
497         regs[TM_IPB] |= ipb;
498     }
499 
500     /*
501      * Always call xive_tctx_pipr_update(). Even if there were no
502      * escalation triggered, there could be a pending interrupt which
503      * was saved when the context was pulled and that we need to take
504      * into account by recalculating the PIPR (which is not
505      * saved/restored).
506      * It will also raise the External interrupt signal if needed.
507      */
508     xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */
509 }
510 
511 /*
512  * Updating the OS CAM line can trigger a resend of interrupt
513  */
514 static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
515                                 hwaddr offset, uint64_t value, unsigned size)
516 {
517     uint32_t cam = value;
518     uint32_t qw1w2 = cpu_to_be32(cam);
519     uint8_t nvt_blk;
520     uint32_t nvt_idx;
521     bool vo;
522 
523     xive_os_cam_decode(cam, &nvt_blk, &nvt_idx, &vo);
524 
525     /* First update the registers */
526     xive_tctx_set_os_cam(tctx, qw1w2);
527 
528     /* Check the interrupt pending bits */
529     if (vo) {
530         xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
531     }
532 }
533 
534 static uint32_t xive_presenter_get_config(XivePresenter *xptr)
535 {
536     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
537 
538     return xpc->get_config(xptr);
539 }
540 
541 /*
542  * Define a mapping of "special" operations depending on the TIMA page
543  * offset and the size of the operation.
544  */
545 typedef struct XiveTmOp {
546     uint8_t  page_offset;
547     uint32_t op_offset;
548     unsigned size;
549     void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
550                               hwaddr offset,
551                               uint64_t value, unsigned size);
552     uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
553                              unsigned size);
554 } XiveTmOp;
555 
556 static const XiveTmOp xive_tm_operations[] = {
557     /*
558      * MMIOs below 2K : raw values and special operations without side
559      * effects
560      */
561     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
562                                                      NULL },
563     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive_tm_push_os_ctx,
564                                                      NULL },
565     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
566                                                      NULL },
567     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
568                                                      NULL },
569     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
570                                                      xive_tm_vt_poll },
571 
572     /* MMIOs above 2K : special operations with side effects */
573     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
574                                                      xive_tm_ack_os_reg },
575     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
576                                                      NULL },
577     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
578                                                      xive_tm_pull_os_ctx },
579     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
580                                                      xive_tm_pull_os_ctx },
581     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
582                                                      xive_tm_ack_hv_reg },
583     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
584                                                      xive_tm_pull_pool_ctx },
585     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
586                                                      xive_tm_pull_pool_ctx },
587     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
588                                                      xive_tm_pull_phys_ctx },
589 };
590 
591 static const XiveTmOp xive2_tm_operations[] = {
592     /*
593      * MMIOs below 2K : raw values and special operations without side
594      * effects
595      */
596     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive2_tm_set_os_cppr,
597                                                      NULL },
598     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
599                                                      NULL },
600     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      8, xive2_tm_push_os_ctx,
601                                                      NULL },
602     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
603                                                      NULL },
604     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive2_tm_set_hv_cppr,
605                                                      NULL },
606     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
607                                                      NULL },
608     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
609                                                      xive_tm_vt_poll },
610     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T,     1, xive2_tm_set_hv_target,
611                                                      NULL },
612 
613     /* MMIOs above 2K : special operations with side effects */
614     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
615                                                      xive_tm_ack_os_reg },
616     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
617                                                      NULL },
618     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2,     4, NULL,
619                                                      xive2_tm_pull_os_ctx },
620     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
621                                                      xive2_tm_pull_os_ctx },
622     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
623                                                      xive2_tm_pull_os_ctx },
624     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
625                                                      xive_tm_ack_hv_reg },
626     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2,   4, NULL,
627                                                      xive_tm_pull_pool_ctx },
628     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
629                                                      xive_tm_pull_pool_ctx },
630     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
631                                                      xive_tm_pull_pool_ctx },
632     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL,     1, xive2_tm_pull_os_ctx_ol,
633                                                      NULL },
634     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2,   4, NULL,
635                                                      xive_tm_pull_phys_ctx },
636     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
637                                                      xive_tm_pull_phys_ctx },
638     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL,   1, xive2_tm_pull_phys_ctx_ol,
639                                                      NULL },
640 };
641 
642 static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
643                                        unsigned size, bool write)
644 {
645     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
646     uint32_t op_offset = offset & TM_ADDRESS_MASK;
647     const XiveTmOp *tm_ops;
648     int i, tm_ops_count;
649     uint32_t cfg;
650 
651     cfg = xive_presenter_get_config(xptr);
652     if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
653         tm_ops = xive_tm_operations;
654         tm_ops_count = ARRAY_SIZE(xive_tm_operations);
655     } else {
656         tm_ops = xive2_tm_operations;
657         tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
658     }
659 
660     for (i = 0; i < tm_ops_count; i++) {
661         const XiveTmOp *xto = &tm_ops[i];
662 
663         /* Accesses done from a more privileged TIMA page is allowed */
664         if (xto->page_offset >= page_offset &&
665             xto->op_offset == op_offset &&
666             xto->size == size &&
667             ((write && xto->write_handler) || (!write && xto->read_handler))) {
668             return xto;
669         }
670     }
671     return NULL;
672 }
673 
674 /*
675  * TIMA MMIO handlers
676  */
677 void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
678                         uint64_t value, unsigned size)
679 {
680     const XiveTmOp *xto;
681 
682     trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
683 
684     /*
685      * TODO: check V bit in Q[0-3]W2
686      */
687 
688     /*
689      * First, check for special operations in the 2K region
690      */
691     if (offset & TM_SPECIAL_OP) {
692         xto = xive_tm_find_op(tctx->xptr, offset, size, true);
693         if (!xto) {
694             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
695                           "@%"HWADDR_PRIx" size %d\n", offset, size);
696         } else {
697             xto->write_handler(xptr, tctx, offset, value, size);
698         }
699         return;
700     }
701 
702     /*
703      * Then, for special operations in the region below 2K.
704      */
705     xto = xive_tm_find_op(tctx->xptr, offset, size, true);
706     if (xto) {
707         xto->write_handler(xptr, tctx, offset, value, size);
708         return;
709     }
710 
711     /*
712      * Finish with raw access to the register values
713      */
714     xive_tm_raw_write(tctx, offset, value, size);
715 }
716 
717 uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
718                            unsigned size)
719 {
720     const XiveTmOp *xto;
721     uint64_t ret;
722 
723     /*
724      * TODO: check V bit in Q[0-3]W2
725      */
726 
727     /*
728      * First, check for special operations in the 2K region
729      */
730     if (offset & TM_SPECIAL_OP) {
731         xto = xive_tm_find_op(tctx->xptr, offset, size, false);
732         if (!xto) {
733             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
734                           "@%"HWADDR_PRIx" size %d\n", offset, size);
735             return -1;
736         }
737         ret = xto->read_handler(xptr, tctx, offset, size);
738         goto out;
739     }
740 
741     /*
742      * Then, for special operations in the region below 2K.
743      */
744     xto = xive_tm_find_op(tctx->xptr, offset, size, false);
745     if (xto) {
746         ret = xto->read_handler(xptr, tctx, offset, size);
747         goto out;
748     }
749 
750     /*
751      * Finish with raw access to the register values
752      */
753     ret = xive_tm_raw_read(tctx, offset, size);
754 out:
755     trace_xive_tctx_tm_read(tctx->cs->cpu_index, offset, size, ret);
756     return ret;
757 }
758 
759 static char *xive_tctx_ring_print(uint8_t *ring)
760 {
761     uint32_t w2 = xive_tctx_word2(ring);
762 
763     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
764                    "%02x  %02x   %02x  %08x",
765                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
766                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
767                    be32_to_cpu(w2));
768 }
769 
/* Display names of the rings, in ring order */
static const char * const xive_tctx_ring_names[] = {
    "USER",
    "OS",
    "POOL",
    "PHYS",
};
773 
/*
 * True when the in-kernel irqchip is in use and the presenter class
 * provides an in_kernel() hook which reports true for @xptr.
 *
 * kvm_irqchip_in_kernel() will cause the compiler to turn this
 * into a nop if CONFIG_KVM isn't defined.
 */
#define xive_in_kernel(xptr)                                            \
    (kvm_irqchip_in_kernel() &&                                         \
     ({                                                                 \
         XivePresenterClass *xpc_ = XIVE_PRESENTER_GET_CLASS(xptr);     \
         xpc_->in_kernel && xpc_->in_kernel(xptr);                      \
     }))
784 
785 void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf)
786 {
787     int cpu_index;
788     int i;
789 
790     /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs
791      * are hot plugged or unplugged.
792      */
793     if (!tctx) {
794         return;
795     }
796 
797     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
798 
799     if (xive_in_kernel(tctx->xptr)) {
800         Error *local_err = NULL;
801 
802         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
803         if (local_err) {
804             error_report_err(local_err);
805             return;
806         }
807     }
808 
809     if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) {
810         g_string_append_printf(buf, "CPU[%04x]:   "
811                                "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
812                                "  W2\n", cpu_index);
813     } else {
814         g_string_append_printf(buf, "CPU[%04x]:   "
815                                "QW   NSR CPPR IPB LSMFB   -  LGS  T  PIPR"
816                                "  W2\n", cpu_index);
817     }
818 
819     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
820         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
821         g_string_append_printf(buf, "CPU[%04x]: %4s    %s\n",
822                                cpu_index, xive_tctx_ring_names[i], s);
823         g_free(s);
824     }
825 }
826 
827 void xive_tctx_reset(XiveTCTX *tctx)
828 {
829     memset(tctx->regs, 0, sizeof(tctx->regs));
830 
831     /* Set some defaults */
832     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
833     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
834     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
835     if (!(xive_presenter_get_config(tctx->xptr) &
836           XIVE_PRESENTER_GEN1_TIMA_OS)) {
837         tctx->regs[TM_QW1_OS + TM_OGEN] = 2;
838     }
839 
840     /*
841      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
842      * CPPR is first set.
843      */
844     tctx->regs[TM_QW1_OS + TM_PIPR] =
845         xive_ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
846     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
847         xive_ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
848 }
849 
850 static void xive_tctx_realize(DeviceState *dev, Error **errp)
851 {
852     XiveTCTX *tctx = XIVE_TCTX(dev);
853     PowerPCCPU *cpu;
854     CPUPPCState *env;
855 
856     assert(tctx->cs);
857     assert(tctx->xptr);
858 
859     cpu = POWERPC_CPU(tctx->cs);
860     env = &cpu->env;
861     switch (PPC_INPUT(env)) {
862     case PPC_FLAGS_INPUT_POWER9:
863         tctx->hv_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_HINT);
864         tctx->os_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_INT);
865         break;
866 
867     default:
868         error_setg(errp, "XIVE interrupt controller does not support "
869                    "this CPU bus model");
870         return;
871     }
872 
873     /* Connect the presenter to the VCPU (required for CPU hotplug) */
874     if (xive_in_kernel(tctx->xptr)) {
875         if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
876             return;
877         }
878     }
879 }
880 
881 static int vmstate_xive_tctx_pre_save(void *opaque)
882 {
883     XiveTCTX *tctx = XIVE_TCTX(opaque);
884     Error *local_err = NULL;
885     int ret;
886 
887     if (xive_in_kernel(tctx->xptr)) {
888         ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
889         if (ret < 0) {
890             error_report_err(local_err);
891             return ret;
892         }
893     }
894 
895     return 0;
896 }
897 
898 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
899 {
900     XiveTCTX *tctx = XIVE_TCTX(opaque);
901     Error *local_err = NULL;
902     int ret;
903 
904     if (xive_in_kernel(tctx->xptr)) {
905         /*
906          * Required for hotplugged CPU, for which the state comes
907          * after all states of the machine.
908          */
909         ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
910         if (ret < 0) {
911             error_report_err(local_err);
912             return ret;
913         }
914     }
915 
916     return 0;
917 }
918 
919 static const VMStateDescription vmstate_xive_tctx = {
920     .name = TYPE_XIVE_TCTX,
921     .version_id = 1,
922     .minimum_version_id = 1,
923     .pre_save = vmstate_xive_tctx_pre_save,
924     .post_load = vmstate_xive_tctx_post_load,
925     .fields = (const VMStateField[]) {
926         VMSTATE_BUFFER(regs, XiveTCTX),
927         VMSTATE_END_OF_LIST()
928     },
929 };
930 
/*
 * QOM link properties of the thread context:
 *   "cpu"       - the CPU this context belongs to (stored in 'cs')
 *   "presenter" - the XIVE presenter (stored in 'xptr')
 * Both are asserted to be set by the realize handler.
 */
static const Property xive_tctx_properties[] = {
    DEFINE_PROP_LINK("cpu", XiveTCTX, cs, TYPE_CPU, CPUState *),
    DEFINE_PROP_LINK("presenter", XiveTCTX, xptr, TYPE_XIVE_PRESENTER,
                     XivePresenter *),
};
936 
937 static void xive_tctx_class_init(ObjectClass *klass, const void *data)
938 {
939     DeviceClass *dc = DEVICE_CLASS(klass);
940 
941     dc->desc = "XIVE Interrupt Thread Context";
942     dc->realize = xive_tctx_realize;
943     dc->vmsd = &vmstate_xive_tctx;
944     device_class_set_props(dc, xive_tctx_properties);
945     /*
946      * Reason: part of XIVE interrupt controller, needs to be wired up
947      * by xive_tctx_create().
948      */
949     dc->user_creatable = false;
950 }
951 
952 static const TypeInfo xive_tctx_info = {
953     .name          = TYPE_XIVE_TCTX,
954     .parent        = TYPE_DEVICE,
955     .instance_size = sizeof(XiveTCTX),
956     .class_init    = xive_tctx_class_init,
957 };
958 
959 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp)
960 {
961     Object *obj;
962 
963     obj = object_new(TYPE_XIVE_TCTX);
964     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj);
965     object_unref(obj);
966     object_property_set_link(obj, "cpu", cpu, &error_abort);
967     object_property_set_link(obj, "presenter", OBJECT(xptr), &error_abort);
968     if (!qdev_realize(DEVICE(obj), NULL, errp)) {
969         object_unparent(obj);
970         return NULL;
971     }
972     return obj;
973 }
974 
975 void xive_tctx_destroy(XiveTCTX *tctx)
976 {
977     Object *obj = OBJECT(tctx);
978 
979     object_unparent(obj);
980 }
981 
982 /*
983  * XIVE ESB helpers
984  */
985 
/*
 * Replace the two low-order (PQ) bits of *@pq with the two low-order
 * bits of @value. The other bits of *@pq are preserved.
 *
 * Returns the previous PQ bits.
 */
uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
{
    const uint8_t pq_mask = 0x3;
    uint8_t previous = *pq & pq_mask;

    *pq = (*pq & ~pq_mask) | (value & pq_mask);

    return previous;
}
995 
996 bool xive_esb_trigger(uint8_t *pq)
997 {
998     uint8_t old_pq = *pq & 0x3;
999 
1000     switch (old_pq) {
1001     case XIVE_ESB_RESET:
1002         xive_esb_set(pq, XIVE_ESB_PENDING);
1003         return true;
1004     case XIVE_ESB_PENDING:
1005     case XIVE_ESB_QUEUED:
1006         xive_esb_set(pq, XIVE_ESB_QUEUED);
1007         return false;
1008     case XIVE_ESB_OFF:
1009         xive_esb_set(pq, XIVE_ESB_OFF);
1010         return false;
1011     default:
1012          g_assert_not_reached();
1013     }
1014 }
1015 
1016 bool xive_esb_eoi(uint8_t *pq)
1017 {
1018     uint8_t old_pq = *pq & 0x3;
1019 
1020     switch (old_pq) {
1021     case XIVE_ESB_RESET:
1022     case XIVE_ESB_PENDING:
1023         xive_esb_set(pq, XIVE_ESB_RESET);
1024         return false;
1025     case XIVE_ESB_QUEUED:
1026         xive_esb_set(pq, XIVE_ESB_PENDING);
1027         return true;
1028     case XIVE_ESB_OFF:
1029         xive_esb_set(pq, XIVE_ESB_OFF);
1030         return false;
1031     default:
1032          g_assert_not_reached();
1033     }
1034 }
1035 
1036 /*
1037  * XIVE Interrupt Source (or IVSE)
1038  */
1039 
1040 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
1041 {
1042     assert(srcno < xsrc->nr_irqs);
1043 
1044     return xsrc->status[srcno] & 0x3;
1045 }
1046 
1047 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
1048 {
1049     assert(srcno < xsrc->nr_irqs);
1050 
1051     return xive_esb_set(&xsrc->status[srcno], pq);
1052 }
1053 
1054 /*
1055  * Returns whether the event notification should be forwarded.
1056  */
1057 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
1058 {
1059     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
1060 
1061     xive_source_set_asserted(xsrc, srcno, true);
1062 
1063     switch (old_pq) {
1064     case XIVE_ESB_RESET:
1065         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
1066         return true;
1067     default:
1068         return false;
1069     }
1070 }
1071 
1072 /*
1073  * Sources can be configured with PQ offloading in which case the check
1074  * on the PQ state bits of MSIs is disabled
1075  */
1076 static bool xive_source_esb_disabled(XiveSource *xsrc, uint32_t srcno)
1077 {
1078     return (xsrc->esb_flags & XIVE_SRC_PQ_DISABLE) &&
1079         !xive_source_irq_is_lsi(xsrc, srcno);
1080 }
1081 
1082 /*
1083  * Returns whether the event notification should be forwarded.
1084  */
1085 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
1086 {
1087     bool ret;
1088 
1089     assert(srcno < xsrc->nr_irqs);
1090 
1091     if (xive_source_esb_disabled(xsrc, srcno)) {
1092         return true;
1093     }
1094 
1095     ret = xive_esb_trigger(&xsrc->status[srcno]);
1096 
1097     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1098         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
1099         qemu_log_mask(LOG_GUEST_ERROR,
1100                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
1101     }
1102 
1103     return ret;
1104 }
1105 
1106 /*
1107  * Returns whether the event notification should be forwarded.
1108  */
1109 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
1110 {
1111     bool ret;
1112 
1113     assert(srcno < xsrc->nr_irqs);
1114 
1115     if (xive_source_esb_disabled(xsrc, srcno)) {
1116         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EOI for IRQ %d\n", srcno);
1117         return false;
1118     }
1119 
1120     ret = xive_esb_eoi(&xsrc->status[srcno]);
1121 
1122     /*
1123      * LSI sources do not set the Q bit but they can still be
1124      * asserted, in which case we should forward a new event
1125      * notification
1126      */
1127     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1128         xive_source_is_asserted(xsrc, srcno)) {
1129         ret = xive_source_lsi_trigger(xsrc, srcno);
1130     }
1131 
1132     return ret;
1133 }
1134 
1135 /*
1136  * Forward the source event notification to the Router
1137  */
1138 static void xive_source_notify(XiveSource *xsrc, int srcno)
1139 {
1140     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
1141     bool pq_checked = !xive_source_esb_disabled(xsrc, srcno);
1142 
1143     if (xnc->notify) {
1144         xnc->notify(xsrc->xive, srcno, pq_checked);
1145     }
1146 }
1147 
1148 /*
1149  * In a two pages ESB MMIO setting, even page is the trigger page, odd
1150  * page is for management
1151  */
1152 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
1153 {
1154     return !((addr >> shift) & 1);
1155 }
1156 
1157 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
1158 {
1159     return xive_source_esb_has_2page(xsrc) &&
1160         addr_is_even(addr, xsrc->esb_shift - 1);
1161 }
1162 
/*
 * ESB MMIO loads
 *                      Trigger page    Management/EOI page
 *
 * ESB MMIO setting     2 pages         1 or 2 pages
 *
 * 0x000 .. 0x3FF       -1              EOI and return 0|1
 * 0x400 .. 0x7FF       -1              EOI and return 0|1
 * 0x800 .. 0xBFF       -1              return PQ
 * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
 * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
 * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
 * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
 */
1177 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
1178 {
1179     XiveSource *xsrc = XIVE_SOURCE(opaque);
1180     uint32_t offset = addr & 0xFFF;
1181     uint32_t srcno = addr >> xsrc->esb_shift;
1182     uint64_t ret = -1;
1183 
1184     /* In a two pages ESB MMIO setting, trigger page should not be read */
1185     if (xive_source_is_trigger_page(xsrc, addr)) {
1186         qemu_log_mask(LOG_GUEST_ERROR,
1187                       "XIVE: invalid load on IRQ %d trigger page at "
1188                       "0x%"HWADDR_PRIx"\n", srcno, addr);
1189         return -1;
1190     }
1191 
1192     switch (offset) {
1193     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1194         ret = xive_source_esb_eoi(xsrc, srcno);
1195 
1196         /* Forward the source event notification for routing */
1197         if (ret) {
1198             xive_source_notify(xsrc, srcno);
1199         }
1200         break;
1201 
1202     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1203         ret = xive_source_esb_get(xsrc, srcno);
1204         break;
1205 
1206     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1207     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1208     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1209     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1210         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1211         break;
1212     default:
1213         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
1214                       offset);
1215     }
1216 
1217     trace_xive_source_esb_read(addr, srcno, ret);
1218 
1219     return ret;
1220 }
1221 
/*
 * ESB MMIO stores
 *                      Trigger page    Management/EOI page
 *
 * ESB MMIO setting     2 pages         1 or 2 pages
 *
 * 0x000 .. 0x3FF       Trigger         Trigger
 * 0x400 .. 0x7FF       Trigger         EOI
 * 0x800 .. 0xBFF       Trigger         undefined
 * 0xC00 .. 0xCFF       Trigger         PQ=00
 * 0xD00 .. 0xDFF       Trigger         PQ=01
 * 0xE00 .. 0xEFF       Trigger         PQ=10
 * 0xF00 .. 0xFFF       Trigger         PQ=11
 */
1236 static void xive_source_esb_write(void *opaque, hwaddr addr,
1237                                   uint64_t value, unsigned size)
1238 {
1239     XiveSource *xsrc = XIVE_SOURCE(opaque);
1240     uint32_t offset = addr & 0xFFF;
1241     uint32_t srcno = addr >> xsrc->esb_shift;
1242     bool notify = false;
1243 
1244     trace_xive_source_esb_write(addr, srcno, value);
1245 
1246     /* In a two pages ESB MMIO setting, trigger page only triggers */
1247     if (xive_source_is_trigger_page(xsrc, addr)) {
1248         notify = xive_source_esb_trigger(xsrc, srcno);
1249         goto out;
1250     }
1251 
1252     switch (offset) {
1253     case 0 ... 0x3FF:
1254         notify = xive_source_esb_trigger(xsrc, srcno);
1255         break;
1256 
1257     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
1258         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
1259             qemu_log_mask(LOG_GUEST_ERROR,
1260                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
1261             return;
1262         }
1263 
1264         notify = xive_source_esb_eoi(xsrc, srcno);
1265         break;
1266 
1267     /*
1268      * This is an internal offset used to inject triggers when the PQ
1269      * state bits are not controlled locally. Such as for LSIs when
1270      * under ABT mode.
1271      */
1272     case XIVE_ESB_INJECT ... XIVE_ESB_INJECT + 0x3FF:
1273         notify = true;
1274         break;
1275 
1276     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1277     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1278     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1279     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1280         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1281         break;
1282 
1283     default:
1284         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
1285                       offset);
1286         return;
1287     }
1288 
1289 out:
1290     /* Forward the source event notification for routing */
1291     if (notify) {
1292         xive_source_notify(xsrc, srcno);
1293     }
1294 }
1295 
1296 static const MemoryRegionOps xive_source_esb_ops = {
1297     .read = xive_source_esb_read,
1298     .write = xive_source_esb_write,
1299     .endianness = DEVICE_BIG_ENDIAN,
1300     .valid = {
1301         .min_access_size = 1,
1302         .max_access_size = 8,
1303     },
1304     .impl = {
1305         .min_access_size = 1,
1306         .max_access_size = 8,
1307     },
1308 };
1309 
1310 void xive_source_set_irq(void *opaque, int srcno, int val)
1311 {
1312     XiveSource *xsrc = XIVE_SOURCE(opaque);
1313     bool notify = false;
1314 
1315     if (xive_source_irq_is_lsi(xsrc, srcno)) {
1316         if (val) {
1317             notify = xive_source_lsi_trigger(xsrc, srcno);
1318         } else {
1319             xive_source_set_asserted(xsrc, srcno, false);
1320         }
1321     } else {
1322         if (val) {
1323             notify = xive_source_esb_trigger(xsrc, srcno);
1324         }
1325     }
1326 
1327     /* Forward the source event notification for routing */
1328     if (notify) {
1329         xive_source_notify(xsrc, srcno);
1330     }
1331 }
1332 
1333 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, GString *buf)
1334 {
1335     for (unsigned i = 0; i < xsrc->nr_irqs; i++) {
1336         uint8_t pq = xive_source_esb_get(xsrc, i);
1337 
1338         if (pq == XIVE_ESB_OFF) {
1339             continue;
1340         }
1341 
1342         g_string_append_printf(buf, "  %08x %s %c%c%c\n", i + offset,
1343                                xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1344                                pq & XIVE_ESB_VAL_P ? 'P' : '-',
1345                                pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1346                                xive_source_is_asserted(xsrc, i) ? 'A' : ' ');
1347     }
1348 }
1349 
1350 static void xive_source_reset(void *dev)
1351 {
1352     XiveSource *xsrc = XIVE_SOURCE(dev);
1353 
1354     /* Do not clear the LSI bitmap */
1355 
1356     memset(xsrc->status, xsrc->reset_pq, xsrc->nr_irqs);
1357 }
1358 
1359 static void xive_source_realize(DeviceState *dev, Error **errp)
1360 {
1361     XiveSource *xsrc = XIVE_SOURCE(dev);
1362     uint64_t esb_len = xive_source_esb_len(xsrc);
1363 
1364     assert(xsrc->xive);
1365 
1366     if (!xsrc->nr_irqs) {
1367         error_setg(errp, "Number of interrupt needs to be greater than 0");
1368         return;
1369     }
1370 
1371     if (xsrc->esb_shift != XIVE_ESB_4K &&
1372         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1373         xsrc->esb_shift != XIVE_ESB_64K &&
1374         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1375         error_setg(errp, "Invalid ESB shift setting");
1376         return;
1377     }
1378 
1379     xsrc->status = g_malloc0(xsrc->nr_irqs);
1380     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1381 
1382     memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
1383     memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
1384                           &xive_source_esb_ops, xsrc, "xive.esb-emulated",
1385                           esb_len);
1386     memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
1387 
1388     qemu_register_reset(xive_source_reset, dev);
1389 }
1390 
1391 static const VMStateDescription vmstate_xive_source = {
1392     .name = TYPE_XIVE_SOURCE,
1393     .version_id = 1,
1394     .minimum_version_id = 1,
1395     .fields = (const VMStateField[]) {
1396         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1397         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1398         VMSTATE_END_OF_LIST()
1399     },
1400 };
1401 
/*
 * The default XIVE interrupt source setting for the ESB MMIOs is two
 * 64k pages without Store EOI, to be in sync with KVM.
 *
 * Properties:
 *   "flags"    - ESB flags (esb_flags), none set by default
 *   "nr-irqs"  - number of interrupt sources; defaults to 0, which the
 *                realize handler rejects, so it has to be set
 *   "shift"    - ESB MMIO page setting (esb_shift)
 *   "reset-pq" - value loaded in every source status byte on reset
 *   "xive"     - link to the XiveNotifier receiving the notifications
 */
static const Property xive_source_properties[] = {
    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
    /*
     * By default, PQs are initialized to 0b01 (Q=1) which corresponds
     * to "ints off"
     */
    DEFINE_PROP_UINT8("reset-pq", XiveSource, reset_pq, XIVE_ESB_OFF),
    DEFINE_PROP_LINK("xive", XiveSource, xive, TYPE_XIVE_NOTIFIER,
                     XiveNotifier *),
};
1418 
1419 static void xive_source_class_init(ObjectClass *klass, const void *data)
1420 {
1421     DeviceClass *dc = DEVICE_CLASS(klass);
1422 
1423     dc->desc    = "XIVE Interrupt Source";
1424     device_class_set_props(dc, xive_source_properties);
1425     dc->realize = xive_source_realize;
1426     dc->vmsd    = &vmstate_xive_source;
1427     /*
1428      * Reason: part of XIVE interrupt controller, needs to be wired up,
1429      * e.g. by spapr_xive_instance_init().
1430      */
1431     dc->user_creatable = false;
1432 }
1433 
1434 static const TypeInfo xive_source_info = {
1435     .name          = TYPE_XIVE_SOURCE,
1436     .parent        = TYPE_DEVICE,
1437     .instance_size = sizeof(XiveSource),
1438     .class_init    = xive_source_class_init,
1439 };
1440 
1441 /*
1442  * XiveEND helpers
1443  */
1444 
1445 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, GString *buf)
1446 {
1447     uint64_t qaddr_base = xive_end_qaddr(end);
1448     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1449     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1450     uint32_t qentries = 1 << (qsize + 10);
1451     int i;
1452 
1453     /*
1454      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1455      */
1456     g_string_append_printf(buf, " [ ");
1457     qindex = (qindex - (width - 1)) & (qentries - 1);
1458     for (i = 0; i < width; i++) {
1459         uint64_t qaddr = qaddr_base + (qindex << 2);
1460         uint32_t qdata = -1;
1461 
1462         if (dma_memory_read(&address_space_memory, qaddr,
1463                             &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1464             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1465                           HWADDR_PRIx "\n", qaddr);
1466             return;
1467         }
1468         g_string_append_printf(buf, "%s%08x ", i == width - 1 ? "^" : "",
1469                                be32_to_cpu(qdata));
1470         qindex = (qindex + 1) & (qentries - 1);
1471     }
1472     g_string_append_c(buf, ']');
1473 }
1474 
1475 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1476 {
1477     uint64_t qaddr_base = xive_end_qaddr(end);
1478     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1479     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1480     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1481     uint32_t qentries = 1 << (qsize + 10);
1482 
1483     uint32_t nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
1484     uint32_t nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1485     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1486     uint8_t pq;
1487 
1488     if (!xive_end_is_valid(end)) {
1489         return;
1490     }
1491 
1492     pq = xive_get_field32(END_W1_ESn, end->w1);
1493 
1494     g_string_append_printf(buf,
1495                            "  %08x %c%c %c%c%c%c%c%c%c%c prio:%d nvt:%02x/%04x",
1496                            end_idx,
1497                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1498                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1499                            xive_end_is_valid(end)    ? 'v' : '-',
1500                            xive_end_is_enqueue(end)  ? 'q' : '-',
1501                            xive_end_is_notify(end)   ? 'n' : '-',
1502                            xive_end_is_backlog(end)  ? 'b' : '-',
1503                            xive_end_is_escalate(end) ? 'e' : '-',
1504                            xive_end_is_uncond_escalation(end)   ? 'u' : '-',
1505                            xive_end_is_silent_escalation(end)   ? 's' : '-',
1506                            xive_end_is_firmware(end)   ? 'f' : '-',
1507                            priority, nvt_blk, nvt_idx);
1508 
1509     if (qaddr_base) {
1510         g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d",
1511                                qaddr_base, qindex, qentries, qgen);
1512         xive_end_queue_pic_print_info(end, 6, buf);
1513     }
1514     g_string_append_c(buf, '\n');
1515 }
1516 
1517 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1518 {
1519     uint64_t qaddr_base = xive_end_qaddr(end);
1520     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1521     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1522     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1523 
1524     uint64_t qaddr = qaddr_base + (qindex << 2);
1525     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1526     uint32_t qentries = 1 << (qsize + 10);
1527 
1528     if (dma_memory_write(&address_space_memory, qaddr,
1529                          &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1530         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1531                       HWADDR_PRIx "\n", qaddr);
1532         return;
1533     }
1534 
1535     qindex = (qindex + 1) & (qentries - 1);
1536     if (qindex == 0) {
1537         qgen ^= 1;
1538         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1539     }
1540     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1541 }
1542 
1543 void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1544 {
1545     XiveEAS *eas = (XiveEAS *) &end->w4;
1546     uint8_t pq;
1547 
1548     if (!xive_end_is_escalate(end)) {
1549         return;
1550     }
1551 
1552     pq = xive_get_field32(END_W1_ESe, end->w1);
1553 
1554     g_string_append_printf(buf, "  %08x %c%c %c%c end:%02x/%04x data:%08x\n",
1555                            end_idx,
1556                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1557                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1558                            xive_eas_is_valid(eas) ? 'V' : ' ',
1559                            xive_eas_is_masked(eas) ? 'M' : ' ',
1560                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1561                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1562                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1563 }
1564 
1565 /*
1566  * XIVE Router (aka. Virtualization Controller or IVRE)
1567  */
1568 
1569 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1570                         XiveEAS *eas)
1571 {
1572     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1573 
1574     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1575 }
1576 
1577 static
1578 int xive_router_get_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1579                        uint8_t *pq)
1580 {
1581     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1582 
1583     return xrc->get_pq(xrtr, eas_blk, eas_idx, pq);
1584 }
1585 
1586 static
1587 int xive_router_set_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1588                        uint8_t *pq)
1589 {
1590     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1591 
1592     return xrc->set_pq(xrtr, eas_blk, eas_idx, pq);
1593 }
1594 
1595 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1596                         XiveEND *end)
1597 {
1598    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1599 
1600    return xrc->get_end(xrtr, end_blk, end_idx, end);
1601 }
1602 
1603 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1604                           XiveEND *end, uint8_t word_number)
1605 {
1606    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1607 
1608    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1609 }
1610 
1611 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1612                         XiveNVT *nvt)
1613 {
1614    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1615 
1616    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1617 }
1618 
1619 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1620                         XiveNVT *nvt, uint8_t word_number)
1621 {
1622    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1623 
1624    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1625 }
1626 
1627 static int xive_router_get_block_id(XiveRouter *xrtr)
1628 {
1629    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1630 
1631    return xrc->get_block_id(xrtr);
1632 }
1633 
1634 static void xive_router_realize(DeviceState *dev, Error **errp)
1635 {
1636     XiveRouter *xrtr = XIVE_ROUTER(dev);
1637 
1638     assert(xrtr->xfb);
1639 }
1640 
1641 static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
1642 {
1643     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1644 
1645     return xrc->end_notify(xrtr, eas);
1646 }
1647 
1648 /*
1649  * Encode the HW CAM line in the block group mode format :
1650  *
1651  *   chip << 19 | 0000000 0 0001 thread (7Bit)
1652  */
1653 static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
1654 {
1655     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1656     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1657     uint8_t blk = xive_router_get_block_id(XIVE_ROUTER(xptr));
1658 
1659     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
1660 }
1661 
1662 uint32_t xive_get_vpgroup_size(uint32_t nvp_index)
1663 {
1664     /*
1665      * Group size is a power of 2. The position of the first 0
1666      * (starting with the least significant bits) in the NVP index
1667      * gives the size of the group.
1668      */
1669     int first_zero = cto32(nvp_index);
1670     if (first_zero >= 31) {
1671         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
1672                                        nvp_index);
1673         return 0;
1674     }
1675 
1676     return 1U << (first_zero + 1);
1677 }
1678 
1679 static uint8_t xive_get_group_level(bool crowd, bool ignore,
1680                                     uint32_t nvp_blk, uint32_t nvp_index)
1681 {
1682     int first_zero;
1683     uint8_t level;
1684 
1685     if (!ignore) {
1686         g_assert(!crowd);
1687         return 0;
1688     }
1689 
1690     first_zero = cto32(nvp_index);
1691     if (first_zero >= 31) {
1692         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
1693                                        nvp_index);
1694         return 0;
1695     }
1696 
1697     level = (first_zero + 1) & 0b1111;
1698     if (crowd) {
1699         uint32_t blk;
1700 
1701         /* crowd level is bit position of first 0 from the right in nvp_blk */
1702         first_zero = cto32(nvp_blk);
1703         if (first_zero >= 31) {
1704             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd block 0x%08x",
1705                                            nvp_blk);
1706             return 0;
1707         }
1708         blk = first_zero + 1;
1709 
1710         /*
1711          * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported.
1712          * HW will encode level 4 as the value 3.  See xive2_pgofnext().
1713          */
1714         switch (blk) {
1715         case 1:
1716         case 2:
1717             break;
1718         case 4:
1719             blk = 3;
1720             break;
1721         default:
1722             g_assert_not_reached();
1723         }
1724 
1725         /* Crowd level bits reside in upper 2 bits of the 6 bit group level */
1726         level |= blk << 4;
1727     }
1728     return level;
1729 }
1730 
1731 /*
1732  * The thread context register words are in big-endian format.
1733  */
1734 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
1735                               uint8_t format,
1736                               uint8_t nvt_blk, uint32_t nvt_idx,
1737                               bool cam_ignore, uint32_t logic_serv)
1738 {
1739     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1740     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1741     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1742     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1743     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1744 
1745     /*
1746      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1747      * identifier are ignored in the "CAM" match.
1748      */
1749 
1750     if (format == 0) {
1751         if (cam_ignore == true) {
1752             /*
1753              * F=0 & i=1: Logical server notification (bits ignored at
1754              * the end of the NVT identifier)
1755              */
1756             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1757                           nvt_blk, nvt_idx);
1758              return -1;
1759         }
1760 
1761         /* F=0 & i=0: Specific NVT notification */
1762 
1763         /* PHYS ring */
1764         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1765             cam == xive_tctx_hw_cam_line(xptr, tctx)) {
1766             return TM_QW3_HV_PHYS;
1767         }
1768 
1769         /* HV POOL ring */
1770         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1771             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1772             return TM_QW2_HV_POOL;
1773         }
1774 
1775         /* OS ring */
1776         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1777             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1778             return TM_QW1_OS;
1779         }
1780     } else {
1781         /* F=1 : User level Event-Based Branch (EBB) notification */
1782 
1783         /* USER ring */
1784         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1785              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1786              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1787              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1788             return TM_QW0_USER;
1789         }
1790     }
1791     return -1;
1792 }
1793 
1794 /*
1795  * This is our simple Xive Presenter Engine model. It is merged in the
1796  * Router as it does not require an extra object.
1797  */
1798 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
1799                            uint8_t nvt_blk, uint32_t nvt_idx,
1800                            bool crowd, bool cam_ignore, uint8_t priority,
1801                            uint32_t logic_serv, bool *precluded)
1802 {
1803     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
1804     XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false };
1805     uint8_t group_level;
1806     int count;
1807 
1808     /*
1809      * Ask the machine to scan the interrupt controllers for a match.
1810      *
1811      * For VP-specific notification, we expect at most one match and
1812      * one call to the presenters is all we need (abbreviated notify
1813      * sequence documented by the architecture).
1814      *
1815      * For VP-group notification, match_nvt() is the equivalent of the
1816      * "histogram" and "poll" commands sent to the power bus to the
1817      * presenters. 'count' could be more than one, but we always
1818      * select the first match for now. 'precluded' tells if (at least)
1819      * one thread matches but can't take the interrupt now because
1820      * it's running at a more favored priority. We return the
1821      * information to the router so that it can take appropriate
1822      * actions (backlog, escalation, broadcast, etc...)
1823      *
1824      * If we were to implement a better way of dispatching the
1825      * interrupt in case of multiple matches (instead of the first
1826      * match), we would need a heuristic to elect a thread (for
1827      * example, the hardware keeps track of an 'age' in the TIMA) and
1828      * a new command to the presenters (the equivalent of the "assign"
1829      * power bus command in the documented full notify sequence.
1830      */
1831     count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore,
1832                            priority, logic_serv, &match);
1833     if (count < 0) {
1834         return false;
1835     }
1836 
1837     /* handle CPU exception delivery */
1838     if (count) {
1839         group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx);
1840         trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level);
1841         xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level);
1842     } else {
1843         *precluded = match.precluded;
1844     }
1845 
1846     return !!count;
1847 }
1848 
1849 /*
1850  * Notification using the END ESe/ESn bit (Event State Buffer for
1851  * escalation and notification). Provide further coalescing in the
1852  * Router.
1853  */
1854 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
1855                                       uint32_t end_idx, XiveEND *end,
1856                                       uint32_t end_esmask)
1857 {
1858     uint8_t pq = xive_get_field32(end_esmask, end->w1);
1859     bool notify = xive_esb_trigger(&pq);
1860 
1861     if (pq != xive_get_field32(end_esmask, end->w1)) {
1862         end->w1 = xive_set_field32(end_esmask, end->w1, pq);
1863         xive_router_write_end(xrtr, end_blk, end_idx, end, 1);
1864     }
1865 
1866     /* ESe/n[Q]=1 : end of notification */
1867     return notify;
1868 }
1869 
1870 /*
1871  * An END trigger can come from an event trigger (IPI or HW) or from
1872  * another chip. We don't model the PowerBus but the END trigger
1873  * message has the same parameters than in the function below.
1874  */
1875 void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
1876 {
1877     XiveEND end;
1878     uint8_t priority;
1879     uint8_t format;
1880     uint8_t nvt_blk;
1881     uint32_t nvt_idx;
1882     XiveNVT nvt;
1883     bool found, precluded;
1884 
1885     uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
1886     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
1887     uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);
1888 
1889     /* END cache lookup */
1890     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1891         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1892                       end_idx);
1893         return;
1894     }
1895 
1896     if (!xive_end_is_valid(&end)) {
1897         trace_xive_router_end_notify(end_blk, end_idx, end_data);
1898         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1899                       end_blk, end_idx);
1900         return;
1901     }
1902 
1903     if (xive_end_is_enqueue(&end)) {
1904         xive_end_enqueue(&end, end_data);
1905         /* Enqueuing event data modifies the EQ toggle and index */
1906         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1907     }
1908 
1909     /*
1910      * When the END is silent, we skip the notification part.
1911      */
1912     if (xive_end_is_silent_escalation(&end)) {
1913         goto do_escalation;
1914     }
1915 
1916     /*
1917      * The W7 format depends on the F bit in W6. It defines the type
1918      * of the notification :
1919      *
1920      *   F=0 : single or multiple NVT notification
1921      *   F=1 : User level Event-Based Branch (EBB) notification, no
1922      *         priority
1923      */
1924     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1925     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1926 
1927     /* The END is masked */
1928     if (format == 0 && priority == 0xff) {
1929         return;
1930     }
1931 
1932     /*
1933      * Check the END ESn (Event State Buffer for notification) for
1934      * even further coalescing in the Router
1935      */
1936     if (!xive_end_is_notify(&end)) {
1937         /* ESn[Q]=1 : end of notification */
1938         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1939                                        &end, END_W1_ESn)) {
1940             return;
1941         }
1942     }
1943 
1944     /*
1945      * Follows IVPE notification
1946      */
1947     nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end.w6);
1948     nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end.w6);
1949 
1950     /* NVT cache lookup */
1951     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1952         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1953                       nvt_blk, nvt_idx);
1954         return;
1955     }
1956 
1957     if (!xive_nvt_is_valid(&nvt)) {
1958         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1959                       nvt_blk, nvt_idx);
1960         return;
1961     }
1962 
1963     found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
1964                           false /* crowd */,
1965                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1966                           priority,
1967                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7),
1968                           &precluded);
1969     /* we don't support VP-group notification on P9, so precluded is not used */
1970     /* TODO: Auto EOI. */
1971 
1972     if (found) {
1973         return;
1974     }
1975 
1976     /*
1977      * If no matching NVT is dispatched on a HW thread :
1978      * - specific VP: update the NVT structure if backlog is activated
1979      * - logical server : forward request to IVPE (not supported)
1980      */
1981     if (xive_end_is_backlog(&end)) {
1982         uint8_t ipb;
1983 
1984         if (format == 1) {
1985             qemu_log_mask(LOG_GUEST_ERROR,
1986                           "XIVE: END %x/%x invalid config: F1 & backlog\n",
1987                           end_blk, end_idx);
1988             return;
1989         }
1990         /*
1991          * Record the IPB in the associated NVT structure for later
1992          * use. The presenter will resend the interrupt when the vCPU
1993          * is dispatched again on a HW thread.
1994          */
1995         ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
1996             xive_priority_to_ipb(priority);
1997         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
1998         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1999 
2000         /*
2001          * On HW, follows a "Broadcast Backlog" to IVPEs
2002          */
2003     }
2004 
2005 do_escalation:
2006     /*
2007      * If activated, escalate notification using the ESe PQ bits and
2008      * the EAS in w4-5
2009      */
2010     if (!xive_end_is_escalate(&end)) {
2011         return;
2012     }
2013 
2014     /*
2015      * Check the END ESe (Event State Buffer for escalation) for even
2016      * further coalescing in the Router
2017      */
2018     if (!xive_end_is_uncond_escalation(&end)) {
2019         /* ESe[Q]=1 : end of notification */
2020         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
2021                                        &end, END_W1_ESe)) {
2022             return;
2023         }
2024     }
2025 
2026     trace_xive_router_end_escalate(end_blk, end_idx,
2027            (uint8_t) xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
2028            (uint32_t) xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
2029            (uint32_t) xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
2030     /*
2031      * The END trigger becomes an Escalation trigger
2032      */
2033     xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
2034 }
2035 
2036 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
2037 {
2038     XiveRouter *xrtr = XIVE_ROUTER(xn);
2039     uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
2040     uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
2041     XiveEAS eas;
2042 
2043     /* EAS cache lookup */
2044     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
2045         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
2046         return;
2047     }
2048 
2049     if (!pq_checked) {
2050         bool notify;
2051         uint8_t pq;
2052 
2053         /* PQ cache lookup */
2054         if (xive_router_get_pq(xrtr, eas_blk, eas_idx, &pq)) {
2055             /* Set FIR */
2056             g_assert_not_reached();
2057         }
2058 
2059         notify = xive_esb_trigger(&pq);
2060 
2061         if (xive_router_set_pq(xrtr, eas_blk, eas_idx, &pq)) {
2062             /* Set FIR */
2063             g_assert_not_reached();
2064         }
2065 
2066         if (!notify) {
2067             return;
2068         }
2069     }
2070 
2071     if (!xive_eas_is_valid(&eas)) {
2072         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
2073         return;
2074     }
2075 
2076     if (xive_eas_is_masked(&eas)) {
2077         /* Notification completed */
2078         return;
2079     }
2080 
2081     /*
2082      * The event trigger becomes an END trigger
2083      */
2084     xive_router_end_notify_handler(xrtr, &eas);
2085 }
2086 
2087 static const Property xive_router_properties[] = {
2088     DEFINE_PROP_LINK("xive-fabric", XiveRouter, xfb,
2089                      TYPE_XIVE_FABRIC, XiveFabric *),
2090 };
2091 
2092 static void xive_router_class_init(ObjectClass *klass, const void *data)
2093 {
2094     DeviceClass *dc = DEVICE_CLASS(klass);
2095     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
2096     XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
2097 
2098     dc->desc    = "XIVE Router Engine";
2099     device_class_set_props(dc, xive_router_properties);
2100     /* Parent is SysBusDeviceClass. No need to call its realize hook */
2101     dc->realize = xive_router_realize;
2102     xnc->notify = xive_router_notify;
2103 
2104     /* By default, the router handles END triggers locally */
2105     xrc->end_notify = xive_router_end_notify;
2106 }
2107 
2108 static const TypeInfo xive_router_info = {
2109     .name          = TYPE_XIVE_ROUTER,
2110     .parent        = TYPE_SYS_BUS_DEVICE,
2111     .abstract      = true,
2112     .instance_size = sizeof(XiveRouter),
2113     .class_size    = sizeof(XiveRouterClass),
2114     .class_init    = xive_router_class_init,
2115     .interfaces    = (const InterfaceInfo[]) {
2116         { TYPE_XIVE_NOTIFIER },
2117         { TYPE_XIVE_PRESENTER },
2118         { }
2119     }
2120 };
2121 
2122 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, GString *buf)
2123 {
2124     if (!xive_eas_is_valid(eas)) {
2125         return;
2126     }
2127 
2128     g_string_append_printf(buf, "  %08x %s end:%02x/%04x data:%08x\n",
2129                            lisn, xive_eas_is_masked(eas) ? "M" : " ",
2130                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
2131                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
2132                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
2133 }
2134 
2135 /*
2136  * END ESB MMIO loads
2137  */
2138 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
2139 {
2140     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
2141     uint32_t offset = addr & 0xFFF;
2142     uint8_t end_blk;
2143     uint32_t end_idx;
2144     XiveEND end;
2145     uint32_t end_esmask;
2146     uint8_t pq;
2147     uint64_t ret = -1;
2148 
2149     /*
2150      * The block id should be deduced from the load address on the END
2151      * ESB MMIO but our model only supports a single block per XIVE chip.
2152      */
2153     end_blk = xive_router_get_block_id(xsrc->xrtr);
2154     end_idx = addr >> (xsrc->esb_shift + 1);
2155 
2156     trace_xive_end_source_read(end_blk, end_idx, addr);
2157 
2158     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
2159         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
2160                       end_idx);
2161         return -1;
2162     }
2163 
2164     if (!xive_end_is_valid(&end)) {
2165         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
2166                       end_blk, end_idx);
2167         return -1;
2168     }
2169 
2170     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
2171     pq = xive_get_field32(end_esmask, end.w1);
2172 
2173     switch (offset) {
2174     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
2175         ret = xive_esb_eoi(&pq);
2176 
2177         /* Forward the source event notification for routing ?? */
2178         break;
2179 
2180     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
2181         ret = pq;
2182         break;
2183 
2184     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
2185     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
2186     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
2187     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
2188         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
2189         break;
2190     default:
2191         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
2192                       offset);
2193         return -1;
2194     }
2195 
2196     if (pq != xive_get_field32(end_esmask, end.w1)) {
2197         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
2198         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
2199     }
2200 
2201     return ret;
2202 }
2203 
2204 /*
2205  * END ESB MMIO stores are invalid
2206  */
2207 static void xive_end_source_write(void *opaque, hwaddr addr,
2208                                   uint64_t value, unsigned size)
2209 {
2210     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
2211                   HWADDR_PRIx"\n", addr);
2212 }
2213 
2214 static const MemoryRegionOps xive_end_source_ops = {
2215     .read = xive_end_source_read,
2216     .write = xive_end_source_write,
2217     .endianness = DEVICE_BIG_ENDIAN,
2218     .valid = {
2219         .min_access_size = 1,
2220         .max_access_size = 8,
2221     },
2222     .impl = {
2223         .min_access_size = 1,
2224         .max_access_size = 8,
2225     },
2226 };
2227 
2228 static void xive_end_source_realize(DeviceState *dev, Error **errp)
2229 {
2230     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
2231 
2232     assert(xsrc->xrtr);
2233 
2234     if (!xsrc->nr_ends) {
2235         error_setg(errp, "Number of interrupt needs to be greater than 0");
2236         return;
2237     }
2238 
2239     if (xsrc->esb_shift != XIVE_ESB_4K &&
2240         xsrc->esb_shift != XIVE_ESB_64K) {
2241         error_setg(errp, "Invalid ESB shift setting");
2242         return;
2243     }
2244 
2245     /*
2246      * Each END is assigned an even/odd pair of MMIO pages, the even page
2247      * manages the ESn field while the odd page manages the ESe field.
2248      */
2249     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
2250                           &xive_end_source_ops, xsrc, "xive.end",
2251                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
2252 }
2253 
2254 static const Property xive_end_source_properties[] = {
2255     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
2256     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
2257     DEFINE_PROP_LINK("xive", XiveENDSource, xrtr, TYPE_XIVE_ROUTER,
2258                      XiveRouter *),
2259 };
2260 
2261 static void xive_end_source_class_init(ObjectClass *klass, const void *data)
2262 {
2263     DeviceClass *dc = DEVICE_CLASS(klass);
2264 
2265     dc->desc    = "XIVE END Source";
2266     device_class_set_props(dc, xive_end_source_properties);
2267     dc->realize = xive_end_source_realize;
2268     /*
2269      * Reason: part of XIVE interrupt controller, needs to be wired up,
2270      * e.g. by spapr_xive_instance_init().
2271      */
2272     dc->user_creatable = false;
2273 }
2274 
2275 static const TypeInfo xive_end_source_info = {
2276     .name          = TYPE_XIVE_END_SOURCE,
2277     .parent        = TYPE_DEVICE,
2278     .instance_size = sizeof(XiveENDSource),
2279     .class_init    = xive_end_source_class_init,
2280 };
2281 
2282 /*
2283  * XIVE Notifier
2284  */
2285 static const TypeInfo xive_notifier_info = {
2286     .name = TYPE_XIVE_NOTIFIER,
2287     .parent = TYPE_INTERFACE,
2288     .class_size = sizeof(XiveNotifierClass),
2289 };
2290 
2291 /*
2292  * XIVE Presenter
2293  */
2294 static const TypeInfo xive_presenter_info = {
2295     .name = TYPE_XIVE_PRESENTER,
2296     .parent = TYPE_INTERFACE,
2297     .class_size = sizeof(XivePresenterClass),
2298 };
2299 
2300 /*
2301  * XIVE Fabric
2302  */
2303 static const TypeInfo xive_fabric_info = {
2304     .name = TYPE_XIVE_FABRIC,
2305     .parent = TYPE_INTERFACE,
2306     .class_size = sizeof(XiveFabricClass),
2307 };
2308 
2309 static void xive_register_types(void)
2310 {
2311     type_register_static(&xive_fabric_info);
2312     type_register_static(&xive_source_info);
2313     type_register_static(&xive_notifier_info);
2314     type_register_static(&xive_presenter_info);
2315     type_register_static(&xive_router_info);
2316     type_register_static(&xive_end_source_info);
2317     type_register_static(&xive_tctx_info);
2318 }
2319 
2320 type_init(xive_register_types)
2321