xref: /openbmc/qemu/hw/intc/xive.c (revision ed75658a)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "sysemu/cpus.h"
16 #include "sysemu/dma.h"
17 #include "sysemu/reset.h"
18 #include "hw/qdev-properties.h"
19 #include "migration/vmstate.h"
20 #include "monitor/monitor.h"
21 #include "hw/irq.h"
22 #include "hw/ppc/xive.h"
23 #include "hw/ppc/xive2.h"
24 #include "hw/ppc/xive_regs.h"
25 #include "trace.h"
26 
27 /*
28  * XIVE Thread Interrupt Management context
29  */
30 
/*
 * Derive the Pending Interrupt Priority Register (PIPR) value from an
 * Interrupt Pending Buffer (IPB) value. The PIPR holds the priority of
 * the most favored pending notification: the number of leading zero
 * bits of the 8-bit IPB, or 0xff when no bit is set.
 */
static uint8_t ipb_to_pipr(uint8_t ibp)
{
    if (!ibp) {
        /* Nothing is pending */
        return 0xff;
    }

    /* Place the IPB byte in the top byte of a 32-bit word before counting */
    return clz32((uint32_t)ibp << 24);
}
40 
41 static uint8_t exception_mask(uint8_t ring)
42 {
43     switch (ring) {
44     case TM_QW1_OS:
45         return TM_QW1_NSR_EO;
46     case TM_QW3_HV_PHYS:
47         return TM_QW3_NSR_HE;
48     default:
49         g_assert_not_reached();
50     }
51 }
52 
53 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
54 {
55         switch (ring) {
56         case TM_QW0_USER:
57                 return 0; /* Not supported */
58         case TM_QW1_OS:
59                 return tctx->os_output;
60         case TM_QW2_HV_POOL:
61         case TM_QW3_HV_PHYS:
62                 return tctx->hv_output;
63         default:
64                 return 0;
65         }
66 }
67 
68 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
69 {
70     uint8_t *regs = &tctx->regs[ring];
71     uint8_t nsr = regs[TM_NSR];
72     uint8_t mask = exception_mask(ring);
73 
74     qemu_irq_lower(xive_tctx_output(tctx, ring));
75 
76     if (regs[TM_NSR] & mask) {
77         uint8_t cppr = regs[TM_PIPR];
78 
79         regs[TM_CPPR] = cppr;
80 
81         /* Reset the pending buffer bit */
82         regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
83         regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
84 
85         /* Drop Exception bit */
86         regs[TM_NSR] &= ~mask;
87 
88         trace_xive_tctx_accept(tctx->cs->cpu_index, ring,
89                                regs[TM_IPB], regs[TM_PIPR],
90                                regs[TM_CPPR], regs[TM_NSR]);
91     }
92 
93     return (nsr << 8) | regs[TM_CPPR];
94 }
95 
96 static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
97 {
98     uint8_t *regs = &tctx->regs[ring];
99 
100     if (regs[TM_PIPR] < regs[TM_CPPR]) {
101         switch (ring) {
102         case TM_QW1_OS:
103             regs[TM_NSR] |= TM_QW1_NSR_EO;
104             break;
105         case TM_QW3_HV_PHYS:
106             regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
107             break;
108         default:
109             g_assert_not_reached();
110         }
111         trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
112                                regs[TM_IPB], regs[TM_PIPR],
113                                regs[TM_CPPR], regs[TM_NSR]);
114         qemu_irq_raise(xive_tctx_output(tctx, ring));
115     }
116 }
117 
118 void xive_tctx_reset_os_signal(XiveTCTX *tctx)
119 {
120     /*
121      * Lower the External interrupt. Used when pulling an OS
122      * context. It is necessary to avoid catching it in the hypervisor
123      * context. It should be raised again when re-pushing the OS
124      * context.
125      */
126     qemu_irq_lower(xive_tctx_output(tctx, TM_QW1_OS));
127 }
128 
129 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
130 {
131     uint8_t *regs = &tctx->regs[ring];
132 
133     trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
134                              regs[TM_IPB], regs[TM_PIPR],
135                              cppr, regs[TM_NSR]);
136 
137     if (cppr > XIVE_PRIORITY_MAX) {
138         cppr = 0xff;
139     }
140 
141     tctx->regs[ring + TM_CPPR] = cppr;
142 
143     /* CPPR has changed, check if we need to raise a pending exception */
144     xive_tctx_notify(tctx, ring);
145 }
146 
147 void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
148 {
149     uint8_t *regs = &tctx->regs[ring];
150 
151     regs[TM_IPB] |= ipb;
152     regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
153     xive_tctx_notify(tctx, ring);
154 }
155 
156 /*
157  * XIVE Thread Interrupt Management Area (TIMA)
158  */
159 
160 static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
161                                 hwaddr offset, uint64_t value, unsigned size)
162 {
163     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
164 }
165 
166 static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
167                                    hwaddr offset, unsigned size)
168 {
169     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
170 }
171 
172 static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
173                                       hwaddr offset, unsigned size)
174 {
175     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
176     uint32_t qw2w2;
177 
178     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
179     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
180     return qw2w2;
181 }
182 
183 static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
184                             uint64_t value, unsigned size)
185 {
186     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
187 }
188 
189 static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
190                                 hwaddr offset, unsigned size)
191 {
192     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
193 }
194 
195 /*
196  * Define an access map for each page of the TIMA that we will use in
197  * the memory region ops to filter values when doing loads and stores
198  * of raw registers values
199  *
200  * Registers accessibility bits :
201  *
202  *    0x0 - no access
203  *    0x1 - write only
204  *    0x2 - read only
205  *    0x3 - read/write
206  */
207 
/*
 * Access map of the XIVE_TM_HW_PAGE view (see xive_tm_views[]). There
 * is one entry per register byte, laid out as four rows of 16 bytes,
 * one row per ring. Each entry holds the accessibility bits:
 * 0 - no access, 1 - write only, 2 - read only, 3 - read/write.
 */
static const uint8_t xive_tm_hw_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   3, 3, 3, 0, /* QW-3 PHYS */
};
214 
/*
 * Access map of the XIVE_TM_HV_PAGE view (see xive_tm_views[]). There
 * is one entry per register byte, laid out as four rows of 16 bytes,
 * one row per ring. Each entry holds the accessibility bits:
 * 0 - no access, 1 - write only, 2 - read only, 3 - read/write.
 */
static const uint8_t xive_tm_hv_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 0, 0, 0,   0, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   0, 0, 0, 0, /* QW-3 PHYS */
};
221 
/*
 * Access map of the XIVE_TM_OS_PAGE view (see xive_tm_views[]). There
 * is one entry per register byte, laid out as four rows of 16 bytes,
 * one row per ring. Each entry holds the accessibility bits:
 * 0 - no access, 1 - write only, 2 - read only, 3 - read/write.
 * The POOL and PHYS rings are not accessible at all from this view.
 */
static const uint8_t xive_tm_os_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
228 
/*
 * Access map of the XIVE_TM_USER_PAGE view (see xive_tm_views[]).
 * There is one entry per register byte, laid out as four rows of 16
 * bytes, one row per ring. Each entry holds the accessibility bits:
 * 0 - no access, 1 - write only, 2 - read only, 3 - read/write.
 * Only the first byte of the User ring is accessible from this view.
 */
static const uint8_t xive_tm_user_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-0 User */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
235 
236 /*
237  * Overall TIMA access map for the thread interrupt management context
238  * registers
239  */
240 static const uint8_t *xive_tm_views[] = {
241     [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
242     [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
243     [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
244     [XIVE_TM_USER_PAGE] = xive_tm_user_view,
245 };
246 
247 /*
248  * Computes a register access mask for a given offset in the TIMA
249  */
250 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
251 {
252     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
253     uint8_t reg_offset = offset & TM_REG_OFFSET;
254     uint8_t reg_mask = write ? 0x1 : 0x2;
255     uint64_t mask = 0x0;
256     int i;
257 
258     for (i = 0; i < size; i++) {
259         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
260             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
261         }
262     }
263 
264     return mask;
265 }
266 
267 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
268                               unsigned size)
269 {
270     uint8_t ring_offset = offset & TM_RING_OFFSET;
271     uint8_t reg_offset = offset & TM_REG_OFFSET;
272     uint64_t mask = xive_tm_mask(offset, size, true);
273     int i;
274 
275     /*
276      * Only 4 or 8 bytes stores are allowed and the User ring is
277      * excluded
278      */
279     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
280         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
281                       HWADDR_PRIx"\n", offset);
282         return;
283     }
284 
285     /*
286      * Use the register offset for the raw values and filter out
287      * reserved values
288      */
289     for (i = 0; i < size; i++) {
290         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
291         if (byte_mask) {
292             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
293                 byte_mask;
294         }
295     }
296 }
297 
298 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
299 {
300     uint8_t ring_offset = offset & TM_RING_OFFSET;
301     uint8_t reg_offset = offset & TM_REG_OFFSET;
302     uint64_t mask = xive_tm_mask(offset, size, false);
303     uint64_t ret;
304     int i;
305 
306     /*
307      * Only 4 or 8 bytes loads are allowed and the User ring is
308      * excluded
309      */
310     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
311         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
312                       HWADDR_PRIx"\n", offset);
313         return -1;
314     }
315 
316     /* Use the register offset for the raw values */
317     ret = 0;
318     for (i = 0; i < size; i++) {
319         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
320     }
321 
322     /* filter out reserved values */
323     return ret & mask;
324 }
325 
326 /*
327  * The TM context is mapped twice within each page. Stores and loads
328  * to the first mapping below 2K write and read the specified values
329  * without modification. The second mapping above 2K performs specific
330  * state changes (side effects) in addition to setting/returning the
331  * interrupt management area context of the processor thread.
332  */
333 static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
334                                    hwaddr offset, unsigned size)
335 {
336     return xive_tctx_accept(tctx, TM_QW1_OS);
337 }
338 
339 static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
340                                 hwaddr offset, uint64_t value, unsigned size)
341 {
342     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
343 }
344 
345 /*
346  * Adjust the IPB to allow a CPU to process event queues of other
347  * priorities during one physical interrupt cycle.
348  */
349 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
350                                    hwaddr offset, uint64_t value, unsigned size)
351 {
352     xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
353 }
354 
355 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
356                                uint32_t *nvt_idx, bool *vo)
357 {
358     if (nvt_blk) {
359         *nvt_blk = xive_nvt_blk(cam);
360     }
361     if (nvt_idx) {
362         *nvt_idx = xive_nvt_idx(cam);
363     }
364     if (vo) {
365         *vo = !!(cam & TM_QW1W2_VO);
366     }
367 }
368 
369 static uint32_t xive_tctx_get_os_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
370                                      uint32_t *nvt_idx, bool *vo)
371 {
372     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
373     uint32_t cam = be32_to_cpu(qw1w2);
374 
375     xive_os_cam_decode(cam, nvt_blk, nvt_idx, vo);
376     return qw1w2;
377 }
378 
379 static void xive_tctx_set_os_cam(XiveTCTX *tctx, uint32_t qw1w2)
380 {
381     memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
382 }
383 
384 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
385                                     hwaddr offset, unsigned size)
386 {
387     uint32_t qw1w2;
388     uint32_t qw1w2_new;
389     uint8_t nvt_blk;
390     uint32_t nvt_idx;
391     bool vo;
392 
393     qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
394 
395     if (!vo) {
396         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
397                       nvt_blk, nvt_idx);
398     }
399 
400     /* Invalidate CAM line */
401     qw1w2_new = xive_set_field32(TM_QW1W2_VO, qw1w2, 0);
402     xive_tctx_set_os_cam(tctx, qw1w2_new);
403 
404     xive_tctx_reset_os_signal(tctx);
405     return qw1w2;
406 }
407 
408 static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
409                                   uint8_t nvt_blk, uint32_t nvt_idx)
410 {
411     XiveNVT nvt;
412     uint8_t ipb;
413 
414     /*
415      * Grab the associated NVT to pull the pending bits, and merge
416      * them with the IPB of the thread interrupt context registers
417      */
418     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
419         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVT %x/%x\n",
420                           nvt_blk, nvt_idx);
421         return;
422     }
423 
424     ipb = xive_get_field32(NVT_W4_IPB, nvt.w4);
425 
426     if (ipb) {
427         /* Reset the NVT value */
428         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
429         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
430     }
431     /*
432      * Always call xive_tctx_ipb_update(). Even if there were no
433      * escalation triggered, there could be a pending interrupt which
434      * was saved when the context was pulled and that we need to take
435      * into account by recalculating the PIPR (which is not
436      * saved/restored).
437      * It will also raise the External interrupt signal if needed.
438      */
439     xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
440 }
441 
442 /*
443  * Updating the OS CAM line can trigger a resend of interrupt
444  */
445 static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
446                                 hwaddr offset, uint64_t value, unsigned size)
447 {
448     uint32_t cam = value;
449     uint32_t qw1w2 = cpu_to_be32(cam);
450     uint8_t nvt_blk;
451     uint32_t nvt_idx;
452     bool vo;
453 
454     xive_os_cam_decode(cam, &nvt_blk, &nvt_idx, &vo);
455 
456     /* First update the registers */
457     xive_tctx_set_os_cam(tctx, qw1w2);
458 
459     /* Check the interrupt pending bits */
460     if (vo) {
461         xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
462     }
463 }
464 
465 static uint32_t xive_presenter_get_config(XivePresenter *xptr)
466 {
467     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
468 
469     return xpc->get_config(xptr);
470 }
471 
472 /*
473  * Define a mapping of "special" operations depending on the TIMA page
474  * offset and the size of the operation.
475  */
/*
 * One entry of a TIMA special operation table. An access matches an
 * entry when its page is at least as privileged as page_offset, when
 * its offset and size are equal to op_offset and size, and when the
 * entry provides a handler for the direction of the access (see
 * xive_tm_find_op()).
 */
typedef struct XiveTmOp {
    uint8_t  page_offset;   /* least privileged TIMA page allowed */
    uint32_t op_offset;     /* offset of the operation within the page */
    unsigned size;          /* access size in bytes, must match exactly */
    /* Store handler, NULL when the operation is not a store */
    void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
                              hwaddr offset,
                              uint64_t value, unsigned size);
    /* Load handler, NULL when the operation is not a load */
    uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
                             unsigned size);
} XiveTmOp;
486 
/*
 * Special operations selected by xive_tm_find_op() when the presenter
 * configuration has XIVE_PRESENTER_GEN1_TIMA_OS set. Each entry is
 * { page, offset, size, write handler, read handler }. The table is
 * scanned in order and the first matching entry is used.
 */
static const XiveTmOp xive_tm_operations[] = {
    /*
     * MMIOs below 2K : raw values and special operations without side
     * effects
     */
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,   1, xive_tm_set_os_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,     4, xive_tm_push_os_ctx, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll },

    /* MMIOs above 2K : special operations with side effects */
    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,    4, NULL, xive_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,    8, NULL, xive_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,     2, NULL, xive_tm_ack_hv_reg },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  4, NULL, xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  8, NULL, xive_tm_pull_pool_ctx },
};
507 
/*
 * Special operations selected by xive_tm_find_op() when the presenter
 * configuration does not have XIVE_PRESENTER_GEN1_TIMA_OS set. Each
 * entry is { page, offset, size, write handler, read handler }. The
 * table is scanned in order and the first matching entry is used. It
 * differs from xive_tm_operations[] by the xive2_ handlers used to
 * push and pull the OS context.
 */
static const XiveTmOp xive2_tm_operations[] = {
    /*
     * MMIOs below 2K : raw values and special operations without side
     * effects
     */
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,   1, xive_tm_set_os_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,  4, xive2_tm_push_os_ctx, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll },

    /* MMIOs above 2K : special operations with side effects */
    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,    4, NULL, xive2_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,    8, NULL, xive2_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,     2, NULL, xive_tm_ack_hv_reg },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  4, NULL, xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  8, NULL, xive_tm_pull_pool_ctx },
};
528 
529 static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
530                                        unsigned size, bool write)
531 {
532     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
533     uint32_t op_offset = offset & TM_ADDRESS_MASK;
534     const XiveTmOp *tm_ops;
535     int i, tm_ops_count;
536     uint32_t cfg;
537 
538     cfg = xive_presenter_get_config(xptr);
539     if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
540         tm_ops = xive_tm_operations;
541         tm_ops_count = ARRAY_SIZE(xive_tm_operations);
542     } else {
543         tm_ops = xive2_tm_operations;
544         tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
545     }
546 
547     for (i = 0; i < tm_ops_count; i++) {
548         const XiveTmOp *xto = &tm_ops[i];
549 
550         /* Accesses done from a more privileged TIMA page is allowed */
551         if (xto->page_offset >= page_offset &&
552             xto->op_offset == op_offset &&
553             xto->size == size &&
554             ((write && xto->write_handler) || (!write && xto->read_handler))) {
555             return xto;
556         }
557     }
558     return NULL;
559 }
560 
561 /*
562  * TIMA MMIO handlers
563  */
564 void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
565                         uint64_t value, unsigned size)
566 {
567     const XiveTmOp *xto;
568 
569     trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
570 
571     /*
572      * TODO: check V bit in Q[0-3]W2
573      */
574 
575     /*
576      * First, check for special operations in the 2K region
577      */
578     if (offset & TM_SPECIAL_OP) {
579         xto = xive_tm_find_op(tctx->xptr, offset, size, true);
580         if (!xto) {
581             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
582                           "@%"HWADDR_PRIx"\n", offset);
583         } else {
584             xto->write_handler(xptr, tctx, offset, value, size);
585         }
586         return;
587     }
588 
589     /*
590      * Then, for special operations in the region below 2K.
591      */
592     xto = xive_tm_find_op(tctx->xptr, offset, size, true);
593     if (xto) {
594         xto->write_handler(xptr, tctx, offset, value, size);
595         return;
596     }
597 
598     /*
599      * Finish with raw access to the register values
600      */
601     xive_tm_raw_write(tctx, offset, value, size);
602 }
603 
604 uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
605                            unsigned size)
606 {
607     const XiveTmOp *xto;
608     uint64_t ret;
609 
610     /*
611      * TODO: check V bit in Q[0-3]W2
612      */
613 
614     /*
615      * First, check for special operations in the 2K region
616      */
617     if (offset & TM_SPECIAL_OP) {
618         xto = xive_tm_find_op(tctx->xptr, offset, size, false);
619         if (!xto) {
620             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
621                           "@%"HWADDR_PRIx"\n", offset);
622             return -1;
623         }
624         ret = xto->read_handler(xptr, tctx, offset, size);
625         goto out;
626     }
627 
628     /*
629      * Then, for special operations in the region below 2K.
630      */
631     xto = xive_tm_find_op(tctx->xptr, offset, size, false);
632     if (xto) {
633         ret = xto->read_handler(xptr, tctx, offset, size);
634         goto out;
635     }
636 
637     /*
638      * Finish with raw access to the register values
639      */
640     ret = xive_tm_raw_read(tctx, offset, size);
641 out:
642     trace_xive_tctx_tm_read(tctx->cs->cpu_index, offset, size, ret);
643     return ret;
644 }
645 
646 static char *xive_tctx_ring_print(uint8_t *ring)
647 {
648     uint32_t w2 = xive_tctx_word2(ring);
649 
650     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
651                    "%02x  %02x   %02x  %08x",
652                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
653                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
654                    be32_to_cpu(w2));
655 }
656 
/* Display names of the rings, in the order they are laid out in the TIMA */
static const char * const xive_tctx_ring_names[] = {
    "USER",     /* ring 0 */
    "OS",       /* ring 1 */
    "POOL",     /* ring 2 */
    "PHYS",     /* ring 3 */
};
660 
/*
 * Evaluates to true when the in-kernel irqchip is in use and the
 * presenter class of @xptr provides an in_kernel() handler that
 * returns true.
 *
 * kvm_irqchip_in_kernel() will cause the compiler to turn this
 * into a nop if CONFIG_KVM isn't defined.
 */
#define xive_in_kernel(xptr)                                            \
    (kvm_irqchip_in_kernel() &&                                         \
     ({                                                                 \
         XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);      \
         xpc->in_kernel ? xpc->in_kernel(xptr) : false;                 \
     }))
671 
672 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
673 {
674     int cpu_index;
675     int i;
676 
677     /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs
678      * are hot plugged or unplugged.
679      */
680     if (!tctx) {
681         return;
682     }
683 
684     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
685 
686     if (xive_in_kernel(tctx->xptr)) {
687         Error *local_err = NULL;
688 
689         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
690         if (local_err) {
691             error_report_err(local_err);
692             return;
693         }
694     }
695 
696     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
697                    "  W2\n", cpu_index);
698 
699     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
700         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
701         monitor_printf(mon, "CPU[%04x]: %4s    %s\n", cpu_index,
702                        xive_tctx_ring_names[i], s);
703         g_free(s);
704     }
705 }
706 
707 void xive_tctx_reset(XiveTCTX *tctx)
708 {
709     memset(tctx->regs, 0, sizeof(tctx->regs));
710 
711     /* Set some defaults */
712     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
713     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
714     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
715 
716     /*
717      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
718      * CPPR is first set.
719      */
720     tctx->regs[TM_QW1_OS + TM_PIPR] =
721         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
722     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
723         ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
724 }
725 
726 static void xive_tctx_realize(DeviceState *dev, Error **errp)
727 {
728     XiveTCTX *tctx = XIVE_TCTX(dev);
729     PowerPCCPU *cpu;
730     CPUPPCState *env;
731 
732     assert(tctx->cs);
733     assert(tctx->xptr);
734 
735     cpu = POWERPC_CPU(tctx->cs);
736     env = &cpu->env;
737     switch (PPC_INPUT(env)) {
738     case PPC_FLAGS_INPUT_POWER9:
739         tctx->hv_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_HINT);
740         tctx->os_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_INT);
741         break;
742 
743     default:
744         error_setg(errp, "XIVE interrupt controller does not support "
745                    "this CPU bus model");
746         return;
747     }
748 
749     /* Connect the presenter to the VCPU (required for CPU hotplug) */
750     if (xive_in_kernel(tctx->xptr)) {
751         if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
752             return;
753         }
754     }
755 }
756 
757 static int vmstate_xive_tctx_pre_save(void *opaque)
758 {
759     XiveTCTX *tctx = XIVE_TCTX(opaque);
760     Error *local_err = NULL;
761     int ret;
762 
763     if (xive_in_kernel(tctx->xptr)) {
764         ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
765         if (ret < 0) {
766             error_report_err(local_err);
767             return ret;
768         }
769     }
770 
771     return 0;
772 }
773 
774 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
775 {
776     XiveTCTX *tctx = XIVE_TCTX(opaque);
777     Error *local_err = NULL;
778     int ret;
779 
780     if (xive_in_kernel(tctx->xptr)) {
781         /*
782          * Required for hotplugged CPU, for which the state comes
783          * after all states of the machine.
784          */
785         ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
786         if (ret < 0) {
787             error_report_err(local_err);
788             return ret;
789         }
790     }
791 
792     return 0;
793 }
794 
/*
 * Migration description of the thread context: only the raw register
 * buffer is part of the stream. The pre_save and post_load hooks
 * exchange the state with the kernel when the XIVE device is handled
 * there.
 */
static const VMStateDescription vmstate_xive_tctx = {
    .name = TYPE_XIVE_TCTX,
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = vmstate_xive_tctx_pre_save,
    .post_load = vmstate_xive_tctx_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_BUFFER(regs, XiveTCTX),
        VMSTATE_END_OF_LIST()
    },
};
806 
/*
 * Link properties of the thread context: "cpu" fills the cs field and
 * "presenter" fills the xptr field. Both are expected to be set before
 * the device is realized (xive_tctx_realize() asserts on them).
 */
static Property xive_tctx_properties[] = {
    DEFINE_PROP_LINK("cpu", XiveTCTX, cs, TYPE_CPU, CPUState *),
    DEFINE_PROP_LINK("presenter", XiveTCTX, xptr, TYPE_XIVE_PRESENTER,
                     XivePresenter *),
    DEFINE_PROP_END_OF_LIST(),
};
813 
814 static void xive_tctx_class_init(ObjectClass *klass, void *data)
815 {
816     DeviceClass *dc = DEVICE_CLASS(klass);
817 
818     dc->desc = "XIVE Interrupt Thread Context";
819     dc->realize = xive_tctx_realize;
820     dc->vmsd = &vmstate_xive_tctx;
821     device_class_set_props(dc, xive_tctx_properties);
822     /*
823      * Reason: part of XIVE interrupt controller, needs to be wired up
824      * by xive_tctx_create().
825      */
826     dc->user_creatable = false;
827 }
828 
829 static const TypeInfo xive_tctx_info = {
830     .name          = TYPE_XIVE_TCTX,
831     .parent        = TYPE_DEVICE,
832     .instance_size = sizeof(XiveTCTX),
833     .class_init    = xive_tctx_class_init,
834 };
835 
836 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp)
837 {
838     Object *obj;
839 
840     obj = object_new(TYPE_XIVE_TCTX);
841     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj);
842     object_unref(obj);
843     object_property_set_link(obj, "cpu", cpu, &error_abort);
844     object_property_set_link(obj, "presenter", OBJECT(xptr), &error_abort);
845     if (!qdev_realize(DEVICE(obj), NULL, errp)) {
846         object_unparent(obj);
847         return NULL;
848     }
849     return obj;
850 }
851 
852 void xive_tctx_destroy(XiveTCTX *tctx)
853 {
854     Object *obj = OBJECT(tctx);
855 
856     object_unparent(obj);
857 }
858 
859 /*
860  * XIVE ESB helpers
861  */
862 
/*
 * Replace the two low order bits (the PQ bits) of *@pq with the two
 * low order bits of @value. The other bits of *@pq are preserved.
 *
 * Returns the previous value of the PQ bits.
 */
uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
{
    const uint8_t previous = *pq & 0x3;

    *pq = (*pq & ~0x3) | (value & 0x3);

    return previous;
}
872 
873 bool xive_esb_trigger(uint8_t *pq)
874 {
875     uint8_t old_pq = *pq & 0x3;
876 
877     switch (old_pq) {
878     case XIVE_ESB_RESET:
879         xive_esb_set(pq, XIVE_ESB_PENDING);
880         return true;
881     case XIVE_ESB_PENDING:
882     case XIVE_ESB_QUEUED:
883         xive_esb_set(pq, XIVE_ESB_QUEUED);
884         return false;
885     case XIVE_ESB_OFF:
886         xive_esb_set(pq, XIVE_ESB_OFF);
887         return false;
888     default:
889          g_assert_not_reached();
890     }
891 }
892 
893 bool xive_esb_eoi(uint8_t *pq)
894 {
895     uint8_t old_pq = *pq & 0x3;
896 
897     switch (old_pq) {
898     case XIVE_ESB_RESET:
899     case XIVE_ESB_PENDING:
900         xive_esb_set(pq, XIVE_ESB_RESET);
901         return false;
902     case XIVE_ESB_QUEUED:
903         xive_esb_set(pq, XIVE_ESB_PENDING);
904         return true;
905     case XIVE_ESB_OFF:
906         xive_esb_set(pq, XIVE_ESB_OFF);
907         return false;
908     default:
909          g_assert_not_reached();
910     }
911 }
912 
913 /*
914  * XIVE Interrupt Source (or IVSE)
915  */
916 
917 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
918 {
919     assert(srcno < xsrc->nr_irqs);
920 
921     return xsrc->status[srcno] & 0x3;
922 }
923 
924 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
925 {
926     assert(srcno < xsrc->nr_irqs);
927 
928     return xive_esb_set(&xsrc->status[srcno], pq);
929 }
930 
931 /*
932  * Returns whether the event notification should be forwarded.
933  */
934 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
935 {
936     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
937 
938     xive_source_set_asserted(xsrc, srcno, true);
939 
940     switch (old_pq) {
941     case XIVE_ESB_RESET:
942         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
943         return true;
944     default:
945         return false;
946     }
947 }
948 
949 /*
950  * Sources can be configured with PQ offloading in which case the check
951  * on the PQ state bits of MSIs is disabled
952  */
953 static bool xive_source_esb_disabled(XiveSource *xsrc, uint32_t srcno)
954 {
955     return (xsrc->esb_flags & XIVE_SRC_PQ_DISABLE) &&
956         !xive_source_irq_is_lsi(xsrc, srcno);
957 }
958 
959 /*
960  * Returns whether the event notification should be forwarded.
961  */
962 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
963 {
964     bool ret;
965 
966     assert(srcno < xsrc->nr_irqs);
967 
968     if (xive_source_esb_disabled(xsrc, srcno)) {
969         return true;
970     }
971 
972     ret = xive_esb_trigger(&xsrc->status[srcno]);
973 
974     if (xive_source_irq_is_lsi(xsrc, srcno) &&
975         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
976         qemu_log_mask(LOG_GUEST_ERROR,
977                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
978     }
979 
980     return ret;
981 }
982 
983 /*
984  * Returns whether the event notification should be forwarded.
985  */
986 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
987 {
988     bool ret;
989 
990     assert(srcno < xsrc->nr_irqs);
991 
992     if (xive_source_esb_disabled(xsrc, srcno)) {
993         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EOI for IRQ %d\n", srcno);
994         return false;
995     }
996 
997     ret = xive_esb_eoi(&xsrc->status[srcno]);
998 
999     /*
1000      * LSI sources do not set the Q bit but they can still be
1001      * asserted, in which case we should forward a new event
1002      * notification
1003      */
1004     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1005         xive_source_is_asserted(xsrc, srcno)) {
1006         ret = xive_source_lsi_trigger(xsrc, srcno);
1007     }
1008 
1009     return ret;
1010 }
1011 
1012 /*
1013  * Forward the source event notification to the Router
1014  */
1015 static void xive_source_notify(XiveSource *xsrc, int srcno)
1016 {
1017     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
1018     bool pq_checked = !xive_source_esb_disabled(xsrc, srcno);
1019 
1020     if (xnc->notify) {
1021         xnc->notify(xsrc->xive, srcno, pq_checked);
1022     }
1023 }
1024 
1025 /*
1026  * In a two pages ESB MMIO setting, even page is the trigger page, odd
1027  * page is for management
1028  */
1029 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
1030 {
1031     return !((addr >> shift) & 1);
1032 }
1033 
1034 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
1035 {
1036     return xive_source_esb_has_2page(xsrc) &&
1037         addr_is_even(addr, xsrc->esb_shift - 1);
1038 }
1039 
1040 /*
1041  * ESB MMIO loads
1042  *                      Trigger page    Management/EOI page
1043  *
1044  * ESB MMIO setting     2 pages         1 or 2 pages
1045  *
1046  * 0x000 .. 0x3FF       -1              EOI and return 0|1
1047  * 0x400 .. 0x7FF       -1              EOI and return 0|1
1048  * 0x800 .. 0xBFF       -1              return PQ
1049  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
1050  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
1051  * 0xE00 .. 0xDFF       -1              return PQ and atomically PQ=10
1052  * 0xF00 .. 0xDFF       -1              return PQ and atomically PQ=11
1053  */
1054 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
1055 {
1056     XiveSource *xsrc = XIVE_SOURCE(opaque);
1057     uint32_t offset = addr & 0xFFF;
1058     uint32_t srcno = addr >> xsrc->esb_shift;
1059     uint64_t ret = -1;
1060 
1061     /* In a two pages ESB MMIO setting, trigger page should not be read */
1062     if (xive_source_is_trigger_page(xsrc, addr)) {
1063         qemu_log_mask(LOG_GUEST_ERROR,
1064                       "XIVE: invalid load on IRQ %d trigger page at "
1065                       "0x%"HWADDR_PRIx"\n", srcno, addr);
1066         return -1;
1067     }
1068 
1069     switch (offset) {
1070     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1071         ret = xive_source_esb_eoi(xsrc, srcno);
1072 
1073         /* Forward the source event notification for routing */
1074         if (ret) {
1075             xive_source_notify(xsrc, srcno);
1076         }
1077         break;
1078 
1079     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1080         ret = xive_source_esb_get(xsrc, srcno);
1081         break;
1082 
1083     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1084     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1085     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1086     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1087         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1088         break;
1089     default:
1090         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
1091                       offset);
1092     }
1093 
1094     trace_xive_source_esb_read(addr, srcno, ret);
1095 
1096     return ret;
1097 }
1098 
1099 /*
1100  * ESB MMIO stores
1101  *                      Trigger page    Management/EOI page
1102  *
1103  * ESB MMIO setting     2 pages         1 or 2 pages
1104  *
1105  * 0x000 .. 0x3FF       Trigger         Trigger
1106  * 0x400 .. 0x7FF       Trigger         EOI
1107  * 0x800 .. 0xBFF       Trigger         undefined
1108  * 0xC00 .. 0xCFF       Trigger         PQ=00
1109  * 0xD00 .. 0xDFF       Trigger         PQ=01
1110  * 0xE00 .. 0xDFF       Trigger         PQ=10
1111  * 0xF00 .. 0xDFF       Trigger         PQ=11
1112  */
1113 static void xive_source_esb_write(void *opaque, hwaddr addr,
1114                                   uint64_t value, unsigned size)
1115 {
1116     XiveSource *xsrc = XIVE_SOURCE(opaque);
1117     uint32_t offset = addr & 0xFFF;
1118     uint32_t srcno = addr >> xsrc->esb_shift;
1119     bool notify = false;
1120 
1121     trace_xive_source_esb_write(addr, srcno, value);
1122 
1123     /* In a two pages ESB MMIO setting, trigger page only triggers */
1124     if (xive_source_is_trigger_page(xsrc, addr)) {
1125         notify = xive_source_esb_trigger(xsrc, srcno);
1126         goto out;
1127     }
1128 
1129     switch (offset) {
1130     case 0 ... 0x3FF:
1131         notify = xive_source_esb_trigger(xsrc, srcno);
1132         break;
1133 
1134     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
1135         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
1136             qemu_log_mask(LOG_GUEST_ERROR,
1137                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
1138             return;
1139         }
1140 
1141         notify = xive_source_esb_eoi(xsrc, srcno);
1142         break;
1143 
1144     /*
1145      * This is an internal offset used to inject triggers when the PQ
1146      * state bits are not controlled locally. Such as for LSIs when
1147      * under ABT mode.
1148      */
1149     case XIVE_ESB_INJECT ... XIVE_ESB_INJECT + 0x3FF:
1150         notify = true;
1151         break;
1152 
1153     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1154     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1155     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1156     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1157         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1158         break;
1159 
1160     default:
1161         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
1162                       offset);
1163         return;
1164     }
1165 
1166 out:
1167     /* Forward the source event notification for routing */
1168     if (notify) {
1169         xive_source_notify(xsrc, srcno);
1170     }
1171 }
1172 
/*
 * MMIO callbacks of the emulated ESB region. The region is declared
 * big-endian, and both the accepted (.valid) and the implemented
 * (.impl) access sizes range from 1 to 8 bytes.
 */
static const MemoryRegionOps xive_source_esb_ops = {
    .read = xive_source_esb_read,
    .write = xive_source_esb_write,
    .endianness = DEVICE_BIG_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};
1186 
1187 void xive_source_set_irq(void *opaque, int srcno, int val)
1188 {
1189     XiveSource *xsrc = XIVE_SOURCE(opaque);
1190     bool notify = false;
1191 
1192     if (xive_source_irq_is_lsi(xsrc, srcno)) {
1193         if (val) {
1194             notify = xive_source_lsi_trigger(xsrc, srcno);
1195         } else {
1196             xive_source_set_asserted(xsrc, srcno, false);
1197         }
1198     } else {
1199         if (val) {
1200             notify = xive_source_esb_trigger(xsrc, srcno);
1201         }
1202     }
1203 
1204     /* Forward the source event notification for routing */
1205     if (notify) {
1206         xive_source_notify(xsrc, srcno);
1207     }
1208 }
1209 
1210 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
1211 {
1212     int i;
1213 
1214     for (i = 0; i < xsrc->nr_irqs; i++) {
1215         uint8_t pq = xive_source_esb_get(xsrc, i);
1216 
1217         if (pq == XIVE_ESB_OFF) {
1218             continue;
1219         }
1220 
1221         monitor_printf(mon, "  %08x %s %c%c%c\n", i + offset,
1222                        xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1223                        pq & XIVE_ESB_VAL_P ? 'P' : '-',
1224                        pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1225                        xive_source_is_asserted(xsrc, i) ? 'A' : ' ');
1226     }
1227 }
1228 
1229 static void xive_source_reset(void *dev)
1230 {
1231     XiveSource *xsrc = XIVE_SOURCE(dev);
1232 
1233     /* Do not clear the LSI bitmap */
1234 
1235     memset(xsrc->status, xsrc->reset_pq, xsrc->nr_irqs);
1236 }
1237 
1238 static void xive_source_realize(DeviceState *dev, Error **errp)
1239 {
1240     XiveSource *xsrc = XIVE_SOURCE(dev);
1241     size_t esb_len = xive_source_esb_len(xsrc);
1242 
1243     assert(xsrc->xive);
1244 
1245     if (!xsrc->nr_irqs) {
1246         error_setg(errp, "Number of interrupt needs to be greater than 0");
1247         return;
1248     }
1249 
1250     if (xsrc->esb_shift != XIVE_ESB_4K &&
1251         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1252         xsrc->esb_shift != XIVE_ESB_64K &&
1253         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1254         error_setg(errp, "Invalid ESB shift setting");
1255         return;
1256     }
1257 
1258     xsrc->status = g_malloc0(xsrc->nr_irqs);
1259     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1260 
1261     memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
1262     memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
1263                           &xive_source_esb_ops, xsrc, "xive.esb-emulated",
1264                           esb_len);
1265     memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
1266 
1267     qemu_register_reset(xive_source_reset, dev);
1268 }
1269 
/*
 * Migration description of the source: the number of interrupts
 * (declared with the _EQUAL variant, presumably checked for equality
 * on the destination) and the status array, whose length is given by
 * nr_irqs. The LSI bitmap is not part of this description.
 */
static const VMStateDescription vmstate_xive_source = {
    .name = TYPE_XIVE_SOURCE,
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
        VMSTATE_END_OF_LIST()
    },
};
1280 
/*
 * The default XIVE interrupt source setting for the ESB MMIOs is two
 * 64k pages without Store EOI, to be in sync with KVM.
 *
 * "nr-irqs" defaults to 0 and must be set before realize, which
 * rejects a source without interrupts. "xive" links the source to the
 * XiveNotifier receiving its event notifications.
 */
static Property xive_source_properties[] = {
    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
    /*
     * By default, PQs are initialized to 0b01 (Q=1) which corresponds
     * to "ints off"
     */
    DEFINE_PROP_UINT8("reset-pq", XiveSource, reset_pq, XIVE_ESB_OFF),
    DEFINE_PROP_LINK("xive", XiveSource, xive, TYPE_XIVE_NOTIFIER,
                     XiveNotifier *),
    DEFINE_PROP_END_OF_LIST(),
};
1298 
1299 static void xive_source_class_init(ObjectClass *klass, void *data)
1300 {
1301     DeviceClass *dc = DEVICE_CLASS(klass);
1302 
1303     dc->desc    = "XIVE Interrupt Source";
1304     device_class_set_props(dc, xive_source_properties);
1305     dc->realize = xive_source_realize;
1306     dc->vmsd    = &vmstate_xive_source;
1307     /*
1308      * Reason: part of XIVE interrupt controller, needs to be wired up,
1309      * e.g. by spapr_xive_instance_init().
1310      */
1311     dc->user_creatable = false;
1312 }
1313 
/*
 * QOM type description of the XIVE source: a plain device whose
 * instances are XiveSource objects, set up by xive_source_class_init().
 */
static const TypeInfo xive_source_info = {
    .name          = TYPE_XIVE_SOURCE,
    .parent        = TYPE_DEVICE,
    .instance_size = sizeof(XiveSource),
    .class_init    = xive_source_class_init,
};
1320 
1321 /*
1322  * XiveEND helpers
1323  */
1324 
1325 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
1326 {
1327     uint64_t qaddr_base = xive_end_qaddr(end);
1328     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1329     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1330     uint32_t qentries = 1 << (qsize + 10);
1331     int i;
1332 
1333     /*
1334      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1335      */
1336     monitor_printf(mon, " [ ");
1337     qindex = (qindex - (width - 1)) & (qentries - 1);
1338     for (i = 0; i < width; i++) {
1339         uint64_t qaddr = qaddr_base + (qindex << 2);
1340         uint32_t qdata = -1;
1341 
1342         if (dma_memory_read(&address_space_memory, qaddr,
1343                             &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1344             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1345                           HWADDR_PRIx "\n", qaddr);
1346             return;
1347         }
1348         monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
1349                        be32_to_cpu(qdata));
1350         qindex = (qindex + 1) & (qentries - 1);
1351     }
1352     monitor_printf(mon, "]");
1353 }
1354 
1355 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
1356 {
1357     uint64_t qaddr_base = xive_end_qaddr(end);
1358     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1359     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1360     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1361     uint32_t qentries = 1 << (qsize + 10);
1362 
1363     uint32_t nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
1364     uint32_t nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1365     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1366     uint8_t pq;
1367 
1368     if (!xive_end_is_valid(end)) {
1369         return;
1370     }
1371 
1372     pq = xive_get_field32(END_W1_ESn, end->w1);
1373 
1374     monitor_printf(mon, "  %08x %c%c %c%c%c%c%c%c%c%c prio:%d nvt:%02x/%04x",
1375                    end_idx,
1376                    pq & XIVE_ESB_VAL_P ? 'P' : '-',
1377                    pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1378                    xive_end_is_valid(end)    ? 'v' : '-',
1379                    xive_end_is_enqueue(end)  ? 'q' : '-',
1380                    xive_end_is_notify(end)   ? 'n' : '-',
1381                    xive_end_is_backlog(end)  ? 'b' : '-',
1382                    xive_end_is_escalate(end) ? 'e' : '-',
1383                    xive_end_is_uncond_escalation(end)   ? 'u' : '-',
1384                    xive_end_is_silent_escalation(end)   ? 's' : '-',
1385                    xive_end_is_firmware(end)   ? 'f' : '-',
1386                    priority, nvt_blk, nvt_idx);
1387 
1388     if (qaddr_base) {
1389         monitor_printf(mon, " eq:@%08"PRIx64"% 6d/%5d ^%d",
1390                        qaddr_base, qindex, qentries, qgen);
1391         xive_end_queue_pic_print_info(end, 6, mon);
1392     }
1393     monitor_printf(mon, "\n");
1394 }
1395 
1396 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1397 {
1398     uint64_t qaddr_base = xive_end_qaddr(end);
1399     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1400     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1401     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1402 
1403     uint64_t qaddr = qaddr_base + (qindex << 2);
1404     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1405     uint32_t qentries = 1 << (qsize + 10);
1406 
1407     if (dma_memory_write(&address_space_memory, qaddr,
1408                          &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1409         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1410                       HWADDR_PRIx "\n", qaddr);
1411         return;
1412     }
1413 
1414     qindex = (qindex + 1) & (qentries - 1);
1415     if (qindex == 0) {
1416         qgen ^= 1;
1417         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1418     }
1419     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1420 }
1421 
1422 void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx,
1423                                    Monitor *mon)
1424 {
1425     XiveEAS *eas = (XiveEAS *) &end->w4;
1426     uint8_t pq;
1427 
1428     if (!xive_end_is_escalate(end)) {
1429         return;
1430     }
1431 
1432     pq = xive_get_field32(END_W1_ESe, end->w1);
1433 
1434     monitor_printf(mon, "  %08x %c%c %c%c end:%02x/%04x data:%08x\n",
1435                    end_idx,
1436                    pq & XIVE_ESB_VAL_P ? 'P' : '-',
1437                    pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1438                    xive_eas_is_valid(eas) ? 'V' : ' ',
1439                    xive_eas_is_masked(eas) ? 'M' : ' ',
1440                    (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1441                    (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1442                    (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1443 }
1444 
1445 /*
1446  * XIVE Router (aka. Virtualization Controller or IVRE)
1447  */
1448 
1449 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1450                         XiveEAS *eas)
1451 {
1452     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1453 
1454     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1455 }
1456 
1457 static
1458 int xive_router_get_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1459                        uint8_t *pq)
1460 {
1461     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1462 
1463     return xrc->get_pq(xrtr, eas_blk, eas_idx, pq);
1464 }
1465 
1466 static
1467 int xive_router_set_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1468                        uint8_t *pq)
1469 {
1470     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1471 
1472     return xrc->set_pq(xrtr, eas_blk, eas_idx, pq);
1473 }
1474 
1475 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1476                         XiveEND *end)
1477 {
1478    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1479 
1480    return xrc->get_end(xrtr, end_blk, end_idx, end);
1481 }
1482 
1483 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1484                           XiveEND *end, uint8_t word_number)
1485 {
1486    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1487 
1488    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1489 }
1490 
1491 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1492                         XiveNVT *nvt)
1493 {
1494    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1495 
1496    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1497 }
1498 
1499 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1500                         XiveNVT *nvt, uint8_t word_number)
1501 {
1502    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1503 
1504    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1505 }
1506 
1507 static int xive_router_get_block_id(XiveRouter *xrtr)
1508 {
1509    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1510 
1511    return xrc->get_block_id(xrtr);
1512 }
1513 
1514 static void xive_router_realize(DeviceState *dev, Error **errp)
1515 {
1516     XiveRouter *xrtr = XIVE_ROUTER(dev);
1517 
1518     assert(xrtr->xfb);
1519 }
1520 
1521 static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
1522 {
1523     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1524 
1525     return xrc->end_notify(xrtr, eas);
1526 }
1527 
1528 /*
1529  * Encode the HW CAM line in the block group mode format :
1530  *
1531  *   chip << 19 | 0000000 0 0001 thread (7Bit)
1532  */
1533 static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
1534 {
1535     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1536     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1537     uint8_t blk = xive_router_get_block_id(XIVE_ROUTER(xptr));
1538 
1539     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
1540 }
1541 
1542 /*
1543  * The thread context register words are in big-endian format.
1544  */
1545 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
1546                               uint8_t format,
1547                               uint8_t nvt_blk, uint32_t nvt_idx,
1548                               bool cam_ignore, uint32_t logic_serv)
1549 {
1550     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1551     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1552     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1553     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1554     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1555 
1556     /*
1557      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1558      * identifier are ignored in the "CAM" match.
1559      */
1560 
1561     if (format == 0) {
1562         if (cam_ignore == true) {
1563             /*
1564              * F=0 & i=1: Logical server notification (bits ignored at
1565              * the end of the NVT identifier)
1566              */
1567             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1568                           nvt_blk, nvt_idx);
1569              return -1;
1570         }
1571 
1572         /* F=0 & i=0: Specific NVT notification */
1573 
1574         /* PHYS ring */
1575         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1576             cam == xive_tctx_hw_cam_line(xptr, tctx)) {
1577             return TM_QW3_HV_PHYS;
1578         }
1579 
1580         /* HV POOL ring */
1581         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1582             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1583             return TM_QW2_HV_POOL;
1584         }
1585 
1586         /* OS ring */
1587         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1588             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1589             return TM_QW1_OS;
1590         }
1591     } else {
1592         /* F=1 : User level Event-Based Branch (EBB) notification */
1593 
1594         /* USER ring */
1595         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1596              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1597              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1598              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1599             return TM_QW0_USER;
1600         }
1601     }
1602     return -1;
1603 }
1604 
1605 /*
1606  * This is our simple Xive Presenter Engine model. It is merged in the
1607  * Router as it does not require an extra object.
1608  *
1609  * It receives notification requests sent by the IVRE to find one
1610  * matching NVT (or more) dispatched on the processor threads. In case
1611  * of a single NVT notification, the process is abbreviated and the
1612  * thread is signaled if a match is found. In case of a logical server
1613  * notification (bits ignored at the end of the NVT identifier), the
1614  * IVPE and IVRE select a winning thread using different filters. This
1615  * involves 2 or 3 exchanges on the PowerBus that the model does not
1616  * support.
1617  *
1618  * The parameters represent what is sent on the PowerBus
1619  */
1620 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
1621                            uint8_t nvt_blk, uint32_t nvt_idx,
1622                            bool cam_ignore, uint8_t priority,
1623                            uint32_t logic_serv)
1624 {
1625     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
1626     XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
1627     int count;
1628 
1629     /*
1630      * Ask the machine to scan the interrupt controllers for a match
1631      */
1632     count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore,
1633                            priority, logic_serv, &match);
1634     if (count < 0) {
1635         return false;
1636     }
1637 
1638     /* handle CPU exception delivery */
1639     if (count) {
1640         trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
1641         xive_tctx_ipb_update(match.tctx, match.ring,
1642                              xive_priority_to_ipb(priority));
1643     }
1644 
1645     return !!count;
1646 }
1647 
1648 /*
1649  * Notification using the END ESe/ESn bit (Event State Buffer for
1650  * escalation and notification). Provide further coalescing in the
1651  * Router.
1652  */
1653 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
1654                                       uint32_t end_idx, XiveEND *end,
1655                                       uint32_t end_esmask)
1656 {
1657     uint8_t pq = xive_get_field32(end_esmask, end->w1);
1658     bool notify = xive_esb_trigger(&pq);
1659 
1660     if (pq != xive_get_field32(end_esmask, end->w1)) {
1661         end->w1 = xive_set_field32(end_esmask, end->w1, pq);
1662         xive_router_write_end(xrtr, end_blk, end_idx, end, 1);
1663     }
1664 
1665     /* ESe/n[Q]=1 : end of notification */
1666     return notify;
1667 }
1668 
/*
 * An END trigger can come from an event trigger (IPI or HW) or from
 * another chip. We don't model the PowerBus but the END trigger
 * message has the same parameters as in the function below.
 *
 * 'eas' provides the target END block and index, and the event data.
 * The function works on a local copy of the END and, in order:
 *
 *   1. enqueues the event data if the END has enqueuing enabled,
 *   2. unless the END is silent, notifies the presenter, coalescing
 *      with the ESn PQ bits when the END is not in notify mode,
 *   3. if no thread context matched, records the priority in the NVT
 *      IPB when backlog is enabled,
 *   4. if escalation is enabled, forwards the escalation EAS held in
 *      words 4-5 of the END as a new END trigger.
 *
 * Lookup failures and invalid structures are logged as guest errors
 * and stop the processing.
 */
void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
{
    XiveEND end;
    uint8_t priority;
    uint8_t format;
    uint8_t nvt_blk;
    uint32_t nvt_idx;
    XiveNVT nvt;
    bool found;

    uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);

    /* END cache lookup */
    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
                      end_idx);
        return;
    }

    if (!xive_end_is_valid(&end)) {
        trace_xive_router_end_notify(end_blk, end_idx, end_data);
        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
                      end_blk, end_idx);
        return;
    }

    if (xive_end_is_enqueue(&end)) {
        xive_end_enqueue(&end, end_data);
        /* Enqueuing event data modifies the EQ toggle and index */
        xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
    }

    /*
     * When the END is silent, we skip the notification part.
     */
    if (xive_end_is_silent_escalation(&end)) {
        goto do_escalation;
    }

    /*
     * The W7 format depends on the F bit in W6. It defines the type
     * of the notification :
     *
     *   F=0 : single or multiple NVT notification
     *   F=1 : User level Event-Based Branch (EBB) notification, no
     *         priority
     */
    format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
    priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);

    /* The END is masked */
    if (format == 0 && priority == 0xff) {
        return;
    }

    /*
     * Check the END ESn (Event State Buffer for notification) for
     * even further coalescing in the Router
     */
    if (!xive_end_is_notify(&end)) {
        /* ESn[Q]=1 : end of notification */
        if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
                                       &end, END_W1_ESn)) {
            return;
        }
    }

    /*
     * Follows IVPE notification
     */
    nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end.w6);
    nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end.w6);

    /* NVT cache lookup */
    if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
                      nvt_blk, nvt_idx);
        return;
    }

    if (!xive_nvt_is_valid(&nvt)) {
        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
                      nvt_blk, nvt_idx);
        return;
    }

    found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
                          xive_get_field32(END_W7_F0_IGNORE, end.w7),
                          priority,
                          xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));

    /* TODO: Auto EOI. */

    /* A thread context took the notification: nothing left to do */
    if (found) {
        return;
    }

    /*
     * If no matching NVT is dispatched on a HW thread :
     * - specific VP: update the NVT structure if backlog is activated
     * - logical server : forward request to IVPE (not supported)
     */
    if (xive_end_is_backlog(&end)) {
        uint8_t ipb;

        if (format == 1) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "XIVE: END %x/%x invalid config: F1 & backlog\n",
                          end_blk, end_idx);
            return;
        }
        /*
         * Record the IPB in the associated NVT structure for later
         * use. The presenter will resend the interrupt when the vCPU
         * is dispatched again on a HW thread.
         */
        ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
            xive_priority_to_ipb(priority);
        nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
        xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);

        /*
         * On HW, follows a "Broadcast Backlog" to IVPEs
         */
    }

do_escalation:
    /*
     * If activated, escalate notification using the ESe PQ bits and
     * the EAS in w4-5
     */
    if (!xive_end_is_escalate(&end)) {
        return;
    }

    /*
     * Check the END ESe (Event State Buffer for escalation) for even
     * further coalescing in the Router
     */
    if (!xive_end_is_uncond_escalation(&end)) {
        /* ESe[Q]=1 : end of notification */
        if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
                                       &end, END_W1_ESe)) {
            return;
        }
    }

    trace_xive_router_end_escalate(end_blk, end_idx,
           (uint8_t) xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
           (uint32_t) xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
           (uint32_t) xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
    /*
     * The END trigger becomes an Escalation trigger
     */
    xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
}
1832 
1833 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
1834 {
1835     XiveRouter *xrtr = XIVE_ROUTER(xn);
1836     uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
1837     uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
1838     XiveEAS eas;
1839 
1840     /* EAS cache lookup */
1841     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
1842         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
1843         return;
1844     }
1845 
1846     if (!pq_checked) {
1847         bool notify;
1848         uint8_t pq;
1849 
1850         /* PQ cache lookup */
1851         if (xive_router_get_pq(xrtr, eas_blk, eas_idx, &pq)) {
1852             /* Set FIR */
1853             g_assert_not_reached();
1854         }
1855 
1856         notify = xive_esb_trigger(&pq);
1857 
1858         if (xive_router_set_pq(xrtr, eas_blk, eas_idx, &pq)) {
1859             /* Set FIR */
1860             g_assert_not_reached();
1861         }
1862 
1863         if (!notify) {
1864             return;
1865         }
1866     }
1867 
1868     if (!xive_eas_is_valid(&eas)) {
1869         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
1870         return;
1871     }
1872 
1873     if (xive_eas_is_masked(&eas)) {
1874         /* Notification completed */
1875         return;
1876     }
1877 
1878     /*
1879      * The event trigger becomes an END trigger
1880      */
1881     xive_router_end_notify_handler(xrtr, &eas);
1882 }
1883 
/*
 * qdev properties of the XIVE router: a single "xive-fabric" link
 * stored in the xfb field of XiveRouter and typed TYPE_XIVE_FABRIC.
 */
static Property xive_router_properties[] = {
    DEFINE_PROP_LINK("xive-fabric", XiveRouter, xfb,
                     TYPE_XIVE_FABRIC, XiveFabric *),
    DEFINE_PROP_END_OF_LIST(),
};
1889 
1890 static void xive_router_class_init(ObjectClass *klass, void *data)
1891 {
1892     DeviceClass *dc = DEVICE_CLASS(klass);
1893     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
1894     XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
1895 
1896     dc->desc    = "XIVE Router Engine";
1897     device_class_set_props(dc, xive_router_properties);
1898     /* Parent is SysBusDeviceClass. No need to call its realize hook */
1899     dc->realize = xive_router_realize;
1900     xnc->notify = xive_router_notify;
1901 
1902     /* By default, the router handles END triggers locally */
1903     xrc->end_notify = xive_router_end_notify;
1904 }
1905 
/*
 * QOM type description of the XIVE router. The type is abstract,
 * derives from the sysbus device type and declares the XIVE notifier
 * and XIVE presenter interfaces.
 */
static const TypeInfo xive_router_info = {
    .name          = TYPE_XIVE_ROUTER,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .abstract      = true,
    .instance_size = sizeof(XiveRouter),
    .class_size    = sizeof(XiveRouterClass),
    .class_init    = xive_router_class_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_XIVE_NOTIFIER },
        { TYPE_XIVE_PRESENTER },
        { }  /* end of list */
    }
};
1919 
1920 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
1921 {
1922     if (!xive_eas_is_valid(eas)) {
1923         return;
1924     }
1925 
1926     monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
1927                    lisn, xive_eas_is_masked(eas) ? "M" : " ",
1928                    (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1929                    (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1930                    (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1931 }
1932 
1933 /*
1934  * END ESB MMIO loads
1935  */
1936 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
1937 {
1938     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
1939     uint32_t offset = addr & 0xFFF;
1940     uint8_t end_blk;
1941     uint32_t end_idx;
1942     XiveEND end;
1943     uint32_t end_esmask;
1944     uint8_t pq;
1945     uint64_t ret = -1;
1946 
1947     /*
1948      * The block id should be deduced from the load address on the END
1949      * ESB MMIO but our model only supports a single block per XIVE chip.
1950      */
1951     end_blk = xive_router_get_block_id(xsrc->xrtr);
1952     end_idx = addr >> (xsrc->esb_shift + 1);
1953 
1954     trace_xive_end_source_read(end_blk, end_idx, addr);
1955 
1956     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
1957         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1958                       end_idx);
1959         return -1;
1960     }
1961 
1962     if (!xive_end_is_valid(&end)) {
1963         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1964                       end_blk, end_idx);
1965         return -1;
1966     }
1967 
1968     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
1969     pq = xive_get_field32(end_esmask, end.w1);
1970 
1971     switch (offset) {
1972     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1973         ret = xive_esb_eoi(&pq);
1974 
1975         /* Forward the source event notification for routing ?? */
1976         break;
1977 
1978     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1979         ret = pq;
1980         break;
1981 
1982     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1983     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1984     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1985     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1986         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
1987         break;
1988     default:
1989         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
1990                       offset);
1991         return -1;
1992     }
1993 
1994     if (pq != xive_get_field32(end_esmask, end.w1)) {
1995         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
1996         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
1997     }
1998 
1999     return ret;
2000 }
2001 
2002 /*
2003  * END ESB MMIO stores are invalid
2004  */
2005 static void xive_end_source_write(void *opaque, hwaddr addr,
2006                                   uint64_t value, unsigned size)
2007 {
2008     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
2009                   HWADDR_PRIx"\n", addr);
2010 }
2011 
/*
 * MMIO callbacks of the END ESB region: big-endian device, with access
 * sizes from 1 to 8 bytes both accepted (.valid) and implemented
 * (.impl) by the handlers.
 */
static const MemoryRegionOps xive_end_source_ops = {
    .read = xive_end_source_read,
    .write = xive_end_source_write,
    .endianness = DEVICE_BIG_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};
2025 
2026 static void xive_end_source_realize(DeviceState *dev, Error **errp)
2027 {
2028     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
2029 
2030     assert(xsrc->xrtr);
2031 
2032     if (!xsrc->nr_ends) {
2033         error_setg(errp, "Number of interrupt needs to be greater than 0");
2034         return;
2035     }
2036 
2037     if (xsrc->esb_shift != XIVE_ESB_4K &&
2038         xsrc->esb_shift != XIVE_ESB_64K) {
2039         error_setg(errp, "Invalid ESB shift setting");
2040         return;
2041     }
2042 
2043     /*
2044      * Each END is assigned an even/odd pair of MMIO pages, the even page
2045      * manages the ESn field while the odd page manages the ESe field.
2046      */
2047     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
2048                           &xive_end_source_ops, xsrc, "xive.end",
2049                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
2050 }
2051 
/*
 * qdev properties of the END source:
 *   "nr-ends" - number of ENDs, defaults to 0 (rejected at realize time)
 *   "shift"   - ESB page shift, defaults to XIVE_ESB_64K
 *   "xive"    - link to the XIVE router, stored in the xrtr field
 */
static Property xive_end_source_properties[] = {
    DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
    DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
    DEFINE_PROP_LINK("xive", XiveENDSource, xrtr, TYPE_XIVE_ROUTER,
                     XiveRouter *),
    DEFINE_PROP_END_OF_LIST(),
};
2059 
2060 static void xive_end_source_class_init(ObjectClass *klass, void *data)
2061 {
2062     DeviceClass *dc = DEVICE_CLASS(klass);
2063 
2064     dc->desc    = "XIVE END Source";
2065     device_class_set_props(dc, xive_end_source_properties);
2066     dc->realize = xive_end_source_realize;
2067     /*
2068      * Reason: part of XIVE interrupt controller, needs to be wired up,
2069      * e.g. by spapr_xive_instance_init().
2070      */
2071     dc->user_creatable = false;
2072 }
2073 
/*
 * QOM type description of the END source, a plain device whose class is
 * set up by xive_end_source_class_init().
 */
static const TypeInfo xive_end_source_info = {
    .name          = TYPE_XIVE_END_SOURCE,
    .parent        = TYPE_DEVICE,
    .instance_size = sizeof(XiveENDSource),
    .class_init    = xive_end_source_class_init,
};
2080 
/*
 * XIVE Notifier
 *
 * Interface type (its parent is TYPE_INTERFACE) whose class structure
 * is XiveNotifierClass.
 */
static const TypeInfo xive_notifier_info = {
    .name = TYPE_XIVE_NOTIFIER,
    .parent = TYPE_INTERFACE,
    .class_size = sizeof(XiveNotifierClass),
};
2089 
/*
 * XIVE Presenter
 *
 * Interface type (its parent is TYPE_INTERFACE) whose class structure
 * is XivePresenterClass.
 */
static const TypeInfo xive_presenter_info = {
    .name = TYPE_XIVE_PRESENTER,
    .parent = TYPE_INTERFACE,
    .class_size = sizeof(XivePresenterClass),
};
2098 
/*
 * XIVE Fabric
 *
 * Interface type (its parent is TYPE_INTERFACE) whose class structure
 * is XiveFabricClass.
 */
static const TypeInfo xive_fabric_info = {
    .name = TYPE_XIVE_FABRIC,
    .parent = TYPE_INTERFACE,
    .class_size = sizeof(XiveFabricClass),
};
2107 
/*
 * Register the TypeInfo descriptors of the XIVE model: the fabric,
 * notifier and presenter interfaces, the router, the END source, and
 * the source and thread context types described elsewhere in this file.
 */
static void xive_register_types(void)
{
    type_register_static(&xive_fabric_info);
    type_register_static(&xive_source_info);
    type_register_static(&xive_notifier_info);
    type_register_static(&xive_presenter_info);
    type_register_static(&xive_router_info);
    type_register_static(&xive_end_source_info);
    type_register_static(&xive_tctx_info);
}

/* Hook the registration function into the type initialization phase */
type_init(xive_register_types)
2120