xref: /openbmc/qemu/hw/intc/xive.c (revision 9d466ab9b6f27a5d5b7a0ec5a7ad6f60e82fafda)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qemu/module.h"
12 #include "qapi/error.h"
13 #include "target/ppc/cpu.h"
14 #include "system/cpus.h"
15 #include "system/dma.h"
16 #include "system/reset.h"
17 #include "hw/qdev-properties.h"
18 #include "migration/vmstate.h"
19 #include "hw/irq.h"
20 #include "hw/ppc/xive.h"
21 #include "hw/ppc/xive2.h"
22 #include "hw/ppc/xive_regs.h"
23 #include "trace.h"
24 
25 /*
26  * XIVE Thread Interrupt Management context
27  */
28 
29 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
30 {
31     switch (ring) {
32     case TM_QW0_USER:
33         return 0; /* Not supported */
34     case TM_QW1_OS:
35         return tctx->os_output;
36     case TM_QW2_HV_POOL:
37     case TM_QW3_HV_PHYS:
38         return tctx->hv_output;
39     default:
40         return 0;
41     }
42 }
43 
44 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
45 {
46     uint8_t *regs = &tctx->regs[ring];
47     uint8_t nsr = regs[TM_NSR];
48 
49     qemu_irq_lower(xive_tctx_output(tctx, ring));
50 
51     if (regs[TM_NSR] != 0) {
52         uint8_t cppr = regs[TM_PIPR];
53         uint8_t alt_ring;
54         uint8_t *alt_regs;
55 
56         /* POOL interrupt uses IPB in QW2, POOL ring */
57         if ((ring == TM_QW3_HV_PHYS) &&
58             ((nsr & TM_QW3_NSR_HE) == (TM_QW3_NSR_HE_POOL << 6))) {
59             alt_ring = TM_QW2_HV_POOL;
60         } else {
61             alt_ring = ring;
62         }
63         alt_regs = &tctx->regs[alt_ring];
64 
65         regs[TM_CPPR] = cppr;
66 
67         /*
68          * If the interrupt was for a specific VP, reset the pending
69          * buffer bit, otherwise clear the logical server indicator
70          */
71         if (!(regs[TM_NSR] & TM_NSR_GRP_LVL)) {
72             alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
73         }
74 
75         /* Clear the exception from NSR */
76         regs[TM_NSR] = 0;
77 
78         trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
79                                alt_regs[TM_IPB], regs[TM_PIPR],
80                                regs[TM_CPPR], regs[TM_NSR]);
81     }
82 
83     return ((uint64_t)nsr << 8) | regs[TM_CPPR];
84 }
85 
86 void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level)
87 {
88     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
89     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
90     uint8_t *alt_regs = &tctx->regs[alt_ring];
91     uint8_t *regs = &tctx->regs[ring];
92 
93     if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
94         switch (ring) {
95         case TM_QW1_OS:
96             regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
97             break;
98         case TM_QW2_HV_POOL:
99             alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F);
100             break;
101         case TM_QW3_HV_PHYS:
102             regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
103             break;
104         default:
105             g_assert_not_reached();
106         }
107         trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
108                                regs[TM_IPB], alt_regs[TM_PIPR],
109                                alt_regs[TM_CPPR], alt_regs[TM_NSR]);
110         qemu_irq_raise(xive_tctx_output(tctx, ring));
111     } else {
112         alt_regs[TM_NSR] = 0;
113         qemu_irq_lower(xive_tctx_output(tctx, ring));
114     }
115 }
116 
117 void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring)
118 {
119     /*
120      * Lower the External interrupt. Used when pulling a context. It is
121      * necessary to avoid catching it in the higher privilege context. It
122      * should be raised again when re-pushing the lower privilege context.
123      */
124     qemu_irq_lower(xive_tctx_output(tctx, ring));
125 }
126 
127 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
128 {
129     uint8_t *regs = &tctx->regs[ring];
130     uint8_t pipr_min;
131     uint8_t ring_min;
132 
133     trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
134                              regs[TM_IPB], regs[TM_PIPR],
135                              cppr, regs[TM_NSR]);
136 
137     if (cppr > XIVE_PRIORITY_MAX) {
138         cppr = 0xff;
139     }
140 
141     tctx->regs[ring + TM_CPPR] = cppr;
142 
143     /*
144      * Recompute the PIPR based on local pending interrupts.  The PHYS
145      * ring must take the minimum of both the PHYS and POOL PIPR values.
146      */
147     pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
148     ring_min = ring;
149 
150     /* PHYS updates also depend on POOL values */
151     if (ring == TM_QW3_HV_PHYS) {
152         uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL];
153 
154         /* POOL values only matter if POOL ctx is valid */
155         if (pool_regs[TM_WORD2] & 0x80) {
156 
157             uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
158 
159             /*
160              * Determine highest priority interrupt and
161              * remember which ring has it.
162              */
163             if (pool_pipr < pipr_min) {
164                 pipr_min = pool_pipr;
165                 ring_min = TM_QW2_HV_POOL;
166             }
167         }
168     }
169 
170     regs[TM_PIPR] = pipr_min;
171 
172     /* CPPR has changed, check if we need to raise a pending exception */
173     xive_tctx_notify(tctx, ring_min, 0);
174 }
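/*
 * Illustrative example for xive_tctx_set_cppr() above (not from the model
 * itself, and assuming the usual encoding where xive_priority_to_ipb(p)
 * sets bit 0x80 >> p and xive_ipb_to_pipr() returns the most favored
 * pending priority): with IPB = 0x10 (priority 3 pending) and the CPPR
 * lowered from 0xff to 5, the recomputed PIPR is 3, and since 3 < 5 the
 * xive_tctx_notify() call raises an exception on the ring.
 */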
175 
176 void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
177                            uint8_t group_level)
178 {
179     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
180     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
181     uint8_t *alt_regs = &tctx->regs[alt_ring];
182     uint8_t *regs = &tctx->regs[ring];
183 
184     if (group_level == 0) {
185         /* VP-specific */
186         regs[TM_IPB] |= xive_priority_to_ipb(priority);
187         alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]);
188     } else {
189         /* VP-group */
190         alt_regs[TM_PIPR] = xive_priority_to_pipr(priority);
191     }
192     xive_tctx_notify(tctx, ring, group_level);
193 }
194 
195 /*
196  * XIVE Thread Interrupt Management Area (TIMA)
197  */
198 
199 static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
200                                 hwaddr offset, uint64_t value, unsigned size)
201 {
202     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
203 }
204 
205 static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
206                                    hwaddr offset, unsigned size)
207 {
208     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
209 }
210 
211 static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
212                                       hwaddr offset, unsigned size)
213 {
214     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
215     uint32_t qw2w2;
216 
217     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
218     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
219     return qw2w2;
220 }
221 
222 static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
223                                       hwaddr offset, unsigned size)
224 {
225     uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
226     uint8_t qw3b8;
227 
228     qw3b8 = qw3b8_prev & ~TM_QW3B8_VT;
229     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8;
230     return qw3b8;
231 }
232 
233 static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
234                             uint64_t value, unsigned size)
235 {
236     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
237 }
238 
239 static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
240                                 hwaddr offset, unsigned size)
241 {
242     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
243 }
244 
245 /*
246  * Define an access map for each page of the TIMA that we will use in
247  * the memory region ops to filter values when doing loads and stores
248  * of raw register values.
249  *
250  * Register accessibility bits:
251  *
252  *    0x0 - no access
253  *    0x1 - write only
254  *    0x2 - read only
255  *    0x3 - read/write
256  */
257 
258 static const uint8_t xive_tm_hw_view[] = {
259     3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
260     3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
261     0, 0, 3, 3,   0, 3, 3, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
262     3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   3, 3, 3, 0, /* QW-3 PHYS */
263 };
264 
265 static const uint8_t xive_tm_hv_view[] = {
266     3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
267     3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
268     0, 0, 3, 3,   0, 3, 3, 0,   0, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
269     3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   0, 0, 0, 0, /* QW-3 PHYS */
270 };
271 
272 static const uint8_t xive_tm_os_view[] = {
273     3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
274     2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
275     0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
276     0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
277 };
278 
279 static const uint8_t xive_tm_user_view[] = {
280     3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-0 User */
281     0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
282     0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
283     0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
284 };
285 
286 /*
287  * Overall TIMA access map for the thread interrupt management context
288  * registers
289  */
290 static const uint8_t *xive_tm_views[] = {
291     [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
292     [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
293     [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
294     [XIVE_TM_USER_PAGE] = xive_tm_user_view,
295 };
296 
297 /*
298  * Computes a register access mask for a given offset in the TIMA
299  */
300 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
301 {
302     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
303     uint8_t reg_offset = offset & TM_REG_OFFSET;
304     uint8_t reg_mask = write ? 0x1 : 0x2;
305     uint64_t mask = 0x0;
306     int i;
307 
308     for (i = 0; i < size; i++) {
309         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
310             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
311         }
312     }
313 
314     return mask;
315 }
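/*
 * Worked example for the access maps above (a reading aid only): a 4-byte
 * load through the OS page at the start of QW-1 (reg_offset 0x10) hits
 * view bytes { 2, 3, 2, 2 }, all readable, so the mask is 0xFFFFFFFF. A
 * 4-byte store at the same offset finds only byte 0x11 (the OS CPPR)
 * writable, so the mask is 0x00FF0000 and the other bytes are discarded
 * by xive_tm_raw_write().
 */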
316 
317 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
318                               unsigned size)
319 {
320     uint8_t ring_offset = offset & TM_RING_OFFSET;
321     uint8_t reg_offset = offset & TM_REG_OFFSET;
322     uint64_t mask = xive_tm_mask(offset, size, true);
323     int i;
324 
325     /*
326      * Only 4- or 8-byte stores are allowed and the User ring is
327      * excluded.
328      */
329     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
330         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
331                       HWADDR_PRIx" size %d\n", offset, size);
332         return;
333     }
334 
335     /*
336      * Use the register offset for the raw values and filter out
337      * reserved values
338      */
339     for (i = 0; i < size; i++) {
340         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
341         if (byte_mask) {
342             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
343                 byte_mask;
344         }
345     }
346 }
347 
348 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
349 {
350     uint8_t ring_offset = offset & TM_RING_OFFSET;
351     uint8_t reg_offset = offset & TM_REG_OFFSET;
352     uint64_t mask = xive_tm_mask(offset, size, false);
353     uint64_t ret;
354     int i;
355 
356     /*
357      * Only 4- or 8-byte loads are allowed and the User ring is
358      * excluded.
359      */
360     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
361         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
362                       HWADDR_PRIx" size %d\n", offset, size);
363         return -1;
364     }
365 
366     /* Use the register offset for the raw values */
367     ret = 0;
368     for (i = 0; i < size; i++) {
369         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
370     }
371 
372     /* filter out reserved values */
373     return ret & mask;
374 }
375 
376 /*
377  * The TM context is mapped twice within each page. Stores and loads
378  * to the first mapping below 2K write and read the specified values
379  * without modification. The second mapping above 2K performs specific
380  * state changes (side effects) in addition to setting/returning the
381  * interrupt management area context of the processor thread.
382  */
383 static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
384                                    hwaddr offset, unsigned size)
385 {
386     return xive_tctx_accept(tctx, TM_QW1_OS);
387 }
388 
389 static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
390                                 hwaddr offset, uint64_t value, unsigned size)
391 {
392     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
393 }
394 
395 static void xive_tctx_set_lgs(XiveTCTX *tctx, uint8_t ring, uint8_t lgs)
396 {
397     uint8_t *regs = &tctx->regs[ring];
398 
399     regs[TM_LGS] = lgs;
400 }
401 
402 static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
403                           hwaddr offset, uint64_t value, unsigned size)
404 {
405     xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff);
406 }
407 
408 /*
409  * Adjust the PIPR to allow a CPU to process event queues of other
410  * priorities during one physical interrupt cycle.
411  */
412 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
413                                    hwaddr offset, uint64_t value, unsigned size)
414 {
415     xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0);
416 }
417 
418 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
419                                uint32_t *nvt_idx, bool *vo)
420 {
421     if (nvt_blk) {
422         *nvt_blk = xive_nvt_blk(cam);
423     }
424     if (nvt_idx) {
425         *nvt_idx = xive_nvt_idx(cam);
426     }
427     if (vo) {
428         *vo = !!(cam & TM_QW1W2_VO);
429     }
430 }
431 
432 static uint32_t xive_tctx_get_os_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
433                                      uint32_t *nvt_idx, bool *vo)
434 {
435     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
436     uint32_t cam = be32_to_cpu(qw1w2);
437 
438     xive_os_cam_decode(cam, nvt_blk, nvt_idx, vo);
439     return qw1w2;
440 }
441 
442 static void xive_tctx_set_os_cam(XiveTCTX *tctx, uint32_t qw1w2)
443 {
444     memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
445 }
446 
447 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
448                                     hwaddr offset, unsigned size)
449 {
450     uint32_t qw1w2;
451     uint32_t qw1w2_new;
452     uint8_t nvt_blk;
453     uint32_t nvt_idx;
454     bool vo;
455 
456     qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
457 
458     if (!vo) {
459         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
460                       nvt_blk, nvt_idx);
461     }
462 
463     /* Invalidate CAM line */
464     qw1w2_new = xive_set_field32(TM_QW1W2_VO, qw1w2, 0);
465     xive_tctx_set_os_cam(tctx, qw1w2_new);
466 
467     xive_tctx_reset_signal(tctx, TM_QW1_OS);
468     return qw1w2;
469 }
470 
471 static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
472                                   uint8_t nvt_blk, uint32_t nvt_idx)
473 {
474     XiveNVT nvt;
475     uint8_t ipb;
476 
477     /*
478      * Grab the associated NVT to pull the pending bits, and merge
479      * them with the IPB of the thread interrupt context registers
480      */
481     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
482         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVT %x/%x\n",
483                           nvt_blk, nvt_idx);
484         return;
485     }
486 
487     ipb = xive_get_field32(NVT_W4_IPB, nvt.w4);
488 
489     if (ipb) {
490         /* Reset the NVT value */
491         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
492         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
493 
494         uint8_t *regs = &tctx->regs[TM_QW1_OS];
495         regs[TM_IPB] |= ipb;
496     }
497 
498     /*
499      * Always call xive_tctx_pipr_update(). Even if no escalation was
500      * triggered, there could be a pending interrupt that was saved
501      * when the context was pulled and that we need to take
502      * into account by recalculating the PIPR (which is not
503      * saved/restored).
504      * It will also raise the External interrupt signal if needed.
505      */
506     xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */
507 }
508 
509 /*
510  * Updating the OS CAM line can trigger a resend of interrupts
511  */
512 static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
513                                 hwaddr offset, uint64_t value, unsigned size)
514 {
515     uint32_t cam = value;
516     uint32_t qw1w2 = cpu_to_be32(cam);
517     uint8_t nvt_blk;
518     uint32_t nvt_idx;
519     bool vo;
520 
521     xive_os_cam_decode(cam, &nvt_blk, &nvt_idx, &vo);
522 
523     /* First update the registers */
524     xive_tctx_set_os_cam(tctx, qw1w2);
525 
526     /* Check the interrupt pending bits */
527     if (vo) {
528         xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
529     }
530 }
531 
532 static uint32_t xive_presenter_get_config(XivePresenter *xptr)
533 {
534     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
535 
536     return xpc->get_config(xptr);
537 }
538 
539 /*
540  * Define a mapping of "special" operations depending on the TIMA page
541  * offset and the size of the operation.
542  */
543 typedef struct XiveTmOp {
544     uint8_t  page_offset;
545     uint32_t op_offset;
546     unsigned size;
547     void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
548                               hwaddr offset,
549                               uint64_t value, unsigned size);
550     uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
551                              unsigned size);
552 } XiveTmOp;
553 
554 static const XiveTmOp xive_tm_operations[] = {
555     /*
556      * MMIOs below 2K : raw values and special operations without side
557      * effects
558      */
559     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
560                                                      NULL },
561     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive_tm_push_os_ctx,
562                                                      NULL },
563     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
564                                                      NULL },
565     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
566                                                      NULL },
567     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
568                                                      xive_tm_vt_poll },
569 
570     /* MMIOs above 2K : special operations with side effects */
571     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
572                                                      xive_tm_ack_os_reg },
573     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
574                                                      NULL },
575     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
576                                                      xive_tm_pull_os_ctx },
577     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
578                                                      xive_tm_pull_os_ctx },
579     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
580                                                      xive_tm_ack_hv_reg },
581     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
582                                                      xive_tm_pull_pool_ctx },
583     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
584                                                      xive_tm_pull_pool_ctx },
585     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
586                                                      xive_tm_pull_phys_ctx },
587 };
588 
589 static const XiveTmOp xive2_tm_operations[] = {
590     /*
591      * MMIOs below 2K : raw values and special operations without side
592      * effects
593      */
594     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive2_tm_set_os_cppr,
595                                                      NULL },
596     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
597                                                      NULL },
598     { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      8, xive2_tm_push_os_ctx,
599                                                      NULL },
600     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
601                                                      NULL },
602     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive2_tm_set_hv_cppr,
603                                                      NULL },
604     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
605                                                      NULL },
606     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
607                                                      xive_tm_vt_poll },
608     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T,     1, xive2_tm_set_hv_target,
609                                                      NULL },
610 
611     /* MMIOs above 2K : special operations with side effects */
612     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
613                                                      xive_tm_ack_os_reg },
614     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
615                                                      NULL },
616     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2,     4, NULL,
617                                                      xive2_tm_pull_os_ctx },
618     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
619                                                      xive2_tm_pull_os_ctx },
620     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
621                                                      xive2_tm_pull_os_ctx },
622     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
623                                                      xive_tm_ack_hv_reg },
624     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2,   4, NULL,
625                                                      xive_tm_pull_pool_ctx },
626     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
627                                                      xive_tm_pull_pool_ctx },
628     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
629                                                      xive_tm_pull_pool_ctx },
630     { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL,     1, xive2_tm_pull_os_ctx_ol,
631                                                      NULL },
632     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2,   4, NULL,
633                                                      xive_tm_pull_phys_ctx },
634     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
635                                                      xive_tm_pull_phys_ctx },
636     { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL,   1, xive2_tm_pull_phys_ctx_ol,
637                                                      NULL },
638 };
639 
640 static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
641                                        unsigned size, bool write)
642 {
643     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
644     uint32_t op_offset = offset & TM_ADDRESS_MASK;
645     const XiveTmOp *tm_ops;
646     int i, tm_ops_count;
647     uint32_t cfg;
648 
649     cfg = xive_presenter_get_config(xptr);
650     if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
651         tm_ops = xive_tm_operations;
652         tm_ops_count = ARRAY_SIZE(xive_tm_operations);
653     } else {
654         tm_ops = xive2_tm_operations;
655         tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
656     }
657 
658     for (i = 0; i < tm_ops_count; i++) {
659         const XiveTmOp *xto = &tm_ops[i];
660 
661         /* Accesses done from a more privileged TIMA page are allowed */
662         if (xto->page_offset >= page_offset &&
663             xto->op_offset == op_offset &&
664             xto->size == size &&
665             ((write && xto->write_handler) || (!write && xto->read_handler))) {
666             return xto;
667         }
668     }
669     return NULL;
670 }
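/*
 * For instance (illustrative), the 1-byte OS CPPR store declared for
 * XIVE_TM_OS_PAGE also matches an access made through the more privileged
 * HV or HW pages because xto->page_offset >= page_offset, whereas an OS
 * page access can never reach an operation declared for the HV page.
 */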
671 
672 /*
673  * TIMA MMIO handlers
674  */
675 void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
676                         uint64_t value, unsigned size)
677 {
678     const XiveTmOp *xto;
679 
680     trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
681 
682     /*
683      * TODO: check V bit in Q[0-3]W2
684      */
685 
686     /*
687      * First, check for special operations in the region above 2K
688      */
689     if (offset & TM_SPECIAL_OP) {
690         xto = xive_tm_find_op(tctx->xptr, offset, size, true);
691         if (!xto) {
692             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
693                           "@%"HWADDR_PRIx" size %d\n", offset, size);
694         } else {
695             xto->write_handler(xptr, tctx, offset, value, size);
696         }
697         return;
698     }
699 
700     /*
701      * Then, check for special operations in the region below 2K.
702      */
703     xto = xive_tm_find_op(tctx->xptr, offset, size, true);
704     if (xto) {
705         xto->write_handler(xptr, tctx, offset, value, size);
706         return;
707     }
708 
709     /*
710      * Finish with raw access to the register values
711      */
712     xive_tm_raw_write(tctx, offset, value, size);
713 }
714 
715 uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
716                            unsigned size)
717 {
718     const XiveTmOp *xto;
719     uint64_t ret;
720 
721     /*
722      * TODO: check V bit in Q[0-3]W2
723      */
724 
725     /*
726      * First, check for special operations in the 2K region
727      * First, check for special operations in the region above 2K
728     if (offset & TM_SPECIAL_OP) {
729         xto = xive_tm_find_op(tctx->xptr, offset, size, false);
730         if (!xto) {
731             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA "
732                           "@%"HWADDR_PRIx" size %d\n", offset, size);
733             return -1;
734         }
735         ret = xto->read_handler(xptr, tctx, offset, size);
736         goto out;
737     }
738 
739     /*
740      * Then, check for special operations in the region below 2K.
741      */
742     xto = xive_tm_find_op(tctx->xptr, offset, size, false);
743     if (xto) {
744         ret = xto->read_handler(xptr, tctx, offset, size);
745         goto out;
746     }
747 
748     /*
749      * Finish with raw access to the register values
750      */
751     ret = xive_tm_raw_read(tctx, offset, size);
752 out:
753     trace_xive_tctx_tm_read(tctx->cs->cpu_index, offset, size, ret);
754     return ret;
755 }
756 
757 static char *xive_tctx_ring_print(uint8_t *ring)
758 {
759     uint32_t w2 = xive_tctx_word2(ring);
760 
761     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
762                    "%02x  %02x   %02x  %08x",
763                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
764                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
765                    be32_to_cpu(w2));
766 }
767 
768 static const char * const xive_tctx_ring_names[] = {
769     "USER", "OS", "POOL", "PHYS",
770 };
771 
772 /*
773  * kvm_irqchip_in_kernel() will cause the compiler to turn this
774  * into a nop if CONFIG_KVM isn't defined.
775  */
776 #define xive_in_kernel(xptr)                                            \
777     (kvm_irqchip_in_kernel() &&                                         \
778      ({                                                                 \
779          XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);      \
780          xpc->in_kernel ? xpc->in_kernel(xptr) : false;                 \
781      }))
782 
783 void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf)
784 {
785     int cpu_index;
786     int i;
787 
788     /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs
789      * are hot plugged or unplugged.
790      */
791     if (!tctx) {
792         return;
793     }
794 
795     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
796 
797     if (xive_in_kernel(tctx->xptr)) {
798         Error *local_err = NULL;
799 
800         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
801         if (local_err) {
802             error_report_err(local_err);
803             return;
804         }
805     }
806 
807     if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) {
808         g_string_append_printf(buf, "CPU[%04x]:   "
809                                "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
810                                "  W2\n", cpu_index);
811     } else {
812         g_string_append_printf(buf, "CPU[%04x]:   "
813                                "QW   NSR CPPR IPB LSMFB   -  LGS  T  PIPR"
814                                "  W2\n", cpu_index);
815     }
816 
817     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
818         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
819         g_string_append_printf(buf, "CPU[%04x]: %4s    %s\n",
820                                cpu_index, xive_tctx_ring_names[i], s);
821         g_free(s);
822     }
823 }
824 
825 void xive_tctx_reset(XiveTCTX *tctx)
826 {
827     memset(tctx->regs, 0, sizeof(tctx->regs));
828 
829     /* Set some defaults */
830     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
831     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
832     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
833     if (!(xive_presenter_get_config(tctx->xptr) &
834           XIVE_PRESENTER_GEN1_TIMA_OS)) {
835         tctx->regs[TM_QW1_OS + TM_OGEN] = 2;
836     }
837 
838     /*
839      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
840      * CPPR is first set.
841      */
842     tctx->regs[TM_QW1_OS + TM_PIPR] =
843         xive_ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
844     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
845         xive_ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
846 }
847 
848 static void xive_tctx_realize(DeviceState *dev, Error **errp)
849 {
850     XiveTCTX *tctx = XIVE_TCTX(dev);
851     PowerPCCPU *cpu;
852     CPUPPCState *env;
853 
854     assert(tctx->cs);
855     assert(tctx->xptr);
856 
857     cpu = POWERPC_CPU(tctx->cs);
858     env = &cpu->env;
859     switch (PPC_INPUT(env)) {
860     case PPC_FLAGS_INPUT_POWER9:
861         tctx->hv_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_HINT);
862         tctx->os_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_INT);
863         break;
864 
865     default:
866         error_setg(errp, "XIVE interrupt controller does not support "
867                    "this CPU bus model");
868         return;
869     }
870 
871     /* Connect the presenter to the VCPU (required for CPU hotplug) */
872     if (xive_in_kernel(tctx->xptr)) {
873         if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
874             return;
875         }
876     }
877 }
878 
879 static int vmstate_xive_tctx_pre_save(void *opaque)
880 {
881     XiveTCTX *tctx = XIVE_TCTX(opaque);
882     Error *local_err = NULL;
883     int ret;
884 
885     if (xive_in_kernel(tctx->xptr)) {
886         ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
887         if (ret < 0) {
888             error_report_err(local_err);
889             return ret;
890         }
891     }
892 
893     return 0;
894 }
895 
896 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
897 {
898     XiveTCTX *tctx = XIVE_TCTX(opaque);
899     Error *local_err = NULL;
900     int ret;
901 
902     if (xive_in_kernel(tctx->xptr)) {
903         /*
904          * Required for hotplugged CPU, for which the state comes
905          * after all states of the machine.
906          */
907         ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
908         if (ret < 0) {
909             error_report_err(local_err);
910             return ret;
911         }
912     }
913 
914     return 0;
915 }
916 
917 static const VMStateDescription vmstate_xive_tctx = {
918     .name = TYPE_XIVE_TCTX,
919     .version_id = 1,
920     .minimum_version_id = 1,
921     .pre_save = vmstate_xive_tctx_pre_save,
922     .post_load = vmstate_xive_tctx_post_load,
923     .fields = (const VMStateField[]) {
924         VMSTATE_BUFFER(regs, XiveTCTX),
925         VMSTATE_END_OF_LIST()
926     },
927 };
928 
929 static const Property xive_tctx_properties[] = {
930     DEFINE_PROP_LINK("cpu", XiveTCTX, cs, TYPE_CPU, CPUState *),
931     DEFINE_PROP_LINK("presenter", XiveTCTX, xptr, TYPE_XIVE_PRESENTER,
932                      XivePresenter *),
933 };
934 
935 static void xive_tctx_class_init(ObjectClass *klass, const void *data)
936 {
937     DeviceClass *dc = DEVICE_CLASS(klass);
938 
939     dc->desc = "XIVE Interrupt Thread Context";
940     dc->realize = xive_tctx_realize;
941     dc->vmsd = &vmstate_xive_tctx;
942     device_class_set_props(dc, xive_tctx_properties);
943     /*
944      * Reason: part of XIVE interrupt controller, needs to be wired up
945      * by xive_tctx_create().
946      */
947     dc->user_creatable = false;
948 }
949 
950 static const TypeInfo xive_tctx_info = {
951     .name          = TYPE_XIVE_TCTX,
952     .parent        = TYPE_DEVICE,
953     .instance_size = sizeof(XiveTCTX),
954     .class_init    = xive_tctx_class_init,
955 };
956 
957 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp)
958 {
959     Object *obj;
960 
961     obj = object_new(TYPE_XIVE_TCTX);
962     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj);
963     object_unref(obj);
964     object_property_set_link(obj, "cpu", cpu, &error_abort);
965     object_property_set_link(obj, "presenter", OBJECT(xptr), &error_abort);
966     if (!qdev_realize(DEVICE(obj), NULL, errp)) {
967         object_unparent(obj);
968         return NULL;
969     }
970     return obj;
971 }
972 
973 void xive_tctx_destroy(XiveTCTX *tctx)
974 {
975     Object *obj = OBJECT(tctx);
976 
977     object_unparent(obj);
978 }
979 
980 /*
981  * XIVE ESB helpers
982  */
983 
984 uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
985 {
986     uint8_t old_pq = *pq & 0x3;
987 
988     *pq &= ~0x3;
989     *pq |= value & 0x3;
990 
991     return old_pq;
992 }
993 
994 bool xive_esb_trigger(uint8_t *pq)
995 {
996     uint8_t old_pq = *pq & 0x3;
997 
998     switch (old_pq) {
999     case XIVE_ESB_RESET:
1000         xive_esb_set(pq, XIVE_ESB_PENDING);
1001         return true;
1002     case XIVE_ESB_PENDING:
1003     case XIVE_ESB_QUEUED:
1004         xive_esb_set(pq, XIVE_ESB_QUEUED);
1005         return false;
1006     case XIVE_ESB_OFF:
1007         xive_esb_set(pq, XIVE_ESB_OFF);
1008         return false;
1009     default:
1010          g_assert_not_reached();
1011     }
1012 }
1013 
1014 bool xive_esb_eoi(uint8_t *pq)
1015 {
1016     uint8_t old_pq = *pq & 0x3;
1017 
1018     switch (old_pq) {
1019     case XIVE_ESB_RESET:
1020     case XIVE_ESB_PENDING:
1021         xive_esb_set(pq, XIVE_ESB_RESET);
1022         return false;
1023     case XIVE_ESB_QUEUED:
1024         xive_esb_set(pq, XIVE_ESB_PENDING);
1025         return true;
1026     case XIVE_ESB_OFF:
1027         xive_esb_set(pq, XIVE_ESB_OFF);
1028         return false;
1029     default:
1030          g_assert_not_reached();
1031     }
1032 }
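/*
 * Compact view of the PQ transitions implemented by the two helpers above
 * (P is the left bit, Q the right bit; "notify" marks the transitions
 * returning true). This only restates the code:
 *
 *   trigger:  00 -> 10 (notify)   10 -> 11   11 -> 11   01 -> 01
 *   EOI:      00 -> 00   10 -> 00   11 -> 10 (notify)   01 -> 01
 */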
1033 
1034 /*
1035  * XIVE Interrupt Source (or IVSE)
1036  */
1037 
1038 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
1039 {
1040     assert(srcno < xsrc->nr_irqs);
1041 
1042     return xsrc->status[srcno] & 0x3;
1043 }
1044 
1045 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
1046 {
1047     assert(srcno < xsrc->nr_irqs);
1048 
1049     return xive_esb_set(&xsrc->status[srcno], pq);
1050 }
1051 
1052 /*
1053  * Returns whether the event notification should be forwarded.
1054  */
1055 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
1056 {
1057     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
1058 
1059     xive_source_set_asserted(xsrc, srcno, true);
1060 
1061     switch (old_pq) {
1062     case XIVE_ESB_RESET:
1063         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
1064         return true;
1065     default:
1066         return false;
1067     }
1068 }
1069 
1070  * Sources can be configured with PQ offloading, in which case the check
1071  * on the PQ state bits of MSIs is disabled.
1072  * on the PQ state bits of MSIs is disabled
1073  */
1074 static bool xive_source_esb_disabled(XiveSource *xsrc, uint32_t srcno)
1075 {
1076     return (xsrc->esb_flags & XIVE_SRC_PQ_DISABLE) &&
1077         !xive_source_irq_is_lsi(xsrc, srcno);
1078 }
1079 
1080 /*
1081  * Returns whether the event notification should be forwarded.
1082  */
1083 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
1084 {
1085     bool ret;
1086 
1087     assert(srcno < xsrc->nr_irqs);
1088 
1089     if (xive_source_esb_disabled(xsrc, srcno)) {
1090         return true;
1091     }
1092 
1093     ret = xive_esb_trigger(&xsrc->status[srcno]);
1094 
1095     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1096         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
1097         qemu_log_mask(LOG_GUEST_ERROR,
1098                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
1099     }
1100 
1101     return ret;
1102 }
1103 
1104 /*
1105  * Returns whether the event notification should be forwarded.
1106  */
1107 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
1108 {
1109     bool ret;
1110 
1111     assert(srcno < xsrc->nr_irqs);
1112 
1113     if (xive_source_esb_disabled(xsrc, srcno)) {
1114         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EOI for IRQ %d\n", srcno);
1115         return false;
1116     }
1117 
1118     ret = xive_esb_eoi(&xsrc->status[srcno]);
1119 
1120     /*
1121      * LSI sources do not set the Q bit but they can still be
1122      * asserted, in which case we should forward a new event
1123      * notification
1124      */
1125     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1126         xive_source_is_asserted(xsrc, srcno)) {
1127         ret = xive_source_lsi_trigger(xsrc, srcno);
1128     }
1129 
1130     return ret;
1131 }
1132 
1133 /*
1134  * Forward the source event notification to the Router
1135  */
1136 static void xive_source_notify(XiveSource *xsrc, int srcno)
1137 {
1138     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
1139     bool pq_checked = !xive_source_esb_disabled(xsrc, srcno);
1140 
1141     if (xnc->notify) {
1142         xnc->notify(xsrc->xive, srcno, pq_checked);
1143     }
1144 }
1145 
1146 /*
1147  * In a two-page ESB MMIO setting, the even page is the trigger page and
1148  * the odd page is for management.
1149  */
1150 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
1151 {
1152     return !((addr >> shift) & 1);
1153 }
1154 
1155 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
1156 {
1157     return xive_source_esb_has_2page(xsrc) &&
1158         addr_is_even(addr, xsrc->esb_shift - 1);
1159 }
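/*
 * Layout example (illustrative, assuming the default 64K two-page setting
 * XIVE_ESB_64K_2PAGE, i.e. esb_shift = 17): source number N owns a 128K
 * window starting at N << 17; the even 64K page of that window is the
 * trigger page and the odd 64K page is the management/EOI page, which is
 * what addr_is_even() distinguishes.
 */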
1160 
1161 /*
1162  * ESB MMIO loads
1163  *                      Trigger page    Management/EOI page
1164  *
1165  * ESB MMIO setting     2 pages         1 or 2 pages
1166  *
1167  * 0x000 .. 0x3FF       -1              EOI and return 0|1
1168  * 0x400 .. 0x7FF       -1              EOI and return 0|1
1169  * 0x800 .. 0xBFF       -1              return PQ
1170  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
1171  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
1172  * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
1173  * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
1174  */
1175 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
1176 {
1177     XiveSource *xsrc = XIVE_SOURCE(opaque);
1178     uint32_t offset = addr & 0xFFF;
1179     uint32_t srcno = addr >> xsrc->esb_shift;
1180     uint64_t ret = -1;
1181 
1182     /* In a two-page ESB MMIO setting, the trigger page should not be read */
1183     if (xive_source_is_trigger_page(xsrc, addr)) {
1184         qemu_log_mask(LOG_GUEST_ERROR,
1185                       "XIVE: invalid load on IRQ %d trigger page at "
1186                       "0x%"HWADDR_PRIx"\n", srcno, addr);
1187         return -1;
1188     }
1189 
1190     switch (offset) {
1191     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1192         ret = xive_source_esb_eoi(xsrc, srcno);
1193 
1194         /* Forward the source event notification for routing */
1195         if (ret) {
1196             xive_source_notify(xsrc, srcno);
1197         }
1198         break;
1199 
1200     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1201         ret = xive_source_esb_get(xsrc, srcno);
1202         break;
1203 
1204     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1205     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1206     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1207     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1208         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1209         break;
1210     default:
1211         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
1212                       offset);
1213     }
1214 
1215     trace_xive_source_esb_read(addr, srcno, ret);
1216 
1217     return ret;
1218 }
1219 
1220 /*
1221  * ESB MMIO stores
1222  *                      Trigger page    Management/EOI page
1223  *
1224  * ESB MMIO setting     2 pages         1 or 2 pages
1225  *
1226  * 0x000 .. 0x3FF       Trigger         Trigger
1227  * 0x400 .. 0x7FF       Trigger         EOI
1228  * 0x800 .. 0xBFF       Trigger         undefined
1229  * 0xC00 .. 0xCFF       Trigger         PQ=00
1230  * 0xD00 .. 0xDFF       Trigger         PQ=01
1231  * 0xE00 .. 0xEFF       Trigger         PQ=10
1232  * 0xF00 .. 0xFFF       Trigger         PQ=11
1233  */
1234 static void xive_source_esb_write(void *opaque, hwaddr addr,
1235                                   uint64_t value, unsigned size)
1236 {
1237     XiveSource *xsrc = XIVE_SOURCE(opaque);
1238     uint32_t offset = addr & 0xFFF;
1239     uint32_t srcno = addr >> xsrc->esb_shift;
1240     bool notify = false;
1241 
1242     trace_xive_source_esb_write(addr, srcno, value);
1243 
1244     /* In a two-page ESB MMIO setting, the trigger page only triggers */
1245     if (xive_source_is_trigger_page(xsrc, addr)) {
1246         notify = xive_source_esb_trigger(xsrc, srcno);
1247         goto out;
1248     }
1249 
1250     switch (offset) {
1251     case 0 ... 0x3FF:
1252         notify = xive_source_esb_trigger(xsrc, srcno);
1253         break;
1254 
1255     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
1256         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
1257             qemu_log_mask(LOG_GUEST_ERROR,
1258                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
1259             return;
1260         }
1261 
1262         notify = xive_source_esb_eoi(xsrc, srcno);
1263         break;
1264 
1265     /*
1266      * This is an internal offset used to inject triggers when the PQ
1267      * state bits are not controlled locally, such as for LSIs when
1268      * under ABT mode.
1269      */
1270     case XIVE_ESB_INJECT ... XIVE_ESB_INJECT + 0x3FF:
1271         notify = true;
1272         break;
1273 
1274     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1275     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1276     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1277     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1278         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1279         break;
1280 
1281     default:
1282         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
1283                       offset);
1284         return;
1285     }
1286 
1287 out:
1288     /* Forward the source event notification for routing */
1289     if (notify) {
1290         xive_source_notify(xsrc, srcno);
1291     }
1292 }
1293 
1294 static const MemoryRegionOps xive_source_esb_ops = {
1295     .read = xive_source_esb_read,
1296     .write = xive_source_esb_write,
1297     .endianness = DEVICE_BIG_ENDIAN,
1298     .valid = {
1299         .min_access_size = 1,
1300         .max_access_size = 8,
1301     },
1302     .impl = {
1303         .min_access_size = 1,
1304         .max_access_size = 8,
1305     },
1306 };
1307 
1308 void xive_source_set_irq(void *opaque, int srcno, int val)
1309 {
1310     XiveSource *xsrc = XIVE_SOURCE(opaque);
1311     bool notify = false;
1312 
1313     if (xive_source_irq_is_lsi(xsrc, srcno)) {
1314         if (val) {
1315             notify = xive_source_lsi_trigger(xsrc, srcno);
1316         } else {
1317             xive_source_set_asserted(xsrc, srcno, false);
1318         }
1319     } else {
1320         if (val) {
1321             notify = xive_source_esb_trigger(xsrc, srcno);
1322         }
1323     }
1324 
1325     /* Forward the source event notification for routing */
1326     if (notify) {
1327         xive_source_notify(xsrc, srcno);
1328     }
1329 }
1330 
1331 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, GString *buf)
1332 {
1333     for (unsigned i = 0; i < xsrc->nr_irqs; i++) {
1334         uint8_t pq = xive_source_esb_get(xsrc, i);
1335 
1336         if (pq == XIVE_ESB_OFF) {
1337             continue;
1338         }
1339 
1340         g_string_append_printf(buf, "  %08x %s %c%c%c\n", i + offset,
1341                                xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1342                                pq & XIVE_ESB_VAL_P ? 'P' : '-',
1343                                pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1344                                xive_source_is_asserted(xsrc, i) ? 'A' : ' ');
1345     }
1346 }
1347 
1348 static void xive_source_reset(void *dev)
1349 {
1350     XiveSource *xsrc = XIVE_SOURCE(dev);
1351 
1352     /* Do not clear the LSI bitmap */
1353 
1354     memset(xsrc->status, xsrc->reset_pq, xsrc->nr_irqs);
1355 }
1356 
1357 static void xive_source_realize(DeviceState *dev, Error **errp)
1358 {
1359     XiveSource *xsrc = XIVE_SOURCE(dev);
1360     uint64_t esb_len = xive_source_esb_len(xsrc);
1361 
1362     assert(xsrc->xive);
1363 
1364     if (!xsrc->nr_irqs) {
1365         error_setg(errp, "Number of interrupts needs to be greater than 0");
1366         return;
1367     }
1368 
1369     if (xsrc->esb_shift != XIVE_ESB_4K &&
1370         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1371         xsrc->esb_shift != XIVE_ESB_64K &&
1372         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1373         error_setg(errp, "Invalid ESB shift setting");
1374         return;
1375     }
1376 
1377     xsrc->status = g_malloc0(xsrc->nr_irqs);
1378     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1379 
1380     memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
1381     memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
1382                           &xive_source_esb_ops, xsrc, "xive.esb-emulated",
1383                           esb_len);
1384     memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
1385 
1386     qemu_register_reset(xive_source_reset, dev);
1387 }
1388 
1389 static const VMStateDescription vmstate_xive_source = {
1390     .name = TYPE_XIVE_SOURCE,
1391     .version_id = 1,
1392     .minimum_version_id = 1,
1393     .fields = (const VMStateField[]) {
1394         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1395         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1396         VMSTATE_END_OF_LIST()
1397     },
1398 };
1399 
1400 /*
1401  * The default XIVE interrupt source setting for the ESB MMIOs is two
1402  * 64k pages without Store EOI, to be in sync with KVM.
1403  */
1404 static const Property xive_source_properties[] = {
1405     DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
1406     DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
1407     DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
1408     /*
1409      * By default, PQs are initialized to 0b01 (Q=1), which corresponds
1410      * to "interrupts off"
1411      */
1412     DEFINE_PROP_UINT8("reset-pq", XiveSource, reset_pq, XIVE_ESB_OFF),
1413     DEFINE_PROP_LINK("xive", XiveSource, xive, TYPE_XIVE_NOTIFIER,
1414                      XiveNotifier *),
1415 };
1416 
1417 static void xive_source_class_init(ObjectClass *klass, const void *data)
1418 {
1419     DeviceClass *dc = DEVICE_CLASS(klass);
1420 
1421     dc->desc    = "XIVE Interrupt Source";
1422     device_class_set_props(dc, xive_source_properties);
1423     dc->realize = xive_source_realize;
1424     dc->vmsd    = &vmstate_xive_source;
1425     /*
1426      * Reason: part of XIVE interrupt controller, needs to be wired up,
1427      * e.g. by spapr_xive_instance_init().
1428      */
1429     dc->user_creatable = false;
1430 }
1431 
1432 static const TypeInfo xive_source_info = {
1433     .name          = TYPE_XIVE_SOURCE,
1434     .parent        = TYPE_DEVICE,
1435     .instance_size = sizeof(XiveSource),
1436     .class_init    = xive_source_class_init,
1437 };
1438 
1439 /*
1440  * XiveEND helpers
1441  */
1442 
1443 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, GString *buf)
1444 {
1445     uint64_t qaddr_base = xive_end_qaddr(end);
1446     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1447     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1448     uint32_t qentries = 1 << (qsize + 10);
1449     int i;
1450 
1451     /*
1452      * print out the [ (qindex - (width - 1)) .. qindex ] window
1453      */
1454     g_string_append_printf(buf, " [ ");
1455     qindex = (qindex - (width - 1)) & (qentries - 1);
1456     for (i = 0; i < width; i++) {
1457         uint64_t qaddr = qaddr_base + (qindex << 2);
1458         uint32_t qdata = -1;
1459 
1460         if (dma_memory_read(&address_space_memory, qaddr,
1461                             &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1462             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1463                           HWADDR_PRIx "\n", qaddr);
1464             return;
1465         }
1466         g_string_append_printf(buf, "%s%08x ", i == width - 1 ? "^" : "",
1467                                be32_to_cpu(qdata));
1468         qindex = (qindex + 1) & (qentries - 1);
1469     }
1470     g_string_append_c(buf, ']');
1471 }
1472 
1473 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1474 {
1475     uint64_t qaddr_base = xive_end_qaddr(end);
1476     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1477     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1478     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1479     uint32_t qentries = 1 << (qsize + 10);
1480 
1481     uint32_t nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
1482     uint32_t nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1483     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1484     uint8_t pq;
1485 
1486     if (!xive_end_is_valid(end)) {
1487         return;
1488     }
1489 
1490     pq = xive_get_field32(END_W1_ESn, end->w1);
1491 
1492     g_string_append_printf(buf,
1493                            "  %08x %c%c %c%c%c%c%c%c%c%c prio:%d nvt:%02x/%04x",
1494                            end_idx,
1495                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1496                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1497                            xive_end_is_valid(end)    ? 'v' : '-',
1498                            xive_end_is_enqueue(end)  ? 'q' : '-',
1499                            xive_end_is_notify(end)   ? 'n' : '-',
1500                            xive_end_is_backlog(end)  ? 'b' : '-',
1501                            xive_end_is_escalate(end) ? 'e' : '-',
1502                            xive_end_is_uncond_escalation(end)   ? 'u' : '-',
1503                            xive_end_is_silent_escalation(end)   ? 's' : '-',
1504                            xive_end_is_firmware(end)   ? 'f' : '-',
1505                            priority, nvt_blk, nvt_idx);
1506 
1507     if (qaddr_base) {
1508         g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d",
1509                                qaddr_base, qindex, qentries, qgen);
1510         xive_end_queue_pic_print_info(end, 6, buf);
1511     }
1512     g_string_append_c(buf, '\n');
1513 }
1514 
1515 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1516 {
1517     uint64_t qaddr_base = xive_end_qaddr(end);
1518     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1519     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1520     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1521 
1522     uint64_t qaddr = qaddr_base + (qindex << 2);
1523     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1524     uint32_t qentries = 1 << (qsize + 10);
1525 
1526     if (dma_memory_write(&address_space_memory, qaddr,
1527                          &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1528         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1529                       HWADDR_PRIx "\n", qaddr);
1530         return;
1531     }
1532 
1533     qindex = (qindex + 1) & (qentries - 1);
1534     if (qindex == 0) {
1535         qgen ^= 1;
1536         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1537     }
1538     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1539 }
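/*
 * Worked example (illustrative): with qsize = 0 the queue holds
 * 1 << 10 = 1024 four-byte entries. An event is written at
 * qaddr_base + (qindex << 2) with the current generation bit in the top
 * bit of the word; when qindex wraps from 1023 back to 0, the generation
 * bit in END_W1_GENERATION is toggled so that the consumer can detect
 * the wrap.
 */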
1540 
1541 void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1542 {
1543     XiveEAS *eas = (XiveEAS *) &end->w4;
1544     uint8_t pq;
1545 
1546     if (!xive_end_is_escalate(end)) {
1547         return;
1548     }
1549 
1550     pq = xive_get_field32(END_W1_ESe, end->w1);
1551 
1552     g_string_append_printf(buf, "  %08x %c%c %c%c end:%02x/%04x data:%08x\n",
1553                            end_idx,
1554                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1555                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1556                            xive_eas_is_valid(eas) ? 'V' : ' ',
1557                            xive_eas_is_masked(eas) ? 'M' : ' ',
1558                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1559                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1560                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1561 }
1562 
1563 /*
1564  * XIVE Router (aka. Virtualization Controller or IVRE)
1565  */
1566 
1567 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1568                         XiveEAS *eas)
1569 {
1570     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1571 
1572     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1573 }
1574 
1575 static
1576 int xive_router_get_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1577                        uint8_t *pq)
1578 {
1579     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1580 
1581     return xrc->get_pq(xrtr, eas_blk, eas_idx, pq);
1582 }
1583 
1584 static
1585 int xive_router_set_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1586                        uint8_t *pq)
1587 {
1588     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1589 
1590     return xrc->set_pq(xrtr, eas_blk, eas_idx, pq);
1591 }
1592 
1593 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1594                         XiveEND *end)
1595 {
1596     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1597 
1598     return xrc->get_end(xrtr, end_blk, end_idx, end);
1599 }
1600 
1601 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1602                           XiveEND *end, uint8_t word_number)
1603 {
1604     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1605 
1606     return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1607 }
1608 
1609 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1610                         XiveNVT *nvt)
1611 {
1612     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1613 
1614     return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1615 }
1616 
1617 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1618                           XiveNVT *nvt, uint8_t word_number)
1619 {
1620     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1621 
1622     return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1623 }
1624 
1625 static int xive_router_get_block_id(XiveRouter *xrtr)
1626 {
1627     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1628 
1629     return xrc->get_block_id(xrtr);
1630 }
1631 
1632 static void xive_router_realize(DeviceState *dev, Error **errp)
1633 {
1634     XiveRouter *xrtr = XIVE_ROUTER(dev);
1635 
1636     assert(xrtr->xfb);
1637 }
1638 
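/*
 * Forward an END trigger to the router's end_notify() class handler.
 * By default this is xive_router_end_notify() (see the class init
 * below); a machine model may override the handler to redirect END
 * triggers.
 */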
1639 static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
1640 {
1641     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1642 
1643     return xrc->end_notify(xrtr, eas);
1644 }
1645 
1646 /*
1647  * Encode the HW CAM line in the block group mode format:
1648  *
1649  *   chip << 19 | 0000000 0 0001 thread (7 bits)
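 *
 *   e.g. following this format, block 0 and PIR 0x08 give the CAM
 *   line 0x00000088 (the thread id comes from the low 7 bits of the PIR)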
1650  */
1651 static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
1652 {
1653     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1654     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1655     uint8_t blk = xive_router_get_block_id(XIVE_ROUTER(xptr));
1656 
1657     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
1658 }
1659 
1660 uint32_t xive_get_vpgroup_size(uint32_t nvp_index)
1661 {
1662     /*
1663      * Group size is a power of 2. The position of the first 0
1664      * (starting with the least significant bits) in the NVP index
1665      * gives the size of the group.
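     *
     * For example, an index ending in 0b0111 has its first 0 at bit 3,
     * so the group spans 1 << (3 + 1) = 16 VPs.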
1666      */
1667     int first_zero = cto32(nvp_index);
1668     if (first_zero >= 31) {
1669         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
1670                                        nvp_index);
1671         return 0;
1672     }
1673 
1674     return 1U << (first_zero + 1);
1675 }
1676 
1677 static uint8_t xive_get_group_level(bool crowd, bool ignore,
1678                                     uint32_t nvp_blk, uint32_t nvp_index)
1679 {
1680     int first_zero;
1681     uint8_t level;
1682 
1683     if (!ignore) {
1684         g_assert(!crowd);
1685         return 0;
1686     }
1687 
1688     first_zero = cto32(nvp_index);
1689     if (first_zero >= 31) {
1690         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid group index 0x%08x",
1691                                        nvp_index);
1692         return 0;
1693     }
1694 
1695     level = (first_zero + 1) & 0b1111;
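    /* e.g. group index 0b...011 -> first 0 at bit 2 -> group level 3 */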
1696     if (crowd) {
1697         uint32_t blk;
1698 
1699         /* crowd level is bit position of first 0 from the right in nvp_blk */
1700         first_zero = cto32(nvp_blk);
1701         if (first_zero >= 31) {
1702             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Invalid crowd block 0x%08x",
1703                                            nvp_blk);
1704             return 0;
1705         }
1706         blk = first_zero + 1;
1707 
1708         /*
1709          * Supported crowd sizes are 2^1, 2^2, and 2^4. 2^3 is not supported.
1710          * HW will encode level 4 as the value 3.  See xive2_pgofnext().
1711          */
1712         switch (blk) {
1713         case 1:
1714         case 2:
1715             break;
1716         case 4:
1717             blk = 3;
1718             break;
1719         default:
1720             g_assert_not_reached();
1721         }
1722 
1723         /* Crowd level bits reside in upper 2 bits of the 6 bit group level */
1724         level |= blk << 4;
1725     }
1726     return level;
1727 }
1728 
1729 /*
1730  * The thread context register words are in big-endian format.
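 *
 * Returns the matching ring (TM_QW3_HV_PHYS, TM_QW2_HV_POOL, TM_QW1_OS
 * or TM_QW0_USER) or -1 when no enabled ring of this thread context
 * matches the NVT identifier.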
1731  */
1732 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
1733                               uint8_t format,
1734                               uint8_t nvt_blk, uint32_t nvt_idx,
1735                               bool cam_ignore, uint32_t logic_serv)
1736 {
1737     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1738     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1739     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1740     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1741     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1742 
1743     /*
1744      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1745      * identifier are ignored in the "CAM" match.
1746      */
1747 
1748     if (format == 0) {
1749         if (cam_ignore == true) {
1750             /*
1751              * F=0 & i=1: Logical server notification (bits ignored at
1752              * the end of the NVT identifier)
1753              */
1754             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1755                           nvt_blk, nvt_idx);
1756             return -1;
1757         }
1758 
1759         /* F=0 & i=0: Specific NVT notification */
1760 
1761         /* PHYS ring */
1762         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1763             cam == xive_tctx_hw_cam_line(xptr, tctx)) {
1764             return TM_QW3_HV_PHYS;
1765         }
1766 
1767         /* HV POOL ring */
1768         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1769             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1770             return TM_QW2_HV_POOL;
1771         }
1772 
1773         /* OS ring */
1774         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1775             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1776             return TM_QW1_OS;
1777         }
1778     } else {
1779         /* F=1 : User level Event-Based Branch (EBB) notification */
1780 
1781         /* USER ring */
1782         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1783              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1784              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1785              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1786             return TM_QW0_USER;
1787         }
1788     }
1789     return -1;
1790 }
1791 
1792 /*
1793  * This is our simple Xive Presenter Engine model. It is merged in the
1794  * Router as it does not require an extra object.
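 *
 * Returns true when a matching thread context was found and notified;
 * otherwise '*precluded' may report that a thread matched but was
 * running at a more favored priority.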
1795  */
1796 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
1797                            uint8_t nvt_blk, uint32_t nvt_idx,
1798                            bool crowd, bool cam_ignore, uint8_t priority,
1799                            uint32_t logic_serv, bool *precluded)
1800 {
1801     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
1802     XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false };
1803     uint8_t group_level;
1804     int count;
1805 
1806     /*
1807      * Ask the machine to scan the interrupt controllers for a match.
1808      *
1809      * For VP-specific notification, we expect at most one match and
1810      * one call to the presenters is all we need (abbreviated notify
1811      * sequence documented by the architecture).
1812      *
1813      * For VP-group notification, match_nvt() is the equivalent of the
1814      * "histogram" and "poll" commands sent over the power bus to the
1815      * presenters. 'count' could be more than one, but we always
1816      * select the first match for now. 'precluded' tells if (at least)
1817      * one thread matches but can't take the interrupt now because
1818      * it's running at a more favored priority. We return the
1819      * information to the router so that it can take appropriate
1820      * actions (backlog, escalation, broadcast, etc...)
1821      *
1822      * If we were to implement a better way of dispatching the
1823      * interrupt in case of multiple matches (instead of the first
1824      * match), we would need a heuristic to elect a thread (for
1825      * example, the hardware keeps track of an 'age' in the TIMA) and
1826      * a new command to the presenters (the equivalent of the "assign"
1827      * power bus command in the documented full notify sequence).
1828      */
1829     count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore,
1830                            priority, logic_serv, &match);
1831     if (count < 0) {
1832         return false;
1833     }
1834 
1835     /* handle CPU exception delivery */
1836     if (count) {
1837         group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx);
1838         trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level);
1839         xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level);
1840     } else {
1841         *precluded = match.precluded;
1842     }
1843 
1844     return !!count;
1845 }
1846 
1847 /*
1848  * Notification using the END ESe/ESn bit (Event State Buffer for
1849  * escalation and notification). Provide further coalescing in the
1850  * Router.
1851  */
1852 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
1853                                       uint32_t end_idx, XiveEND *end,
1854                                       uint32_t end_esmask)
1855 {
1856     uint8_t pq = xive_get_field32(end_esmask, end->w1);
1857     bool notify = xive_esb_trigger(&pq);
1858 
1859     if (pq != xive_get_field32(end_esmask, end->w1)) {
1860         end->w1 = xive_set_field32(end_esmask, end->w1, pq);
1861         xive_router_write_end(xrtr, end_blk, end_idx, end, 1);
1862     }
1863 
1864     /* ESe/n[Q]=1 : end of notification */
1865     return notify;
1866 }
1867 
1868 /*
1869  * An END trigger can come from an event trigger (IPI or HW) or from
1870  * another chip. We don't model the PowerBus but the END trigger
1871  * message has the same parameters as the function below.
1872  */
1873 void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
1874 {
1875     XiveEND end;
1876     uint8_t priority;
1877     uint8_t format;
1878     uint8_t nvt_blk;
1879     uint32_t nvt_idx;
1880     XiveNVT nvt;
1881     bool found, precluded;
1882 
1883     uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
1884     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
1885     uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);
1886 
1887     /* END cache lookup */
1888     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1889         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1890                       end_idx);
1891         return;
1892     }
1893 
1894     if (!xive_end_is_valid(&end)) {
1895         trace_xive_router_end_notify(end_blk, end_idx, end_data);
1896         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1897                       end_blk, end_idx);
1898         return;
1899     }
1900 
1901     if (xive_end_is_enqueue(&end)) {
1902         xive_end_enqueue(&end, end_data);
1903         /* Enqueuing event data modifies the EQ toggle and index */
1904         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1905     }
1906 
1907     /*
1908      * When the END is silent, we skip the notification part.
1909      */
1910     if (xive_end_is_silent_escalation(&end)) {
1911         goto do_escalation;
1912     }
1913 
1914     /*
1915      * The W7 format depends on the F bit in W6. It defines the type
1916      * of the notification:
1917      *
1918      *   F=0 : single or multiple NVT notification
1919      *   F=1 : User level Event-Based Branch (EBB) notification, no
1920      *         priority
1921      */
1922     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1923     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1924 
1925     /* The END is masked */
1926     if (format == 0 && priority == 0xff) {
1927         return;
1928     }
1929 
1930     /*
1931      * Check the END ESn (Event State Buffer for notification) for
1932      * even further coalescing in the Router
1933      */
1934     if (!xive_end_is_notify(&end)) {
1935         /* ESn[Q]=1 : end of notification */
1936         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1937                                        &end, END_W1_ESn)) {
1938             return;
1939         }
1940     }
1941 
1942     /*
1943      * Follows IVPE notification
1944      */
1945     nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end.w6);
1946     nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end.w6);
1947 
1948     /* NVT cache lookup */
1949     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1950         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1951                       nvt_blk, nvt_idx);
1952         return;
1953     }
1954 
1955     if (!xive_nvt_is_valid(&nvt)) {
1956         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1957                       nvt_blk, nvt_idx);
1958         return;
1959     }
1960 
1961     found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
1962                           false /* crowd */,
1963                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1964                           priority,
1965                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7),
1966                           &precluded);
1967     /* we don't support VP-group notification on P9, so precluded is not used */
1968     /* TODO: Auto EOI. */
1969 
1970     if (found) {
1971         return;
1972     }
1973 
1974     /*
1975      * If no matching NVT is dispatched on a HW thread:
1976      * - specific VP: update the NVT structure if backlog is activated
1977      * - logical server: forward request to IVPE (not supported)
1978      */
1979     if (xive_end_is_backlog(&end)) {
1980         uint8_t ipb;
1981 
1982         if (format == 1) {
1983             qemu_log_mask(LOG_GUEST_ERROR,
1984                           "XIVE: END %x/%x invalid config: F1 & backlog\n",
1985                           end_blk, end_idx);
1986             return;
1987         }
1988         /*
1989          * Record the IPB in the associated NVT structure for later
1990          * use. The presenter will resend the interrupt when the vCPU
1991          * is dispatched again on a HW thread.
1992          */
1993         ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
1994             xive_priority_to_ipb(priority);
1995         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
1996         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1997 
1998         /*
1999          * On HW, follows a "Broadcast Backlog" to IVPEs
2000          */
2001     }
2002 
2003 do_escalation:
2004     /*
2005      * If activated, escalate notification using the ESe PQ bits and
2006      * the EAS in w4-5
2007      */
2008     if (!xive_end_is_escalate(&end)) {
2009         return;
2010     }
2011 
2012     /*
2013      * Check the END ESe (Event State Buffer for escalation) for even
2014      * further coalescing in the Router
2015      */
2016     if (!xive_end_is_uncond_escalation(&end)) {
2017         /* ESe[Q]=1 : end of notification */
2018         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
2019                                        &end, END_W1_ESe)) {
2020             return;
2021         }
2022     }
2023 
2024     trace_xive_router_end_escalate(end_blk, end_idx,
2025            (uint8_t) xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
2026            (uint32_t) xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
2027            (uint32_t) xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
2028     /*
2029      * The END trigger becomes an Escalation trigger
2030      */
2031     xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
2032 }
2033 
2034 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
2035 {
2036     XiveRouter *xrtr = XIVE_ROUTER(xn);
2037     uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
2038     uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
2039     XiveEAS eas;
2040 
2041     /* EAS cache lookup */
2042     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
2043         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
2044         return;
2045     }
2046 
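    /*
     * If the source has not already applied the ESB PQ protocol, run
     * the trigger state machine here and only forward the event when
     * it requests a notification.
     */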
2047     if (!pq_checked) {
2048         bool notify;
2049         uint8_t pq;
2050 
2051         /* PQ cache lookup */
2052         if (xive_router_get_pq(xrtr, eas_blk, eas_idx, &pq)) {
2053             /* Set FIR */
2054             g_assert_not_reached();
2055         }
2056 
2057         notify = xive_esb_trigger(&pq);
2058 
2059         if (xive_router_set_pq(xrtr, eas_blk, eas_idx, &pq)) {
2060             /* Set FIR */
2061             g_assert_not_reached();
2062         }
2063 
2064         if (!notify) {
2065             return;
2066         }
2067     }
2068 
2069     if (!xive_eas_is_valid(&eas)) {
2070         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
2071         return;
2072     }
2073 
2074     if (xive_eas_is_masked(&eas)) {
2075         /* Notification completed */
2076         return;
2077     }
2078 
2079     /*
2080      * The event trigger becomes an END trigger
2081      */
2082     xive_router_end_notify_handler(xrtr, &eas);
2083 }
2084 
2085 static const Property xive_router_properties[] = {
2086     DEFINE_PROP_LINK("xive-fabric", XiveRouter, xfb,
2087                      TYPE_XIVE_FABRIC, XiveFabric *),
2088 };
2089 
2090 static void xive_router_class_init(ObjectClass *klass, const void *data)
2091 {
2092     DeviceClass *dc = DEVICE_CLASS(klass);
2093     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
2094     XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
2095 
2096     dc->desc    = "XIVE Router Engine";
2097     device_class_set_props(dc, xive_router_properties);
2098     /* Parent is SysBusDeviceClass. No need to call its realize hook */
2099     dc->realize = xive_router_realize;
2100     xnc->notify = xive_router_notify;
2101 
2102     /* By default, the router handles END triggers locally */
2103     xrc->end_notify = xive_router_end_notify;
2104 }
2105 
2106 static const TypeInfo xive_router_info = {
2107     .name          = TYPE_XIVE_ROUTER,
2108     .parent        = TYPE_SYS_BUS_DEVICE,
2109     .abstract      = true,
2110     .instance_size = sizeof(XiveRouter),
2111     .class_size    = sizeof(XiveRouterClass),
2112     .class_init    = xive_router_class_init,
2113     .interfaces    = (const InterfaceInfo[]) {
2114         { TYPE_XIVE_NOTIFIER },
2115         { TYPE_XIVE_PRESENTER },
2116         { }
2117     }
2118 };
2119 
2120 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, GString *buf)
2121 {
2122     if (!xive_eas_is_valid(eas)) {
2123         return;
2124     }
2125 
2126     g_string_append_printf(buf, "  %08x %s end:%02x/%04x data:%08x\n",
2127                            lisn, xive_eas_is_masked(eas) ? "M" : " ",
2128                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
2129                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
2130                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
2131 }
2132 
2133 /*
2134  * END ESB MMIO loads
2135  */
2136 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
2137 {
2138     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
2139     uint32_t offset = addr & 0xFFF;
2140     uint8_t end_blk;
2141     uint32_t end_idx;
2142     XiveEND end;
2143     uint32_t end_esmask;
2144     uint8_t pq;
2145     uint64_t ret = -1;
2146 
2147     /*
2148      * The block id should be deduced from the load address on the END
2149      * ESB MMIO but our model only supports a single block per XIVE chip.
2150      */
2151     end_blk = xive_router_get_block_id(xsrc->xrtr);
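    /* Each END owns an even/odd pair of ESB pages, hence the "+ 1" */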
2152     end_idx = addr >> (xsrc->esb_shift + 1);
2153 
2154     trace_xive_end_source_read(end_blk, end_idx, addr);
2155 
2156     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
2157         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
2158                       end_idx);
2159         return -1;
2160     }
2161 
2162     if (!xive_end_is_valid(&end)) {
2163         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
2164                       end_blk, end_idx);
2165         return -1;
2166     }
2167 
2168     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
2169     pq = xive_get_field32(end_esmask, end.w1);
2170 
2171     switch (offset) {
2172     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
2173         ret = xive_esb_eoi(&pq);
2174 
2175         /* Forward the source event notification for routing ?? */
2176         break;
2177 
2178     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
2179         ret = pq;
2180         break;
2181 
2182     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
2183     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
2184     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
2185     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
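        /* The PQ value to install is encoded in bits 9:8 of the offset */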
2186         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
2187         break;
2188     default:
2189         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
2190                       offset);
2191         return -1;
2192     }
2193 
2194     if (pq != xive_get_field32(end_esmask, end.w1)) {
2195         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
2196         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
2197     }
2198 
2199     return ret;
2200 }
2201 
2202 /*
2203  * END ESB MMIO stores are invalid
2204  */
2205 static void xive_end_source_write(void *opaque, hwaddr addr,
2206                                   uint64_t value, unsigned size)
2207 {
2208     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
2209                   HWADDR_PRIx"\n", addr);
2210 }
2211 
2212 static const MemoryRegionOps xive_end_source_ops = {
2213     .read = xive_end_source_read,
2214     .write = xive_end_source_write,
2215     .endianness = DEVICE_BIG_ENDIAN,
2216     .valid = {
2217         .min_access_size = 1,
2218         .max_access_size = 8,
2219     },
2220     .impl = {
2221         .min_access_size = 1,
2222         .max_access_size = 8,
2223     },
2224 };
2225 
2226 static void xive_end_source_realize(DeviceState *dev, Error **errp)
2227 {
2228     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
2229 
2230     assert(xsrc->xrtr);
2231 
2232     if (!xsrc->nr_ends) {
2233         error_setg(errp, "Number of interrupts needs to be greater than 0");
2234         return;
2235     }
2236 
2237     if (xsrc->esb_shift != XIVE_ESB_4K &&
2238         xsrc->esb_shift != XIVE_ESB_64K) {
2239         error_setg(errp, "Invalid ESB shift setting");
2240         return;
2241     }
2242 
2243     /*
2244      * Each END is assigned an even/odd pair of MMIO pages, the even page
2245      * manages the ESn field while the odd page manages the ESe field.
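     * The ESB MMIO region below thus spans nr_ends page pairs, i.e.
     * nr_ends << (esb_shift + 1) bytes.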
2246      */
2247     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
2248                           &xive_end_source_ops, xsrc, "xive.end",
2249                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
2250 }
2251 
2252 static const Property xive_end_source_properties[] = {
2253     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
2254     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
2255     DEFINE_PROP_LINK("xive", XiveENDSource, xrtr, TYPE_XIVE_ROUTER,
2256                      XiveRouter *),
2257 };
2258 
2259 static void xive_end_source_class_init(ObjectClass *klass, const void *data)
2260 {
2261     DeviceClass *dc = DEVICE_CLASS(klass);
2262 
2263     dc->desc    = "XIVE END Source";
2264     device_class_set_props(dc, xive_end_source_properties);
2265     dc->realize = xive_end_source_realize;
2266     /*
2267      * Reason: part of XIVE interrupt controller, needs to be wired up,
2268      * e.g. by spapr_xive_instance_init().
2269      */
2270     dc->user_creatable = false;
2271 }
2272 
2273 static const TypeInfo xive_end_source_info = {
2274     .name          = TYPE_XIVE_END_SOURCE,
2275     .parent        = TYPE_DEVICE,
2276     .instance_size = sizeof(XiveENDSource),
2277     .class_init    = xive_end_source_class_init,
2278 };
2279 
2280 /*
2281  * XIVE Notifier
2282  */
2283 static const TypeInfo xive_notifier_info = {
2284     .name = TYPE_XIVE_NOTIFIER,
2285     .parent = TYPE_INTERFACE,
2286     .class_size = sizeof(XiveNotifierClass),
2287 };
2288 
2289 /*
2290  * XIVE Presenter
2291  */
2292 static const TypeInfo xive_presenter_info = {
2293     .name = TYPE_XIVE_PRESENTER,
2294     .parent = TYPE_INTERFACE,
2295     .class_size = sizeof(XivePresenterClass),
2296 };
2297 
2298 /*
2299  * XIVE Fabric
2300  */
2301 static const TypeInfo xive_fabric_info = {
2302     .name = TYPE_XIVE_FABRIC,
2303     .parent = TYPE_INTERFACE,
2304     .class_size = sizeof(XiveFabricClass),
2305 };
2306 
2307 static void xive_register_types(void)
2308 {
2309     type_register_static(&xive_fabric_info);
2310     type_register_static(&xive_source_info);
2311     type_register_static(&xive_notifier_info);
2312     type_register_static(&xive_presenter_info);
2313     type_register_static(&xive_router_info);
2314     type_register_static(&xive_end_source_info);
2315     type_register_static(&xive_tctx_info);
2316 }
2317 
2318 type_init(xive_register_types)
2319