xref: /openbmc/qemu/hw/intc/xive.c (revision 7d87775f)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "sysemu/cpus.h"
16 #include "sysemu/dma.h"
17 #include "sysemu/reset.h"
18 #include "hw/qdev-properties.h"
19 #include "migration/vmstate.h"
20 #include "hw/irq.h"
21 #include "hw/ppc/xive.h"
22 #include "hw/ppc/xive2.h"
23 #include "hw/ppc/xive_regs.h"
24 #include "trace.h"
25 
26 /*
27  * XIVE Thread Interrupt Management context
28  */
29 
30 /*
31  * Convert an Interrupt Pending Buffer (IPB) register to a Pending
32  * Interrupt Priority Register (PIPR), which contains the priority of
33  * the most favored pending notification.
34  */
static uint8_t ipb_to_pipr(uint8_t ibp)
{
    /* Nothing pending: report the least favored value */
    if (!ibp) {
        return 0xff;
    }

    /*
     * Move the byte to the top of a 32-bit word so that the number of
     * leading zeros is the position of its most significant set bit.
     */
    return clz32((uint32_t)ibp << 24);
}
39 
40 static uint8_t exception_mask(uint8_t ring)
41 {
42     switch (ring) {
43     case TM_QW1_OS:
44         return TM_QW1_NSR_EO;
45     case TM_QW3_HV_PHYS:
46         return TM_QW3_NSR_HE;
47     default:
48         g_assert_not_reached();
49     }
50 }
51 
52 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
53 {
54         switch (ring) {
55         case TM_QW0_USER:
56                 return 0; /* Not supported */
57         case TM_QW1_OS:
58                 return tctx->os_output;
59         case TM_QW2_HV_POOL:
60         case TM_QW3_HV_PHYS:
61                 return tctx->hv_output;
62         default:
63                 return 0;
64         }
65 }
66 
67 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
68 {
69     uint8_t *regs = &tctx->regs[ring];
70     uint8_t nsr = regs[TM_NSR];
71     uint8_t mask = exception_mask(ring);
72 
73     qemu_irq_lower(xive_tctx_output(tctx, ring));
74 
75     if (regs[TM_NSR] & mask) {
76         uint8_t cppr = regs[TM_PIPR];
77         uint8_t alt_ring;
78         uint8_t *alt_regs;
79 
80         /* POOL interrupt uses IPB in QW2, POOL ring */
81         if ((ring == TM_QW3_HV_PHYS) && (nsr & (TM_QW3_NSR_HE_POOL << 6))) {
82             alt_ring = TM_QW2_HV_POOL;
83         } else {
84             alt_ring = ring;
85         }
86         alt_regs = &tctx->regs[alt_ring];
87 
88         regs[TM_CPPR] = cppr;
89 
90         /* Reset the pending buffer bit */
91         alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
92 
93         /* Drop Exception bit */
94         regs[TM_NSR] &= ~mask;
95 
96         trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
97                                alt_regs[TM_IPB], regs[TM_PIPR],
98                                regs[TM_CPPR], regs[TM_NSR]);
99     }
100 
101     return ((uint64_t)nsr << 8) | regs[TM_CPPR];
102 }
103 
104 static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
105 {
106     /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
107     uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
108     uint8_t *alt_regs = &tctx->regs[alt_ring];
109     uint8_t *regs = &tctx->regs[ring];
110 
111     if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
112         switch (ring) {
113         case TM_QW1_OS:
114             regs[TM_NSR] |= TM_QW1_NSR_EO;
115             break;
116         case TM_QW2_HV_POOL:
117             alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6);
118             break;
119         case TM_QW3_HV_PHYS:
120             regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
121             break;
122         default:
123             g_assert_not_reached();
124         }
125         trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
126                                regs[TM_IPB], alt_regs[TM_PIPR],
127                                alt_regs[TM_CPPR], alt_regs[TM_NSR]);
128         qemu_irq_raise(xive_tctx_output(tctx, ring));
129     }
130 }
131 
132 void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring)
133 {
134     /*
135      * Lower the External interrupt. Used when pulling a context. It is
136      * necessary to avoid catching it in the higher privilege context. It
137      * should be raised again when re-pushing the lower privilege context.
138      */
139     qemu_irq_lower(xive_tctx_output(tctx, ring));
140 }
141 
142 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
143 {
144     uint8_t *regs = &tctx->regs[ring];
145     uint8_t pipr_min;
146     uint8_t ring_min;
147 
148     trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
149                              regs[TM_IPB], regs[TM_PIPR],
150                              cppr, regs[TM_NSR]);
151 
152     if (cppr > XIVE_PRIORITY_MAX) {
153         cppr = 0xff;
154     }
155 
156     tctx->regs[ring + TM_CPPR] = cppr;
157 
158     /*
159      * Recompute the PIPR based on local pending interrupts.  The PHYS
160      * ring must take the minimum of both the PHYS and POOL PIPR values.
161      */
162     pipr_min = ipb_to_pipr(regs[TM_IPB]);
163     ring_min = ring;
164 
165     /* PHYS updates also depend on POOL values */
166     if (ring == TM_QW3_HV_PHYS) {
167         uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL];
168 
169         /* POOL values only matter if POOL ctx is valid */
170         if (pool_regs[TM_WORD2] & 0x80) {
171 
172             uint8_t pool_pipr = ipb_to_pipr(pool_regs[TM_IPB]);
173 
174             /*
175              * Determine highest priority interrupt and
176              * remember which ring has it.
177              */
178             if (pool_pipr < pipr_min) {
179                 pipr_min = pool_pipr;
180                 ring_min = TM_QW2_HV_POOL;
181             }
182         }
183     }
184 
185     regs[TM_PIPR] = pipr_min;
186 
187     /* CPPR has changed, check if we need to raise a pending exception */
188     xive_tctx_notify(tctx, ring_min);
189 }
190 
191 void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb)
192 {
193     uint8_t *regs = &tctx->regs[ring];
194 
195     regs[TM_IPB] |= ipb;
196     regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
197     xive_tctx_notify(tctx, ring);
198 }
199 
200 /*
201  * XIVE Thread Interrupt Management Area (TIMA)
202  */
203 
204 static void xive_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
205                                 hwaddr offset, uint64_t value, unsigned size)
206 {
207     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
208 }
209 
210 static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
211                                    hwaddr offset, unsigned size)
212 {
213     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
214 }
215 
216 static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
217                                       hwaddr offset, unsigned size)
218 {
219     uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
220     uint32_t qw2w2;
221 
222     qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
223     memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
224     return qw2w2;
225 }
226 
227 static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
228                                       hwaddr offset, unsigned size)
229 {
230     uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
231     uint8_t qw3b8;
232 
233     qw3b8 = qw3b8_prev & ~TM_QW3B8_VT;
234     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8;
235     return qw3b8;
236 }
237 
238 static void xive_tm_vt_push(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
239                             uint64_t value, unsigned size)
240 {
241     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
242 }
243 
244 static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
245                                 hwaddr offset, unsigned size)
246 {
247     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
248 }
249 
250 /*
251  * Define an access map for each page of the TIMA that we will use in
252  * the memory region ops to filter values when doing loads and stores
253  * of raw registers values
254  *
255  * Registers accessibility bits :
256  *
257  *    0x0 - no access
258  *    0x1 - write only
259  *    0x2 - read only
260  *    0x3 - read/write
261  */
262 
/*
 * Access map of the XIVE_TM_HW_PAGE view: one accessibility entry per
 * register byte, one row of 16 bytes per ring.
 */
static const uint8_t xive_tm_hw_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 3, 3, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   3, 3, 3, 0, /* QW-3 PHYS */
};
269 
/*
 * Access map of the XIVE_TM_HV_PAGE view: one accessibility entry per
 * register byte, one row of 16 bytes per ring.
 */
static const uint8_t xive_tm_hv_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    3, 3, 3, 3,   3, 3, 0, 2,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 3, 3,   0, 3, 3, 0,   0, 3, 3, 3,   0, 0, 0, 0, /* QW-2 POOL */
    3, 3, 3, 3,   0, 3, 0, 2,   3, 0, 0, 3,   0, 0, 0, 0, /* QW-3 PHYS */
};
276 
/*
 * Access map of the XIVE_TM_OS_PAGE view: one accessibility entry per
 * register byte, one row of 16 bytes per ring. The POOL and PHYS
 * rings are entirely inaccessible from this view.
 */
static const uint8_t xive_tm_os_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0, /* QW-0 User */
    2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
283 
/*
 * Access map of the XIVE_TM_USER_PAGE view: one accessibility entry
 * per register byte, one row of 16 bytes per ring. Only the first
 * byte of the User ring is accessible.
 */
static const uint8_t xive_tm_user_view[] = {
    3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-0 User */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-1 OS   */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-2 POOL */
    0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0, /* QW-3 PHYS */
};
290 
291 /*
292  * Overall TIMA access map for the thread interrupt management context
293  * registers
294  */
/* Indexed by the TIMA page number taken from the access offset */
static const uint8_t *xive_tm_views[] = {
    [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
    [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
    [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
    [XIVE_TM_USER_PAGE] = xive_tm_user_view,
};
301 
302 /*
303  * Computes a register access mask for a given offset in the TIMA
304  */
305 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
306 {
307     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
308     uint8_t reg_offset = offset & TM_REG_OFFSET;
309     uint8_t reg_mask = write ? 0x1 : 0x2;
310     uint64_t mask = 0x0;
311     int i;
312 
313     for (i = 0; i < size; i++) {
314         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
315             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
316         }
317     }
318 
319     return mask;
320 }
321 
322 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
323                               unsigned size)
324 {
325     uint8_t ring_offset = offset & TM_RING_OFFSET;
326     uint8_t reg_offset = offset & TM_REG_OFFSET;
327     uint64_t mask = xive_tm_mask(offset, size, true);
328     int i;
329 
330     /*
331      * Only 4 or 8 bytes stores are allowed and the User ring is
332      * excluded
333      */
334     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
335         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
336                       HWADDR_PRIx"\n", offset);
337         return;
338     }
339 
340     /*
341      * Use the register offset for the raw values and filter out
342      * reserved values
343      */
344     for (i = 0; i < size; i++) {
345         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
346         if (byte_mask) {
347             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
348                 byte_mask;
349         }
350     }
351 }
352 
353 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
354 {
355     uint8_t ring_offset = offset & TM_RING_OFFSET;
356     uint8_t reg_offset = offset & TM_REG_OFFSET;
357     uint64_t mask = xive_tm_mask(offset, size, false);
358     uint64_t ret;
359     int i;
360 
361     /*
362      * Only 4 or 8 bytes loads are allowed and the User ring is
363      * excluded
364      */
365     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
366         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
367                       HWADDR_PRIx"\n", offset);
368         return -1;
369     }
370 
371     /* Use the register offset for the raw values */
372     ret = 0;
373     for (i = 0; i < size; i++) {
374         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
375     }
376 
377     /* filter out reserved values */
378     return ret & mask;
379 }
380 
381 /*
382  * The TM context is mapped twice within each page. Stores and loads
383  * to the first mapping below 2K write and read the specified values
384  * without modification. The second mapping above 2K performs specific
385  * state changes (side effects) in addition to setting/returning the
386  * interrupt management area context of the processor thread.
387  */
388 static uint64_t xive_tm_ack_os_reg(XivePresenter *xptr, XiveTCTX *tctx,
389                                    hwaddr offset, unsigned size)
390 {
391     return xive_tctx_accept(tctx, TM_QW1_OS);
392 }
393 
394 static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
395                                 hwaddr offset, uint64_t value, unsigned size)
396 {
397     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
398 }
399 
400 static void xive_tctx_set_lgs(XiveTCTX *tctx, uint8_t ring, uint8_t lgs)
401 {
402     uint8_t *regs = &tctx->regs[ring];
403 
404     regs[TM_LGS] = lgs;
405 }
406 
407 static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
408                           hwaddr offset, uint64_t value, unsigned size)
409 {
410     xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff);
411 }
412 
413 /*
414  * Adjust the IPB to allow a CPU to process event queues of other
415  * priorities during one physical interrupt cycle.
416  */
417 static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
418                                    hwaddr offset, uint64_t value, unsigned size)
419 {
420     xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff));
421 }
422 
423 static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
424                                uint32_t *nvt_idx, bool *vo)
425 {
426     if (nvt_blk) {
427         *nvt_blk = xive_nvt_blk(cam);
428     }
429     if (nvt_idx) {
430         *nvt_idx = xive_nvt_idx(cam);
431     }
432     if (vo) {
433         *vo = !!(cam & TM_QW1W2_VO);
434     }
435 }
436 
437 static uint32_t xive_tctx_get_os_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
438                                      uint32_t *nvt_idx, bool *vo)
439 {
440     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
441     uint32_t cam = be32_to_cpu(qw1w2);
442 
443     xive_os_cam_decode(cam, nvt_blk, nvt_idx, vo);
444     return qw1w2;
445 }
446 
447 static void xive_tctx_set_os_cam(XiveTCTX *tctx, uint32_t qw1w2)
448 {
449     memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
450 }
451 
452 static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
453                                     hwaddr offset, unsigned size)
454 {
455     uint32_t qw1w2;
456     uint32_t qw1w2_new;
457     uint8_t nvt_blk;
458     uint32_t nvt_idx;
459     bool vo;
460 
461     qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
462 
463     if (!vo) {
464         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
465                       nvt_blk, nvt_idx);
466     }
467 
468     /* Invalidate CAM line */
469     qw1w2_new = xive_set_field32(TM_QW1W2_VO, qw1w2, 0);
470     xive_tctx_set_os_cam(tctx, qw1w2_new);
471 
472     xive_tctx_reset_signal(tctx, TM_QW1_OS);
473     return qw1w2;
474 }
475 
476 static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
477                                   uint8_t nvt_blk, uint32_t nvt_idx)
478 {
479     XiveNVT nvt;
480     uint8_t ipb;
481 
482     /*
483      * Grab the associated NVT to pull the pending bits, and merge
484      * them with the IPB of the thread interrupt context registers
485      */
486     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
487         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVT %x/%x\n",
488                           nvt_blk, nvt_idx);
489         return;
490     }
491 
492     ipb = xive_get_field32(NVT_W4_IPB, nvt.w4);
493 
494     if (ipb) {
495         /* Reset the NVT value */
496         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, 0);
497         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
498     }
499     /*
500      * Always call xive_tctx_ipb_update(). Even if there were no
501      * escalation triggered, there could be a pending interrupt which
502      * was saved when the context was pulled and that we need to take
503      * into account by recalculating the PIPR (which is not
504      * saved/restored).
505      * It will also raise the External interrupt signal if needed.
506      */
507     xive_tctx_ipb_update(tctx, TM_QW1_OS, ipb);
508 }
509 
510 /*
511  * Updating the OS CAM line can trigger a resend of interrupt
512  */
513 static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
514                                 hwaddr offset, uint64_t value, unsigned size)
515 {
516     uint32_t cam = value;
517     uint32_t qw1w2 = cpu_to_be32(cam);
518     uint8_t nvt_blk;
519     uint32_t nvt_idx;
520     bool vo;
521 
522     xive_os_cam_decode(cam, &nvt_blk, &nvt_idx, &vo);
523 
524     /* First update the registers */
525     xive_tctx_set_os_cam(tctx, qw1w2);
526 
527     /* Check the interrupt pending bits */
528     if (vo) {
529         xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
530     }
531 }
532 
533 static uint32_t xive_presenter_get_config(XivePresenter *xptr)
534 {
535     XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
536 
537     return xpc->get_config(xptr);
538 }
539 
540 /*
541  * Define a mapping of "special" operations depending on the TIMA page
542  * offset and the size of the operation.
543  */
typedef struct XiveTmOp {
    /* TIMA page the operation is defined for */
    uint8_t  page_offset;
    /* Offset of the operation, matched against the masked access offset */
    uint32_t op_offset;
    /* Access size in bytes the operation applies to */
    unsigned size;
    /* Store handler, NULL when the operation is not a store */
    void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
                              hwaddr offset,
                              uint64_t value, unsigned size);
    /* Load handler, NULL when the operation is not a load */
    uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
                             unsigned size);
} XiveTmOp;
554 
/*
 * Special operations selected by xive_tm_find_op() when the presenter
 * configuration has XIVE_PRESENTER_GEN1_TIMA_OS set. Each entry gives
 * the page, the offset, the access size and either a store or a load
 * handler.
 */
static const XiveTmOp xive_tm_operations[] = {
    /*
     * MMIOs below 2K : raw values and special operations without side
     * effects
     */
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive_tm_push_os_ctx,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
                                                     xive_tm_vt_poll },

    /* MMIOs above 2K : special operations with side effects */
    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
                                                     xive_tm_ack_os_reg },
    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
                                                     xive_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
                                                     xive_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
                                                     xive_tm_ack_hv_reg },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
                                                     xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
                                                     xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
                                                     xive_tm_pull_phys_ctx },
};
589 
/*
 * Special operations selected by xive_tm_find_op() when the presenter
 * configuration does not have XIVE_PRESENTER_GEN1_TIMA_OS set. Each
 * entry gives the page, the offset, the access size and either a
 * store or a load handler.
 */
static const XiveTmOp xive2_tm_operations[] = {
    /*
     * MMIOs below 2K : raw values and special operations without side
     * effects
     */
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,       1, xive_tm_set_os_cppr,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      4, xive2_tm_push_os_ctx,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2,      8, xive2_tm_push_os_ctx,
                                                     NULL },
    { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS,        1, xive_tm_set_os_lgs,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR,  1, xive_tm_set_hv_cppr,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
                                                     xive_tm_vt_poll },
    { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T,     1, xive2_tm_set_hv_target,
                                                     NULL },

    /* MMIOs above 2K : special operations with side effects */
    { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,         2, NULL,
                                                     xive_tm_ack_os_reg },
    { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING,     1, xive_tm_set_os_pending,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2,     4, NULL,
                                                     xive2_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        4, NULL,
                                                     xive2_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX,        8, NULL,
                                                     xive2_tm_pull_os_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,         2, NULL,
                                                     xive_tm_ack_hv_reg },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2,   4, NULL,
                                                     xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      4, NULL,
                                                     xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,      8, NULL,
                                                     xive_tm_pull_pool_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL,     1, xive2_tm_pull_os_ctx_ol,
                                                     NULL },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2,   4, NULL,
                                                     xive_tm_pull_phys_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX,      1, NULL,
                                                     xive_tm_pull_phys_ctx },
    { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL,   1, xive2_tm_pull_phys_ctx_ol,
                                                     NULL },
};
640 
641 static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
642                                        unsigned size, bool write)
643 {
644     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
645     uint32_t op_offset = offset & TM_ADDRESS_MASK;
646     const XiveTmOp *tm_ops;
647     int i, tm_ops_count;
648     uint32_t cfg;
649 
650     cfg = xive_presenter_get_config(xptr);
651     if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
652         tm_ops = xive_tm_operations;
653         tm_ops_count = ARRAY_SIZE(xive_tm_operations);
654     } else {
655         tm_ops = xive2_tm_operations;
656         tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
657     }
658 
659     for (i = 0; i < tm_ops_count; i++) {
660         const XiveTmOp *xto = &tm_ops[i];
661 
662         /* Accesses done from a more privileged TIMA page is allowed */
663         if (xto->page_offset >= page_offset &&
664             xto->op_offset == op_offset &&
665             xto->size == size &&
666             ((write && xto->write_handler) || (!write && xto->read_handler))) {
667             return xto;
668         }
669     }
670     return NULL;
671 }
672 
673 /*
674  * TIMA MMIO handlers
675  */
676 void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
677                         uint64_t value, unsigned size)
678 {
679     const XiveTmOp *xto;
680 
681     trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
682 
683     /*
684      * TODO: check V bit in Q[0-3]W2
685      */
686 
687     /*
688      * First, check for special operations in the 2K region
689      */
690     if (offset & TM_SPECIAL_OP) {
691         xto = xive_tm_find_op(tctx->xptr, offset, size, true);
692         if (!xto) {
693             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
694                           "@%"HWADDR_PRIx"\n", offset);
695         } else {
696             xto->write_handler(xptr, tctx, offset, value, size);
697         }
698         return;
699     }
700 
701     /*
702      * Then, for special operations in the region below 2K.
703      */
704     xto = xive_tm_find_op(tctx->xptr, offset, size, true);
705     if (xto) {
706         xto->write_handler(xptr, tctx, offset, value, size);
707         return;
708     }
709 
710     /*
711      * Finish with raw access to the register values
712      */
713     xive_tm_raw_write(tctx, offset, value, size);
714 }
715 
716 uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
717                            unsigned size)
718 {
719     const XiveTmOp *xto;
720     uint64_t ret;
721 
722     /*
723      * TODO: check V bit in Q[0-3]W2
724      */
725 
726     /*
727      * First, check for special operations in the 2K region
728      */
729     if (offset & TM_SPECIAL_OP) {
730         xto = xive_tm_find_op(tctx->xptr, offset, size, false);
731         if (!xto) {
732             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
733                           "@%"HWADDR_PRIx"\n", offset);
734             return -1;
735         }
736         ret = xto->read_handler(xptr, tctx, offset, size);
737         goto out;
738     }
739 
740     /*
741      * Then, for special operations in the region below 2K.
742      */
743     xto = xive_tm_find_op(tctx->xptr, offset, size, false);
744     if (xto) {
745         ret = xto->read_handler(xptr, tctx, offset, size);
746         goto out;
747     }
748 
749     /*
750      * Finish with raw access to the register values
751      */
752     ret = xive_tm_raw_read(tctx, offset, size);
753 out:
754     trace_xive_tctx_tm_read(tctx->cs->cpu_index, offset, size, ret);
755     return ret;
756 }
757 
758 static char *xive_tctx_ring_print(uint8_t *ring)
759 {
760     uint32_t w2 = xive_tctx_word2(ring);
761 
762     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
763                    "%02x  %02x   %02x  %08x",
764                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
765                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
766                    be32_to_cpu(w2));
767 }
768 
/* Display names of the rings, indexed by ring number */
static const char * const xive_tctx_ring_names[] = {
    "USER", "OS", "POOL", "PHYS",
};
772 
/*
 * True when kvm_irqchip_in_kernel() is true and the presenter class
 * provides an in_kernel() hook which returns true for @xptr.
 *
 * kvm_irqchip_in_kernel() will cause the compiler to turn this
 * into a nop if CONFIG_KVM isn't defined.
 */
#define xive_in_kernel(xptr)                                            \
    (kvm_irqchip_in_kernel() &&                                         \
     ({                                                                 \
         XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);      \
         xpc->in_kernel ? xpc->in_kernel(xptr) : false;                 \
     }))
783 
784 void xive_tctx_pic_print_info(XiveTCTX *tctx, GString *buf)
785 {
786     int cpu_index;
787     int i;
788 
789     /* Skip partially initialized vCPUs. This can happen on sPAPR when vCPUs
790      * are hot plugged or unplugged.
791      */
792     if (!tctx) {
793         return;
794     }
795 
796     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
797 
798     if (xive_in_kernel(tctx->xptr)) {
799         Error *local_err = NULL;
800 
801         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
802         if (local_err) {
803             error_report_err(local_err);
804             return;
805         }
806     }
807 
808     if (xive_presenter_get_config(tctx->xptr) & XIVE_PRESENTER_GEN1_TIMA_OS) {
809         g_string_append_printf(buf, "CPU[%04x]:   "
810                                "QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
811                                "  W2\n", cpu_index);
812     } else {
813         g_string_append_printf(buf, "CPU[%04x]:   "
814                                "QW   NSR CPPR IPB LSMFB   -  LGS  T  PIPR"
815                                "  W2\n", cpu_index);
816     }
817 
818     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
819         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
820         g_string_append_printf(buf, "CPU[%04x]: %4s    %s\n",
821                                cpu_index, xive_tctx_ring_names[i], s);
822         g_free(s);
823     }
824 }
825 
826 void xive_tctx_reset(XiveTCTX *tctx)
827 {
828     memset(tctx->regs, 0, sizeof(tctx->regs));
829 
830     /* Set some defaults */
831     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
832     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
833     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
834     if (!(xive_presenter_get_config(tctx->xptr) &
835           XIVE_PRESENTER_GEN1_TIMA_OS)) {
836         tctx->regs[TM_QW1_OS + TM_OGEN] = 2;
837     }
838 
839     /*
840      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
841      * CPPR is first set.
842      */
843     tctx->regs[TM_QW1_OS + TM_PIPR] =
844         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
845     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
846         ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
847 }
848 
849 static void xive_tctx_realize(DeviceState *dev, Error **errp)
850 {
851     XiveTCTX *tctx = XIVE_TCTX(dev);
852     PowerPCCPU *cpu;
853     CPUPPCState *env;
854 
855     assert(tctx->cs);
856     assert(tctx->xptr);
857 
858     cpu = POWERPC_CPU(tctx->cs);
859     env = &cpu->env;
860     switch (PPC_INPUT(env)) {
861     case PPC_FLAGS_INPUT_POWER9:
862         tctx->hv_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_HINT);
863         tctx->os_output = qdev_get_gpio_in(DEVICE(cpu), POWER9_INPUT_INT);
864         break;
865 
866     default:
867         error_setg(errp, "XIVE interrupt controller does not support "
868                    "this CPU bus model");
869         return;
870     }
871 
872     /* Connect the presenter to the VCPU (required for CPU hotplug) */
873     if (xive_in_kernel(tctx->xptr)) {
874         if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
875             return;
876         }
877     }
878 }
879 
880 static int vmstate_xive_tctx_pre_save(void *opaque)
881 {
882     XiveTCTX *tctx = XIVE_TCTX(opaque);
883     Error *local_err = NULL;
884     int ret;
885 
886     if (xive_in_kernel(tctx->xptr)) {
887         ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
888         if (ret < 0) {
889             error_report_err(local_err);
890             return ret;
891         }
892     }
893 
894     return 0;
895 }
896 
897 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
898 {
899     XiveTCTX *tctx = XIVE_TCTX(opaque);
900     Error *local_err = NULL;
901     int ret;
902 
903     if (xive_in_kernel(tctx->xptr)) {
904         /*
905          * Required for hotplugged CPU, for which the state comes
906          * after all states of the machine.
907          */
908         ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
909         if (ret < 0) {
910             error_report_err(local_err);
911             return ret;
912         }
913     }
914 
915     return 0;
916 }
917 
918 static const VMStateDescription vmstate_xive_tctx = {
919     .name = TYPE_XIVE_TCTX,
920     .version_id = 1,
921     .minimum_version_id = 1,
922     .pre_save = vmstate_xive_tctx_pre_save,
923     .post_load = vmstate_xive_tctx_post_load,
924     .fields = (const VMStateField[]) {
925         VMSTATE_BUFFER(regs, XiveTCTX),
926         VMSTATE_END_OF_LIST()
927     },
928 };
929 
930 static Property xive_tctx_properties[] = {
931     DEFINE_PROP_LINK("cpu", XiveTCTX, cs, TYPE_CPU, CPUState *),
932     DEFINE_PROP_LINK("presenter", XiveTCTX, xptr, TYPE_XIVE_PRESENTER,
933                      XivePresenter *),
934     DEFINE_PROP_END_OF_LIST(),
935 };
936 
937 static void xive_tctx_class_init(ObjectClass *klass, void *data)
938 {
939     DeviceClass *dc = DEVICE_CLASS(klass);
940 
941     dc->desc = "XIVE Interrupt Thread Context";
942     dc->realize = xive_tctx_realize;
943     dc->vmsd = &vmstate_xive_tctx;
944     device_class_set_props(dc, xive_tctx_properties);
945     /*
946      * Reason: part of XIVE interrupt controller, needs to be wired up
947      * by xive_tctx_create().
948      */
949     dc->user_creatable = false;
950 }
951 
952 static const TypeInfo xive_tctx_info = {
953     .name          = TYPE_XIVE_TCTX,
954     .parent        = TYPE_DEVICE,
955     .instance_size = sizeof(XiveTCTX),
956     .class_init    = xive_tctx_class_init,
957 };
958 
959 Object *xive_tctx_create(Object *cpu, XivePresenter *xptr, Error **errp)
960 {
961     Object *obj;
962 
963     obj = object_new(TYPE_XIVE_TCTX);
964     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj);
965     object_unref(obj);
966     object_property_set_link(obj, "cpu", cpu, &error_abort);
967     object_property_set_link(obj, "presenter", OBJECT(xptr), &error_abort);
968     if (!qdev_realize(DEVICE(obj), NULL, errp)) {
969         object_unparent(obj);
970         return NULL;
971     }
972     return obj;
973 }
974 
975 void xive_tctx_destroy(XiveTCTX *tctx)
976 {
977     Object *obj = OBJECT(tctx);
978 
979     object_unparent(obj);
980 }
981 
982 /*
983  * XIVE ESB helpers
984  */
985 
/*
 * Replace the two low-order (PQ) bits of *@pq with the two low-order
 * bits of @value, leaving the other bits of *@pq untouched.
 *
 * Returns the previous value of the PQ bits.
 */
uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
{
    const uint8_t previous = *pq & 0x3;

    *pq = (*pq & ~0x3) | (value & 0x3);

    return previous;
}
995 
996 bool xive_esb_trigger(uint8_t *pq)
997 {
998     uint8_t old_pq = *pq & 0x3;
999 
1000     switch (old_pq) {
1001     case XIVE_ESB_RESET:
1002         xive_esb_set(pq, XIVE_ESB_PENDING);
1003         return true;
1004     case XIVE_ESB_PENDING:
1005     case XIVE_ESB_QUEUED:
1006         xive_esb_set(pq, XIVE_ESB_QUEUED);
1007         return false;
1008     case XIVE_ESB_OFF:
1009         xive_esb_set(pq, XIVE_ESB_OFF);
1010         return false;
1011     default:
1012          g_assert_not_reached();
1013     }
1014 }
1015 
1016 bool xive_esb_eoi(uint8_t *pq)
1017 {
1018     uint8_t old_pq = *pq & 0x3;
1019 
1020     switch (old_pq) {
1021     case XIVE_ESB_RESET:
1022     case XIVE_ESB_PENDING:
1023         xive_esb_set(pq, XIVE_ESB_RESET);
1024         return false;
1025     case XIVE_ESB_QUEUED:
1026         xive_esb_set(pq, XIVE_ESB_PENDING);
1027         return true;
1028     case XIVE_ESB_OFF:
1029         xive_esb_set(pq, XIVE_ESB_OFF);
1030         return false;
1031     default:
1032          g_assert_not_reached();
1033     }
1034 }
1035 
1036 /*
1037  * XIVE Interrupt Source (or IVSE)
1038  */
1039 
1040 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
1041 {
1042     assert(srcno < xsrc->nr_irqs);
1043 
1044     return xsrc->status[srcno] & 0x3;
1045 }
1046 
1047 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
1048 {
1049     assert(srcno < xsrc->nr_irqs);
1050 
1051     return xive_esb_set(&xsrc->status[srcno], pq);
1052 }
1053 
1054 /*
1055  * Returns whether the event notification should be forwarded.
1056  */
1057 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
1058 {
1059     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
1060 
1061     xive_source_set_asserted(xsrc, srcno, true);
1062 
1063     switch (old_pq) {
1064     case XIVE_ESB_RESET:
1065         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
1066         return true;
1067     default:
1068         return false;
1069     }
1070 }
1071 
1072 /*
1073  * Sources can be configured with PQ offloading in which case the check
1074  * on the PQ state bits of MSIs is disabled
1075  */
1076 static bool xive_source_esb_disabled(XiveSource *xsrc, uint32_t srcno)
1077 {
1078     return (xsrc->esb_flags & XIVE_SRC_PQ_DISABLE) &&
1079         !xive_source_irq_is_lsi(xsrc, srcno);
1080 }
1081 
1082 /*
1083  * Returns whether the event notification should be forwarded.
1084  */
1085 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
1086 {
1087     bool ret;
1088 
1089     assert(srcno < xsrc->nr_irqs);
1090 
1091     if (xive_source_esb_disabled(xsrc, srcno)) {
1092         return true;
1093     }
1094 
1095     ret = xive_esb_trigger(&xsrc->status[srcno]);
1096 
1097     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1098         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
1099         qemu_log_mask(LOG_GUEST_ERROR,
1100                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
1101     }
1102 
1103     return ret;
1104 }
1105 
1106 /*
1107  * Returns whether the event notification should be forwarded.
1108  */
1109 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
1110 {
1111     bool ret;
1112 
1113     assert(srcno < xsrc->nr_irqs);
1114 
1115     if (xive_source_esb_disabled(xsrc, srcno)) {
1116         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid EOI for IRQ %d\n", srcno);
1117         return false;
1118     }
1119 
1120     ret = xive_esb_eoi(&xsrc->status[srcno]);
1121 
1122     /*
1123      * LSI sources do not set the Q bit but they can still be
1124      * asserted, in which case we should forward a new event
1125      * notification
1126      */
1127     if (xive_source_irq_is_lsi(xsrc, srcno) &&
1128         xive_source_is_asserted(xsrc, srcno)) {
1129         ret = xive_source_lsi_trigger(xsrc, srcno);
1130     }
1131 
1132     return ret;
1133 }
1134 
1135 /*
1136  * Forward the source event notification to the Router
1137  */
1138 static void xive_source_notify(XiveSource *xsrc, int srcno)
1139 {
1140     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
1141     bool pq_checked = !xive_source_esb_disabled(xsrc, srcno);
1142 
1143     if (xnc->notify) {
1144         xnc->notify(xsrc->xive, srcno, pq_checked);
1145     }
1146 }
1147 
1148 /*
1149  * In a two pages ESB MMIO setting, even page is the trigger page, odd
1150  * page is for management
1151  */
1152 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
1153 {
1154     return !((addr >> shift) & 1);
1155 }
1156 
1157 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
1158 {
1159     return xive_source_esb_has_2page(xsrc) &&
1160         addr_is_even(addr, xsrc->esb_shift - 1);
1161 }
1162 
1163 /*
1164  * ESB MMIO loads
1165  *                      Trigger page    Management/EOI page
1166  *
1167  * ESB MMIO setting     2 pages         1 or 2 pages
1168  *
1169  * 0x000 .. 0x3FF       -1              EOI and return 0|1
1170  * 0x400 .. 0x7FF       -1              EOI and return 0|1
1171  * 0x800 .. 0xBFF       -1              return PQ
1172  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
1173  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
 * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
 * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
1176  */
1177 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
1178 {
1179     XiveSource *xsrc = XIVE_SOURCE(opaque);
1180     uint32_t offset = addr & 0xFFF;
1181     uint32_t srcno = addr >> xsrc->esb_shift;
1182     uint64_t ret = -1;
1183 
1184     /* In a two pages ESB MMIO setting, trigger page should not be read */
1185     if (xive_source_is_trigger_page(xsrc, addr)) {
1186         qemu_log_mask(LOG_GUEST_ERROR,
1187                       "XIVE: invalid load on IRQ %d trigger page at "
1188                       "0x%"HWADDR_PRIx"\n", srcno, addr);
1189         return -1;
1190     }
1191 
1192     switch (offset) {
1193     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1194         ret = xive_source_esb_eoi(xsrc, srcno);
1195 
1196         /* Forward the source event notification for routing */
1197         if (ret) {
1198             xive_source_notify(xsrc, srcno);
1199         }
1200         break;
1201 
1202     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1203         ret = xive_source_esb_get(xsrc, srcno);
1204         break;
1205 
1206     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1207     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1208     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1209     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1210         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1211         break;
1212     default:
1213         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
1214                       offset);
1215     }
1216 
1217     trace_xive_source_esb_read(addr, srcno, ret);
1218 
1219     return ret;
1220 }
1221 
1222 /*
1223  * ESB MMIO stores
1224  *                      Trigger page    Management/EOI page
1225  *
1226  * ESB MMIO setting     2 pages         1 or 2 pages
1227  *
1228  * 0x000 .. 0x3FF       Trigger         Trigger
1229  * 0x400 .. 0x7FF       Trigger         EOI
1230  * 0x800 .. 0xBFF       Trigger         undefined
1231  * 0xC00 .. 0xCFF       Trigger         PQ=00
1232  * 0xD00 .. 0xDFF       Trigger         PQ=01
 * 0xE00 .. 0xEFF       Trigger         PQ=10
 * 0xF00 .. 0xFFF       Trigger         PQ=11
1235  */
1236 static void xive_source_esb_write(void *opaque, hwaddr addr,
1237                                   uint64_t value, unsigned size)
1238 {
1239     XiveSource *xsrc = XIVE_SOURCE(opaque);
1240     uint32_t offset = addr & 0xFFF;
1241     uint32_t srcno = addr >> xsrc->esb_shift;
1242     bool notify = false;
1243 
1244     trace_xive_source_esb_write(addr, srcno, value);
1245 
1246     /* In a two pages ESB MMIO setting, trigger page only triggers */
1247     if (xive_source_is_trigger_page(xsrc, addr)) {
1248         notify = xive_source_esb_trigger(xsrc, srcno);
1249         goto out;
1250     }
1251 
1252     switch (offset) {
1253     case 0 ... 0x3FF:
1254         notify = xive_source_esb_trigger(xsrc, srcno);
1255         break;
1256 
1257     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
1258         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
1259             qemu_log_mask(LOG_GUEST_ERROR,
1260                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
1261             return;
1262         }
1263 
1264         notify = xive_source_esb_eoi(xsrc, srcno);
1265         break;
1266 
1267     /*
1268      * This is an internal offset used to inject triggers when the PQ
1269      * state bits are not controlled locally. Such as for LSIs when
1270      * under ABT mode.
1271      */
1272     case XIVE_ESB_INJECT ... XIVE_ESB_INJECT + 0x3FF:
1273         notify = true;
1274         break;
1275 
1276     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1277     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1278     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1279     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1280         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
1281         break;
1282 
1283     default:
1284         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
1285                       offset);
1286         return;
1287     }
1288 
1289 out:
1290     /* Forward the source event notification for routing */
1291     if (notify) {
1292         xive_source_notify(xsrc, srcno);
1293     }
1294 }
1295 
1296 static const MemoryRegionOps xive_source_esb_ops = {
1297     .read = xive_source_esb_read,
1298     .write = xive_source_esb_write,
1299     .endianness = DEVICE_BIG_ENDIAN,
1300     .valid = {
1301         .min_access_size = 1,
1302         .max_access_size = 8,
1303     },
1304     .impl = {
1305         .min_access_size = 1,
1306         .max_access_size = 8,
1307     },
1308 };
1309 
1310 void xive_source_set_irq(void *opaque, int srcno, int val)
1311 {
1312     XiveSource *xsrc = XIVE_SOURCE(opaque);
1313     bool notify = false;
1314 
1315     if (xive_source_irq_is_lsi(xsrc, srcno)) {
1316         if (val) {
1317             notify = xive_source_lsi_trigger(xsrc, srcno);
1318         } else {
1319             xive_source_set_asserted(xsrc, srcno, false);
1320         }
1321     } else {
1322         if (val) {
1323             notify = xive_source_esb_trigger(xsrc, srcno);
1324         }
1325     }
1326 
1327     /* Forward the source event notification for routing */
1328     if (notify) {
1329         xive_source_notify(xsrc, srcno);
1330     }
1331 }
1332 
1333 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, GString *buf)
1334 {
1335     for (unsigned i = 0; i < xsrc->nr_irqs; i++) {
1336         uint8_t pq = xive_source_esb_get(xsrc, i);
1337 
1338         if (pq == XIVE_ESB_OFF) {
1339             continue;
1340         }
1341 
1342         g_string_append_printf(buf, "  %08x %s %c%c%c\n", i + offset,
1343                                xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
1344                                pq & XIVE_ESB_VAL_P ? 'P' : '-',
1345                                pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1346                                xive_source_is_asserted(xsrc, i) ? 'A' : ' ');
1347     }
1348 }
1349 
1350 static void xive_source_reset(void *dev)
1351 {
1352     XiveSource *xsrc = XIVE_SOURCE(dev);
1353 
1354     /* Do not clear the LSI bitmap */
1355 
1356     memset(xsrc->status, xsrc->reset_pq, xsrc->nr_irqs);
1357 }
1358 
1359 static void xive_source_realize(DeviceState *dev, Error **errp)
1360 {
1361     XiveSource *xsrc = XIVE_SOURCE(dev);
1362     uint64_t esb_len = xive_source_esb_len(xsrc);
1363 
1364     assert(xsrc->xive);
1365 
1366     if (!xsrc->nr_irqs) {
1367         error_setg(errp, "Number of interrupt needs to be greater than 0");
1368         return;
1369     }
1370 
1371     if (xsrc->esb_shift != XIVE_ESB_4K &&
1372         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1373         xsrc->esb_shift != XIVE_ESB_64K &&
1374         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1375         error_setg(errp, "Invalid ESB shift setting");
1376         return;
1377     }
1378 
1379     xsrc->status = g_malloc0(xsrc->nr_irqs);
1380     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1381 
1382     memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
1383     memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
1384                           &xive_source_esb_ops, xsrc, "xive.esb-emulated",
1385                           esb_len);
1386     memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
1387 
1388     qemu_register_reset(xive_source_reset, dev);
1389 }
1390 
1391 static const VMStateDescription vmstate_xive_source = {
1392     .name = TYPE_XIVE_SOURCE,
1393     .version_id = 1,
1394     .minimum_version_id = 1,
1395     .fields = (const VMStateField[]) {
1396         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1397         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1398         VMSTATE_END_OF_LIST()
1399     },
1400 };
1401 
1402 /*
1403  * The default XIVE interrupt source setting for the ESB MMIOs is two
1404  * 64k pages without Store EOI, to be in sync with KVM.
1405  */
1406 static Property xive_source_properties[] = {
1407     DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
1408     DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
1409     DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
1410     /*
1411      * By default, PQs are initialized to 0b01 (Q=1) which corresponds
1412      * to "ints off"
1413      */
1414     DEFINE_PROP_UINT8("reset-pq", XiveSource, reset_pq, XIVE_ESB_OFF),
1415     DEFINE_PROP_LINK("xive", XiveSource, xive, TYPE_XIVE_NOTIFIER,
1416                      XiveNotifier *),
1417     DEFINE_PROP_END_OF_LIST(),
1418 };
1419 
1420 static void xive_source_class_init(ObjectClass *klass, void *data)
1421 {
1422     DeviceClass *dc = DEVICE_CLASS(klass);
1423 
1424     dc->desc    = "XIVE Interrupt Source";
1425     device_class_set_props(dc, xive_source_properties);
1426     dc->realize = xive_source_realize;
1427     dc->vmsd    = &vmstate_xive_source;
1428     /*
1429      * Reason: part of XIVE interrupt controller, needs to be wired up,
1430      * e.g. by spapr_xive_instance_init().
1431      */
1432     dc->user_creatable = false;
1433 }
1434 
1435 static const TypeInfo xive_source_info = {
1436     .name          = TYPE_XIVE_SOURCE,
1437     .parent        = TYPE_DEVICE,
1438     .instance_size = sizeof(XiveSource),
1439     .class_init    = xive_source_class_init,
1440 };
1441 
1442 /*
1443  * XiveEND helpers
1444  */
1445 
1446 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, GString *buf)
1447 {
1448     uint64_t qaddr_base = xive_end_qaddr(end);
1449     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1450     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1451     uint32_t qentries = 1 << (qsize + 10);
1452     int i;
1453 
1454     /*
1455      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1456      */
1457     g_string_append_printf(buf, " [ ");
1458     qindex = (qindex - (width - 1)) & (qentries - 1);
1459     for (i = 0; i < width; i++) {
1460         uint64_t qaddr = qaddr_base + (qindex << 2);
1461         uint32_t qdata = -1;
1462 
1463         if (dma_memory_read(&address_space_memory, qaddr,
1464                             &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1465             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1466                           HWADDR_PRIx "\n", qaddr);
1467             return;
1468         }
1469         g_string_append_printf(buf, "%s%08x ", i == width - 1 ? "^" : "",
1470                                be32_to_cpu(qdata));
1471         qindex = (qindex + 1) & (qentries - 1);
1472     }
1473     g_string_append_c(buf, ']');
1474 }
1475 
1476 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1477 {
1478     uint64_t qaddr_base = xive_end_qaddr(end);
1479     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1480     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1481     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1482     uint32_t qentries = 1 << (qsize + 10);
1483 
1484     uint32_t nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6);
1485     uint32_t nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1486     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1487     uint8_t pq;
1488 
1489     if (!xive_end_is_valid(end)) {
1490         return;
1491     }
1492 
1493     pq = xive_get_field32(END_W1_ESn, end->w1);
1494 
1495     g_string_append_printf(buf,
1496                            "  %08x %c%c %c%c%c%c%c%c%c%c prio:%d nvt:%02x/%04x",
1497                            end_idx,
1498                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1499                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1500                            xive_end_is_valid(end)    ? 'v' : '-',
1501                            xive_end_is_enqueue(end)  ? 'q' : '-',
1502                            xive_end_is_notify(end)   ? 'n' : '-',
1503                            xive_end_is_backlog(end)  ? 'b' : '-',
1504                            xive_end_is_escalate(end) ? 'e' : '-',
1505                            xive_end_is_uncond_escalation(end)   ? 'u' : '-',
1506                            xive_end_is_silent_escalation(end)   ? 's' : '-',
1507                            xive_end_is_firmware(end)   ? 'f' : '-',
1508                            priority, nvt_blk, nvt_idx);
1509 
1510     if (qaddr_base) {
1511         g_string_append_printf(buf, " eq:@%08"PRIx64"% 6d/%5d ^%d",
1512                                qaddr_base, qindex, qentries, qgen);
1513         xive_end_queue_pic_print_info(end, 6, buf);
1514     }
1515     g_string_append_c(buf, '\n');
1516 }
1517 
1518 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1519 {
1520     uint64_t qaddr_base = xive_end_qaddr(end);
1521     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1522     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1523     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1524 
1525     uint64_t qaddr = qaddr_base + (qindex << 2);
1526     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1527     uint32_t qentries = 1 << (qsize + 10);
1528 
1529     if (dma_memory_write(&address_space_memory, qaddr,
1530                          &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) {
1531         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1532                       HWADDR_PRIx "\n", qaddr);
1533         return;
1534     }
1535 
1536     qindex = (qindex + 1) & (qentries - 1);
1537     if (qindex == 0) {
1538         qgen ^= 1;
1539         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1540     }
1541     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1542 }
1543 
1544 void xive_end_eas_pic_print_info(XiveEND *end, uint32_t end_idx, GString *buf)
1545 {
1546     XiveEAS *eas = (XiveEAS *) &end->w4;
1547     uint8_t pq;
1548 
1549     if (!xive_end_is_escalate(end)) {
1550         return;
1551     }
1552 
1553     pq = xive_get_field32(END_W1_ESe, end->w1);
1554 
1555     g_string_append_printf(buf, "  %08x %c%c %c%c end:%02x/%04x data:%08x\n",
1556                            end_idx,
1557                            pq & XIVE_ESB_VAL_P ? 'P' : '-',
1558                            pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1559                            xive_eas_is_valid(eas) ? 'V' : ' ',
1560                            xive_eas_is_masked(eas) ? 'M' : ' ',
1561                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1562                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1563                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1564 }
1565 
1566 /*
1567  * XIVE Router (aka. Virtualization Controller or IVRE)
1568  */
1569 
1570 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1571                         XiveEAS *eas)
1572 {
1573     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1574 
1575     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1576 }
1577 
1578 static
1579 int xive_router_get_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1580                        uint8_t *pq)
1581 {
1582     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1583 
1584     return xrc->get_pq(xrtr, eas_blk, eas_idx, pq);
1585 }
1586 
1587 static
1588 int xive_router_set_pq(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1589                        uint8_t *pq)
1590 {
1591     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1592 
1593     return xrc->set_pq(xrtr, eas_blk, eas_idx, pq);
1594 }
1595 
1596 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1597                         XiveEND *end)
1598 {
1599    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1600 
1601    return xrc->get_end(xrtr, end_blk, end_idx, end);
1602 }
1603 
1604 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1605                           XiveEND *end, uint8_t word_number)
1606 {
1607    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1608 
1609    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1610 }
1611 
1612 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1613                         XiveNVT *nvt)
1614 {
1615    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1616 
1617    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1618 }
1619 
1620 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1621                         XiveNVT *nvt, uint8_t word_number)
1622 {
1623    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1624 
1625    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1626 }
1627 
1628 static int xive_router_get_block_id(XiveRouter *xrtr)
1629 {
1630    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1631 
1632    return xrc->get_block_id(xrtr);
1633 }
1634 
1635 static void xive_router_realize(DeviceState *dev, Error **errp)
1636 {
1637     XiveRouter *xrtr = XIVE_ROUTER(dev);
1638 
1639     assert(xrtr->xfb);
1640 }
1641 
1642 static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
1643 {
1644     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1645 
1646     return xrc->end_notify(xrtr, eas);
1647 }
1648 
1649 /*
1650  * Encode the HW CAM line in the block group mode format :
1651  *
1652  *   chip << 19 | 0000000 0 0001 thread (7Bit)
1653  */
1654 static uint32_t xive_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
1655 {
1656     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1657     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1658     uint8_t blk = xive_router_get_block_id(XIVE_ROUTER(xptr));
1659 
1660     return xive_nvt_cam_line(blk, 1 << 7 | (pir & 0x7f));
1661 }
1662 
1663 /*
1664  * The thread context register words are in big-endian format.
1665  */
1666 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
1667                               uint8_t format,
1668                               uint8_t nvt_blk, uint32_t nvt_idx,
1669                               bool cam_ignore, uint32_t logic_serv)
1670 {
1671     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1672     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1673     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1674     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1675     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1676 
1677     /*
1678      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1679      * identifier are ignored in the "CAM" match.
1680      */
1681 
1682     if (format == 0) {
1683         if (cam_ignore == true) {
1684             /*
1685              * F=0 & i=1: Logical server notification (bits ignored at
1686              * the end of the NVT identifier)
1687              */
1688             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1689                           nvt_blk, nvt_idx);
1690              return -1;
1691         }
1692 
1693         /* F=0 & i=0: Specific NVT notification */
1694 
1695         /* PHYS ring */
1696         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1697             cam == xive_tctx_hw_cam_line(xptr, tctx)) {
1698             return TM_QW3_HV_PHYS;
1699         }
1700 
1701         /* HV POOL ring */
1702         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1703             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1704             return TM_QW2_HV_POOL;
1705         }
1706 
1707         /* OS ring */
1708         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1709             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1710             return TM_QW1_OS;
1711         }
1712     } else {
1713         /* F=1 : User level Event-Based Branch (EBB) notification */
1714 
1715         /* USER ring */
1716         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1717              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1718              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1719              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1720             return TM_QW0_USER;
1721         }
1722     }
1723     return -1;
1724 }
1725 
1726 /*
1727  * This is our simple Xive Presenter Engine model. It is merged in the
1728  * Router as it does not require an extra object.
1729  *
1730  * It receives notification requests sent by the IVRE to find one
1731  * matching NVT (or more) dispatched on the processor threads. In case
1732  * of a single NVT notification, the process is abbreviated and the
1733  * thread is signaled if a match is found. In case of a logical server
1734  * notification (bits ignored at the end of the NVT identifier), the
1735  * IVPE and IVRE select a winning thread using different filters. This
1736  * involves 2 or 3 exchanges on the PowerBus that the model does not
1737  * support.
1738  *
1739  * The parameters represent what is sent on the PowerBus
1740  */
1741 bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
1742                            uint8_t nvt_blk, uint32_t nvt_idx,
1743                            bool cam_ignore, uint8_t priority,
1744                            uint32_t logic_serv)
1745 {
1746     XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
1747     XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
1748     int count;
1749 
1750     /*
1751      * Ask the machine to scan the interrupt controllers for a match
1752      */
1753     count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, cam_ignore,
1754                            priority, logic_serv, &match);
1755     if (count < 0) {
1756         return false;
1757     }
1758 
1759     /* handle CPU exception delivery */
1760     if (count) {
1761         trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring);
1762         xive_tctx_ipb_update(match.tctx, match.ring,
1763                              xive_priority_to_ipb(priority));
1764     }
1765 
1766     return !!count;
1767 }
1768 
1769 /*
1770  * Notification using the END ESe/ESn bit (Event State Buffer for
1771  * escalation and notification). Provide further coalescing in the
1772  * Router.
1773  */
1774 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
1775                                       uint32_t end_idx, XiveEND *end,
1776                                       uint32_t end_esmask)
1777 {
1778     uint8_t pq = xive_get_field32(end_esmask, end->w1);
1779     bool notify = xive_esb_trigger(&pq);
1780 
1781     if (pq != xive_get_field32(end_esmask, end->w1)) {
1782         end->w1 = xive_set_field32(end_esmask, end->w1, pq);
1783         xive_router_write_end(xrtr, end_blk, end_idx, end, 1);
1784     }
1785 
1786     /* ESe/n[Q]=1 : end of notification */
1787     return notify;
1788 }
1789 
1790 /*
1791  * An END trigger can come from an event trigger (IPI or HW) or from
1792  * another chip. We don't model the PowerBus but the END trigger
1793  * message has the same parameters than in the function below.
1794  */
1795 void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
1796 {
1797     XiveEND end;
1798     uint8_t priority;
1799     uint8_t format;
1800     uint8_t nvt_blk;
1801     uint32_t nvt_idx;
1802     XiveNVT nvt;
1803     bool found;
1804 
1805     uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
1806     uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
1807     uint32_t end_data = xive_get_field64(EAS_END_DATA,  eas->w);
1808 
1809     /* END cache lookup */
1810     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1811         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1812                       end_idx);
1813         return;
1814     }
1815 
1816     if (!xive_end_is_valid(&end)) {
1817         trace_xive_router_end_notify(end_blk, end_idx, end_data);
1818         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1819                       end_blk, end_idx);
1820         return;
1821     }
1822 
1823     if (xive_end_is_enqueue(&end)) {
1824         xive_end_enqueue(&end, end_data);
1825         /* Enqueuing event data modifies the EQ toggle and index */
1826         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1827     }
1828 
1829     /*
1830      * When the END is silent, we skip the notification part.
1831      */
1832     if (xive_end_is_silent_escalation(&end)) {
1833         goto do_escalation;
1834     }
1835 
1836     /*
1837      * The W7 format depends on the F bit in W6. It defines the type
1838      * of the notification :
1839      *
1840      *   F=0 : single or multiple NVT notification
1841      *   F=1 : User level Event-Based Branch (EBB) notification, no
1842      *         priority
1843      */
1844     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1845     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1846 
1847     /* The END is masked */
1848     if (format == 0 && priority == 0xff) {
1849         return;
1850     }
1851 
1852     /*
1853      * Check the END ESn (Event State Buffer for notification) for
1854      * even further coalescing in the Router
1855      */
1856     if (!xive_end_is_notify(&end)) {
1857         /* ESn[Q]=1 : end of notification */
1858         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1859                                        &end, END_W1_ESn)) {
1860             return;
1861         }
1862     }
1863 
1864     /*
1865      * Follows IVPE notification
1866      */
1867     nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end.w6);
1868     nvt_idx = xive_get_field32(END_W6_NVT_INDEX, end.w6);
1869 
1870     /* NVT cache lookup */
1871     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1872         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1873                       nvt_blk, nvt_idx);
1874         return;
1875     }
1876 
1877     if (!xive_nvt_is_valid(&nvt)) {
1878         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1879                       nvt_blk, nvt_idx);
1880         return;
1881     }
1882 
1883     found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
1884                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1885                           priority,
1886                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
1887 
1888     /* TODO: Auto EOI. */
1889 
1890     if (found) {
1891         return;
1892     }
1893 
1894     /*
1895      * If no matching NVT is dispatched on a HW thread :
1896      * - specific VP: update the NVT structure if backlog is activated
1897      * - logical server : forward request to IVPE (not supported)
1898      */
1899     if (xive_end_is_backlog(&end)) {
1900         uint8_t ipb;
1901 
1902         if (format == 1) {
1903             qemu_log_mask(LOG_GUEST_ERROR,
1904                           "XIVE: END %x/%x invalid config: F1 & backlog\n",
1905                           end_blk, end_idx);
1906             return;
1907         }
1908         /*
1909          * Record the IPB in the associated NVT structure for later
1910          * use. The presenter will resend the interrupt when the vCPU
1911          * is dispatched again on a HW thread.
1912          */
1913         ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) |
1914             xive_priority_to_ipb(priority);
1915         nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb);
1916         xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1917 
1918         /*
1919          * On HW, follows a "Broadcast Backlog" to IVPEs
1920          */
1921     }
1922 
1923 do_escalation:
1924     /*
1925      * If activated, escalate notification using the ESe PQ bits and
1926      * the EAS in w4-5
1927      */
1928     if (!xive_end_is_escalate(&end)) {
1929         return;
1930     }
1931 
1932     /*
1933      * Check the END ESe (Event State Buffer for escalation) for even
1934      * further coalescing in the Router
1935      */
1936     if (!xive_end_is_uncond_escalation(&end)) {
1937         /* ESe[Q]=1 : end of notification */
1938         if (!xive_router_end_es_notify(xrtr, end_blk, end_idx,
1939                                        &end, END_W1_ESe)) {
1940             return;
1941         }
1942     }
1943 
1944     trace_xive_router_end_escalate(end_blk, end_idx,
1945            (uint8_t) xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
1946            (uint32_t) xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
1947            (uint32_t) xive_get_field32(END_W5_ESC_END_DATA,  end.w5));
1948     /*
1949      * The END trigger becomes an Escalation trigger
1950      */
1951     xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
1952 }
1953 
1954 void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
1955 {
1956     XiveRouter *xrtr = XIVE_ROUTER(xn);
1957     uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
1958     uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
1959     XiveEAS eas;
1960 
1961     /* EAS cache lookup */
1962     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
1963         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
1964         return;
1965     }
1966 
1967     if (!pq_checked) {
1968         bool notify;
1969         uint8_t pq;
1970 
1971         /* PQ cache lookup */
1972         if (xive_router_get_pq(xrtr, eas_blk, eas_idx, &pq)) {
1973             /* Set FIR */
1974             g_assert_not_reached();
1975         }
1976 
1977         notify = xive_esb_trigger(&pq);
1978 
1979         if (xive_router_set_pq(xrtr, eas_blk, eas_idx, &pq)) {
1980             /* Set FIR */
1981             g_assert_not_reached();
1982         }
1983 
1984         if (!notify) {
1985             return;
1986         }
1987     }
1988 
1989     if (!xive_eas_is_valid(&eas)) {
1990         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
1991         return;
1992     }
1993 
1994     if (xive_eas_is_masked(&eas)) {
1995         /* Notification completed */
1996         return;
1997     }
1998 
1999     /*
2000      * The event trigger becomes an END trigger
2001      */
2002     xive_router_end_notify_handler(xrtr, &eas);
2003 }
2004 
2005 static Property xive_router_properties[] = {
2006     DEFINE_PROP_LINK("xive-fabric", XiveRouter, xfb,
2007                      TYPE_XIVE_FABRIC, XiveFabric *),
2008     DEFINE_PROP_END_OF_LIST(),
2009 };
2010 
2011 static void xive_router_class_init(ObjectClass *klass, void *data)
2012 {
2013     DeviceClass *dc = DEVICE_CLASS(klass);
2014     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
2015     XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
2016 
2017     dc->desc    = "XIVE Router Engine";
2018     device_class_set_props(dc, xive_router_properties);
2019     /* Parent is SysBusDeviceClass. No need to call its realize hook */
2020     dc->realize = xive_router_realize;
2021     xnc->notify = xive_router_notify;
2022 
2023     /* By default, the router handles END triggers locally */
2024     xrc->end_notify = xive_router_end_notify;
2025 }
2026 
2027 static const TypeInfo xive_router_info = {
2028     .name          = TYPE_XIVE_ROUTER,
2029     .parent        = TYPE_SYS_BUS_DEVICE,
2030     .abstract      = true,
2031     .instance_size = sizeof(XiveRouter),
2032     .class_size    = sizeof(XiveRouterClass),
2033     .class_init    = xive_router_class_init,
2034     .interfaces    = (InterfaceInfo[]) {
2035         { TYPE_XIVE_NOTIFIER },
2036         { TYPE_XIVE_PRESENTER },
2037         { }
2038     }
2039 };
2040 
2041 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, GString *buf)
2042 {
2043     if (!xive_eas_is_valid(eas)) {
2044         return;
2045     }
2046 
2047     g_string_append_printf(buf, "  %08x %s end:%02x/%04x data:%08x\n",
2048                            lisn, xive_eas_is_masked(eas) ? "M" : " ",
2049                            (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
2050                            (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
2051                            (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
2052 }
2053 
2054 /*
2055  * END ESB MMIO loads
2056  */
2057 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
2058 {
2059     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
2060     uint32_t offset = addr & 0xFFF;
2061     uint8_t end_blk;
2062     uint32_t end_idx;
2063     XiveEND end;
2064     uint32_t end_esmask;
2065     uint8_t pq;
2066     uint64_t ret = -1;
2067 
2068     /*
2069      * The block id should be deduced from the load address on the END
2070      * ESB MMIO but our model only supports a single block per XIVE chip.
2071      */
2072     end_blk = xive_router_get_block_id(xsrc->xrtr);
2073     end_idx = addr >> (xsrc->esb_shift + 1);
2074 
2075     trace_xive_end_source_read(end_blk, end_idx, addr);
2076 
2077     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
2078         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
2079                       end_idx);
2080         return -1;
2081     }
2082 
2083     if (!xive_end_is_valid(&end)) {
2084         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
2085                       end_blk, end_idx);
2086         return -1;
2087     }
2088 
2089     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
2090     pq = xive_get_field32(end_esmask, end.w1);
2091 
2092     switch (offset) {
2093     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
2094         ret = xive_esb_eoi(&pq);
2095 
2096         /* Forward the source event notification for routing ?? */
2097         break;
2098 
2099     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
2100         ret = pq;
2101         break;
2102 
2103     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
2104     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
2105     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
2106     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
2107         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
2108         break;
2109     default:
2110         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
2111                       offset);
2112         return -1;
2113     }
2114 
2115     if (pq != xive_get_field32(end_esmask, end.w1)) {
2116         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
2117         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
2118     }
2119 
2120     return ret;
2121 }
2122 
2123 /*
2124  * END ESB MMIO stores are invalid
2125  */
2126 static void xive_end_source_write(void *opaque, hwaddr addr,
2127                                   uint64_t value, unsigned size)
2128 {
2129     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
2130                   HWADDR_PRIx"\n", addr);
2131 }
2132 
2133 static const MemoryRegionOps xive_end_source_ops = {
2134     .read = xive_end_source_read,
2135     .write = xive_end_source_write,
2136     .endianness = DEVICE_BIG_ENDIAN,
2137     .valid = {
2138         .min_access_size = 1,
2139         .max_access_size = 8,
2140     },
2141     .impl = {
2142         .min_access_size = 1,
2143         .max_access_size = 8,
2144     },
2145 };
2146 
2147 static void xive_end_source_realize(DeviceState *dev, Error **errp)
2148 {
2149     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
2150 
2151     assert(xsrc->xrtr);
2152 
2153     if (!xsrc->nr_ends) {
2154         error_setg(errp, "Number of interrupt needs to be greater than 0");
2155         return;
2156     }
2157 
2158     if (xsrc->esb_shift != XIVE_ESB_4K &&
2159         xsrc->esb_shift != XIVE_ESB_64K) {
2160         error_setg(errp, "Invalid ESB shift setting");
2161         return;
2162     }
2163 
2164     /*
2165      * Each END is assigned an even/odd pair of MMIO pages, the even page
2166      * manages the ESn field while the odd page manages the ESe field.
2167      */
2168     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
2169                           &xive_end_source_ops, xsrc, "xive.end",
2170                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
2171 }
2172 
2173 static Property xive_end_source_properties[] = {
2174     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
2175     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
2176     DEFINE_PROP_LINK("xive", XiveENDSource, xrtr, TYPE_XIVE_ROUTER,
2177                      XiveRouter *),
2178     DEFINE_PROP_END_OF_LIST(),
2179 };
2180 
2181 static void xive_end_source_class_init(ObjectClass *klass, void *data)
2182 {
2183     DeviceClass *dc = DEVICE_CLASS(klass);
2184 
2185     dc->desc    = "XIVE END Source";
2186     device_class_set_props(dc, xive_end_source_properties);
2187     dc->realize = xive_end_source_realize;
2188     /*
2189      * Reason: part of XIVE interrupt controller, needs to be wired up,
2190      * e.g. by spapr_xive_instance_init().
2191      */
2192     dc->user_creatable = false;
2193 }
2194 
2195 static const TypeInfo xive_end_source_info = {
2196     .name          = TYPE_XIVE_END_SOURCE,
2197     .parent        = TYPE_DEVICE,
2198     .instance_size = sizeof(XiveENDSource),
2199     .class_init    = xive_end_source_class_init,
2200 };
2201 
2202 /*
2203  * XIVE Notifier
2204  */
2205 static const TypeInfo xive_notifier_info = {
2206     .name = TYPE_XIVE_NOTIFIER,
2207     .parent = TYPE_INTERFACE,
2208     .class_size = sizeof(XiveNotifierClass),
2209 };
2210 
2211 /*
2212  * XIVE Presenter
2213  */
2214 static const TypeInfo xive_presenter_info = {
2215     .name = TYPE_XIVE_PRESENTER,
2216     .parent = TYPE_INTERFACE,
2217     .class_size = sizeof(XivePresenterClass),
2218 };
2219 
2220 /*
2221  * XIVE Fabric
2222  */
2223 static const TypeInfo xive_fabric_info = {
2224     .name = TYPE_XIVE_FABRIC,
2225     .parent = TYPE_INTERFACE,
2226     .class_size = sizeof(XiveFabricClass),
2227 };
2228 
2229 static void xive_register_types(void)
2230 {
2231     type_register_static(&xive_fabric_info);
2232     type_register_static(&xive_source_info);
2233     type_register_static(&xive_notifier_info);
2234     type_register_static(&xive_presenter_info);
2235     type_register_static(&xive_router_info);
2236     type_register_static(&xive_end_source_info);
2237     type_register_static(&xive_tctx_info);
2238 }
2239 
2240 type_init(xive_register_types)
2241