xref: /openbmc/qemu/hw/intc/xive.c (revision 4aca9786542e427d4337503566efdf09f2cb87cd)
1 /*
2  * QEMU PowerPC XIVE interrupt controller model
3  *
4  * Copyright (c) 2017-2018, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/log.h"
12 #include "qapi/error.h"
13 #include "target/ppc/cpu.h"
14 #include "sysemu/cpus.h"
15 #include "sysemu/dma.h"
16 #include "hw/qdev-properties.h"
17 #include "monitor/monitor.h"
18 #include "hw/ppc/xive.h"
19 #include "hw/ppc/xive_regs.h"
20 
21 /*
22  * XIVE Thread Interrupt Management context
23  */
24 
25 /*
26  * Convert a priority number to an Interrupt Pending Buffer (IPB)
27  * register, which indicates a pending interrupt at the priority
28  * corresponding to the bit number
29  */
30 static uint8_t priority_to_ipb(uint8_t priority)
31 {
32     return priority > XIVE_PRIORITY_MAX ?
33         0 : 1 << (XIVE_PRIORITY_MAX - priority);
34 }
35 
36 /*
37  * Convert an Interrupt Pending Buffer (IPB) register to a Pending
38  * Interrupt Priority Register (PIPR), which contains the priority of
39  * the most favored pending notification.
40  */
41 static uint8_t ipb_to_pipr(uint8_t ipb)
42 {
43     return ipb ? clz32((uint32_t)ipb << 24) : 0xff;
44 }
45 
46 static void ipb_update(uint8_t *regs, uint8_t priority)
47 {
48     regs[TM_IPB] |= priority_to_ipb(priority);
49     regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
50 }
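
/*
 * Worked example for the IPB helpers above (illustrative, assuming
 * XIVE_PRIORITY_MAX is 7): with priorities 3 and 5 pending,
 * priority_to_ipb() sets bits 0x10 and 0x04, the IPB is 0x14 and
 * ipb_to_pipr(0x14) returns 3, the most favored pending priority.
 */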
51 
52 static uint8_t exception_mask(uint8_t ring)
53 {
54     switch (ring) {
55     case TM_QW1_OS:
56         return TM_QW1_NSR_EO;
57     case TM_QW3_HV_PHYS:
58         return TM_QW3_NSR_HE;
59     default:
60         g_assert_not_reached();
61     }
62 }
63 
64 static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
65 {
66     switch (ring) {
67     case TM_QW0_USER:
68         return 0; /* Not supported */
69     case TM_QW1_OS:
70         return tctx->os_output;
71     case TM_QW2_HV_POOL:
72     case TM_QW3_HV_PHYS:
73         return tctx->hv_output;
74     default:
75         return 0;
76     }
77 }
78 
79 static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
80 {
81     uint8_t *regs = &tctx->regs[ring];
82     uint8_t nsr = regs[TM_NSR];
83     uint8_t mask = exception_mask(ring);
84 
85     qemu_irq_lower(xive_tctx_output(tctx, ring));
86 
87     if (regs[TM_NSR] & mask) {
88         uint8_t cppr = regs[TM_PIPR];
89 
90         regs[TM_CPPR] = cppr;
91 
92         /* Reset the pending buffer bit */
93         regs[TM_IPB] &= ~priority_to_ipb(cppr);
94         regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]);
95 
96         /* Drop Exception bit */
97         regs[TM_NSR] &= ~mask;
98     }
99 
100     return (nsr << 8) | regs[TM_CPPR];
101 }
102 
103 static void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring)
104 {
105     uint8_t *regs = &tctx->regs[ring];
106 
107     if (regs[TM_PIPR] < regs[TM_CPPR]) {
108         switch (ring) {
109         case TM_QW1_OS:
110             regs[TM_NSR] |= TM_QW1_NSR_EO;
111             break;
112         case TM_QW3_HV_PHYS:
113             regs[TM_NSR] |= (TM_QW3_NSR_HE_PHYS << 6);
114             break;
115         default:
116             g_assert_not_reached();
117         }
118         qemu_irq_raise(xive_tctx_output(tctx, ring));
119     }
120 }
121 
122 static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
123 {
124     if (cppr > XIVE_PRIORITY_MAX) {
125         cppr = 0xff;
126     }
127 
128     tctx->regs[ring + TM_CPPR] = cppr;
129 
130     /* CPPR has changed, check if we need to raise a pending exception */
131     xive_tctx_notify(tctx, ring);
132 }
133 
134 /*
135  * XIVE Thread Interrupt Management Area (TIMA)
136  */
137 
138 static void xive_tm_set_hv_cppr(XiveTCTX *tctx, hwaddr offset,
139                                 uint64_t value, unsigned size)
140 {
141     xive_tctx_set_cppr(tctx, TM_QW3_HV_PHYS, value & 0xff);
142 }
143 
144 static uint64_t xive_tm_ack_hv_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
145 {
146     return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
147 }
148 
149 static uint64_t xive_tm_pull_pool_ctx(XiveTCTX *tctx, hwaddr offset,
150                                       unsigned size)
151 {
152     uint64_t ret;
153 
154     ret = tctx->regs[TM_QW2_HV_POOL + TM_WORD2] & TM_QW2W2_POOL_CAM;
155     tctx->regs[TM_QW2_HV_POOL + TM_WORD2] &= ~TM_QW2W2_POOL_CAM;
156     return ret;
157 }
158 
159 static void xive_tm_vt_push(XiveTCTX *tctx, hwaddr offset,
160                             uint64_t value, unsigned size)
161 {
162     tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = value & 0xff;
163 }
164 
165 static uint64_t xive_tm_vt_poll(XiveTCTX *tctx, hwaddr offset, unsigned size)
166 {
167     return tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] & 0xff;
168 }
169 
170 /*
171  * Define an access map for each page of the TIMA that we will use in
172  * the memory region ops to filter values when doing loads and stores
173  * of raw register values
174  *
175  * Register accessibility bits :
176  *
177  *    0x0 - no access
178  *    0x1 - write only
179  *    0x2 - read only
180  *    0x3 - read/write
181  */
182 
183 static const uint8_t xive_tm_hw_view[] = {
184     /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
185     /* QW-1 OS   */   3, 3, 3, 3,   3, 3, 0, 3,   3, 3, 3, 3,   0, 0, 0, 0,
186     /* QW-2 POOL */   0, 0, 3, 3,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
187     /* QW-3 PHYS */   3, 3, 3, 3,   0, 3, 0, 3,   3, 0, 0, 3,   3, 3, 3, 0,
188 };
189 
190 static const uint8_t xive_tm_hv_view[] = {
191     /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
192     /* QW-1 OS   */   3, 3, 3, 3,   3, 3, 0, 3,   3, 3, 3, 3,   0, 0, 0, 0,
193     /* QW-2 POOL */   0, 0, 3, 3,   0, 0, 0, 0,   0, 3, 3, 3,   0, 0, 0, 0,
194     /* QW-3 PHYS */   3, 3, 3, 3,   0, 3, 0, 3,   3, 0, 0, 3,   0, 0, 0, 0,
195 };
196 
197 static const uint8_t xive_tm_os_view[] = {
198     /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   3, 3, 3, 3,   0, 0, 0, 0,
199     /* QW-1 OS   */   2, 3, 2, 2,   2, 2, 0, 2,   0, 0, 0, 0,   0, 0, 0, 0,
200     /* QW-2 POOL */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
201     /* QW-3 PHYS */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
202 };
203 
204 static const uint8_t xive_tm_user_view[] = {
205     /* QW-0 User */   3, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
206     /* QW-1 OS   */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
207     /* QW-2 POOL */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
208     /* QW-3 PHYS */   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,   0, 0, 0, 0,
209 };
210 
211 /*
212  * Overall TIMA access map for the thread interrupt management context
213  * registers
214  */
215 static const uint8_t *xive_tm_views[] = {
216     [XIVE_TM_HW_PAGE]   = xive_tm_hw_view,
217     [XIVE_TM_HV_PAGE]   = xive_tm_hv_view,
218     [XIVE_TM_OS_PAGE]   = xive_tm_os_view,
219     [XIVE_TM_USER_PAGE] = xive_tm_user_view,
220 };
221 
222 /*
223  * Computes a register access mask for a given offset in the TIMA
224  */
225 static uint64_t xive_tm_mask(hwaddr offset, unsigned size, bool write)
226 {
227     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
228     uint8_t reg_offset = offset & 0x3F;
229     uint8_t reg_mask = write ? 0x1 : 0x2;
230     uint64_t mask = 0x0;
231     int i;
232 
233     for (i = 0; i < size; i++) {
234         if (xive_tm_views[page_offset][reg_offset + i] & reg_mask) {
235             mask |= (uint64_t) 0xff << (8 * (size - i - 1));
236         }
237     }
238 
239     return mask;
240 }
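
/*
 * Worked example of the mask computation above (illustrative, assuming
 * TM_QW1_OS is 0x10): a 4-byte load at offset 0x10 from the OS page
 * covers NSR, CPPR, IPB and LSMFB with accessibility values 2, 3, 2, 2,
 * giving a read mask of 0xffffffff, while the corresponding store mask
 * is 0x00ff0000 because only the CPPR byte is writable from that view.
 */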
241 
242 static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
243                               unsigned size)
244 {
245     uint8_t ring_offset = offset & 0x30;
246     uint8_t reg_offset = offset & 0x3F;
247     uint64_t mask = xive_tm_mask(offset, size, true);
248     int i;
249 
250     /*
251      * Only 4- or 8-byte stores are allowed and the User ring is
252      * excluded
253      */
254     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
255         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
256                       HWADDR_PRIx"\n", offset);
257         return;
258     }
259 
260     /*
261      * Use the register offset for the raw values and filter out
262      * reserved values
263      */
264     for (i = 0; i < size; i++) {
265         uint8_t byte_mask = (mask >> (8 * (size - i - 1)));
266         if (byte_mask) {
267             tctx->regs[reg_offset + i] = (value >> (8 * (size - i - 1))) &
268                 byte_mask;
269         }
270     }
271 }
272 
273 static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
274 {
275     uint8_t ring_offset = offset & 0x30;
276     uint8_t reg_offset = offset & 0x3F;
277     uint64_t mask = xive_tm_mask(offset, size, false);
278     uint64_t ret;
279     int i;
280 
281     /*
282      * Only 4- or 8-byte loads are allowed and the User ring is
283      * excluded
284      */
285     if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
286         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
287                       HWADDR_PRIx"\n", offset);
288         return -1;
289     }
290 
291     /* Use the register offset for the raw values */
292     ret = 0;
293     for (i = 0; i < size; i++) {
294         ret |= (uint64_t) tctx->regs[reg_offset + i] << (8 * (size - i - 1));
295     }
296 
297     /* filter out reserved values */
298     return ret & mask;
299 }
300 
301 /*
302  * The TM context is mapped twice within each page. Stores and loads
303  * to the first mapping below 2K write and read the specified values
304  * without modification. The second mapping above 2K performs specific
305  * state changes (side effects) in addition to setting/returning the
306  * interrupt management area context of the processor thread.
307  */
308 static uint64_t xive_tm_ack_os_reg(XiveTCTX *tctx, hwaddr offset, unsigned size)
309 {
310     return xive_tctx_accept(tctx, TM_QW1_OS);
311 }
312 
313 static void xive_tm_set_os_cppr(XiveTCTX *tctx, hwaddr offset,
314                                 uint64_t value, unsigned size)
315 {
316     xive_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
317 }
318 
319 /*
320  * Adjust the IPB to allow a CPU to process event queues of other
321  * priorities during one physical interrupt cycle.
322  */
323 static void xive_tm_set_os_pending(XiveTCTX *tctx, hwaddr offset,
324                                    uint64_t value, unsigned size)
325 {
326     ipb_update(&tctx->regs[TM_QW1_OS], value & 0xff);
327     xive_tctx_notify(tctx, TM_QW1_OS);
328 }
329 
330 /*
331  * Define a mapping of "special" operations depending on the TIMA page
332  * offset and the size of the operation.
333  */
334 typedef struct XiveTmOp {
335     uint8_t  page_offset;
336     uint32_t op_offset;
337     unsigned size;
338     void     (*write_handler)(XiveTCTX *tctx, hwaddr offset, uint64_t value,
339                               unsigned size);
340     uint64_t (*read_handler)(XiveTCTX *tctx, hwaddr offset, unsigned size);
341 } XiveTmOp;
342 
343 static const XiveTmOp xive_tm_operations[] = {
344     /*
345      * MMIOs below 2K : raw values and special operations without side
346      * effects
347      */
348     { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR,   1, xive_tm_set_os_cppr, NULL },
349     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL },
350     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL },
351     { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll },
352 
353     /* MMIOs above 2K : special operations with side effects */
354     { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG,     2, NULL, xive_tm_ack_os_reg },
355     { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
356     { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG,     2, NULL, xive_tm_ack_hv_reg },
357     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  4, NULL, xive_tm_pull_pool_ctx },
358     { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX,  8, NULL, xive_tm_pull_pool_ctx },
359 };
360 
361 static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
362 {
363     uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
364     uint32_t op_offset = offset & 0xFFF;
365     int i;
366 
367     for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
368         const XiveTmOp *xto = &xive_tm_operations[i];
369 
370         /* Accesses done from a more privileged TIMA page are allowed */
371         if (xto->page_offset >= page_offset &&
372             xto->op_offset == op_offset &&
373             xto->size == size &&
374             ((write && xto->write_handler) || (!write && xto->read_handler))) {
375             return xto;
376         }
377     }
378     return NULL;
379 }
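
/*
 * For instance (illustrative, assuming the usual HW < HV < OS < USER
 * page ordering): a 1-byte store to TM_QW1_OS + TM_CPPR issued from
 * the OS page matches the first entry of xive_tm_operations and is
 * handled by xive_tm_set_os_cppr(); the same store issued from the HV
 * or HW pages also matches, since those pages are more privileged.
 */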
380 
381 /*
382  * TIMA MMIO handlers
383  */
384 void xive_tctx_tm_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
385                         unsigned size)
386 {
387     const XiveTmOp *xto;
388 
389     /*
390      * TODO: check V bit in Q[0-3]W2
391      */
392 
393     /*
394      * First, check for special operations in the 2K region
395      */
396     if (offset & 0x800) {
397         xto = xive_tm_find_op(offset, size, true);
398         if (!xto) {
399             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA"
400                           "@%"HWADDR_PRIx"\n", offset);
401         } else {
402             xto->write_handler(tctx, offset, value, size);
403         }
404         return;
405     }
406 
407     /*
408      * Then, check for special operations in the region below 2K.
409      */
410     xto = xive_tm_find_op(offset, size, true);
411     if (xto) {
412         xto->write_handler(tctx, offset, value, size);
413         return;
414     }
415 
416     /*
417      * Finish with raw access to the register values
418      */
419     xive_tm_raw_write(tctx, offset, value, size);
420 }
421 
422 uint64_t xive_tctx_tm_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
423 {
424     const XiveTmOp *xto;
425 
426     /*
427      * TODO: check V bit in Q[0-3]W2
428      */
429 
430     /*
431      * First, check for special operations in the 2K region
432      */
433     if (offset & 0x800) {
434         xto = xive_tm_find_op(offset, size, false);
435         if (!xto) {
436             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
437                           "@%"HWADDR_PRIx"\n", offset);
438             return -1;
439         }
440         return xto->read_handler(tctx, offset, size);
441     }
442 
443     /*
444      * Then, check for special operations in the region below 2K.
445      */
446     xto = xive_tm_find_op(offset, size, false);
447     if (xto) {
448         return xto->read_handler(tctx, offset, size);
449     }
450 
451     /*
452      * Finish with raw access to the register values
453      */
454     return xive_tm_raw_read(tctx, offset, size);
455 }
456 
457 static void xive_tm_write(void *opaque, hwaddr offset,
458                           uint64_t value, unsigned size)
459 {
460     XiveTCTX *tctx = xive_router_get_tctx(XIVE_ROUTER(opaque), current_cpu);
461 
462     xive_tctx_tm_write(tctx, offset, value, size);
463 }
464 
465 static uint64_t xive_tm_read(void *opaque, hwaddr offset, unsigned size)
466 {
467     XiveTCTX *tctx = xive_router_get_tctx(XIVE_ROUTER(opaque), current_cpu);
468 
469     return xive_tctx_tm_read(tctx, offset, size);
470 }
471 
472 const MemoryRegionOps xive_tm_ops = {
473     .read = xive_tm_read,
474     .write = xive_tm_write,
475     .endianness = DEVICE_BIG_ENDIAN,
476     .valid = {
477         .min_access_size = 1,
478         .max_access_size = 8,
479     },
480     .impl = {
481         .min_access_size = 1,
482         .max_access_size = 8,
483     },
484 };
485 
486 static inline uint32_t xive_tctx_word2(uint8_t *ring)
487 {
488     return *((uint32_t *) &ring[TM_WORD2]);
489 }
490 
491 static char *xive_tctx_ring_print(uint8_t *ring)
492 {
493     uint32_t w2 = xive_tctx_word2(ring);
494 
495     return g_strdup_printf("%02x   %02x  %02x    %02x   %02x  "
496                    "%02x  %02x   %02x  %08x",
497                    ring[TM_NSR], ring[TM_CPPR], ring[TM_IPB], ring[TM_LSMFB],
498                    ring[TM_ACK_CNT], ring[TM_INC], ring[TM_AGE], ring[TM_PIPR],
499                    be32_to_cpu(w2));
500 }
501 
502 static const char * const xive_tctx_ring_names[] = {
503     "USER", "OS", "POOL", "PHYS",
504 };
505 
506 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
507 {
508     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
509     int i;
510 
511     if (kvm_irqchip_in_kernel()) {
512         Error *local_err = NULL;
513 
514         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
515         if (local_err) {
516             error_report_err(local_err);
517             return;
518         }
519     }
520 
521     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
522                    "  W2\n", cpu_index);
523 
524     for (i = 0; i < XIVE_TM_RING_COUNT; i++) {
525         char *s = xive_tctx_ring_print(&tctx->regs[i * XIVE_TM_RING_SIZE]);
526         monitor_printf(mon, "CPU[%04x]: %4s    %s\n", cpu_index,
527                        xive_tctx_ring_names[i], s);
528         g_free(s);
529     }
530 }
531 
532 static void xive_tctx_reset(void *dev)
533 {
534     XiveTCTX *tctx = XIVE_TCTX(dev);
535 
536     memset(tctx->regs, 0, sizeof(tctx->regs));
537 
538     /* Set some defaults */
539     tctx->regs[TM_QW1_OS + TM_LSMFB] = 0xFF;
540     tctx->regs[TM_QW1_OS + TM_ACK_CNT] = 0xFF;
541     tctx->regs[TM_QW1_OS + TM_AGE] = 0xFF;
542 
543     /*
544      * Initialize PIPR to 0xFF to avoid phantom interrupts when the
545      * CPPR is first set.
546      */
547     tctx->regs[TM_QW1_OS + TM_PIPR] =
548         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
549     tctx->regs[TM_QW3_HV_PHYS + TM_PIPR] =
550         ipb_to_pipr(tctx->regs[TM_QW3_HV_PHYS + TM_IPB]);
551 }
552 
553 static void xive_tctx_realize(DeviceState *dev, Error **errp)
554 {
555     XiveTCTX *tctx = XIVE_TCTX(dev);
556     PowerPCCPU *cpu;
557     CPUPPCState *env;
558     Object *obj;
559     Error *local_err = NULL;
560 
561     obj = object_property_get_link(OBJECT(dev), "cpu", &local_err);
562     if (!obj) {
563         error_propagate(errp, local_err);
564         error_prepend(errp, "required link 'cpu' not found: ");
565         return;
566     }
567 
568     cpu = POWERPC_CPU(obj);
569     tctx->cs = CPU(obj);
570 
571     env = &cpu->env;
572     switch (PPC_INPUT(env)) {
573     case PPC_FLAGS_INPUT_POWER9:
574         tctx->hv_output = env->irq_inputs[POWER9_INPUT_HINT];
575         tctx->os_output = env->irq_inputs[POWER9_INPUT_INT];
576         break;
577 
578     default:
579         error_setg(errp, "XIVE interrupt controller does not support "
580                    "this CPU bus model");
581         return;
582     }
583 
584     /* Connect the presenter to the VCPU (required for CPU hotplug) */
585     if (kvm_irqchip_in_kernel()) {
586         kvmppc_xive_cpu_connect(tctx, &local_err);
587         if (local_err) {
588             error_propagate(errp, local_err);
589             return;
590         }
591     }
592 
593     qemu_register_reset(xive_tctx_reset, dev);
594 }
595 
596 static void xive_tctx_unrealize(DeviceState *dev, Error **errp)
597 {
598     qemu_unregister_reset(xive_tctx_reset, dev);
599 }
600 
601 static int vmstate_xive_tctx_pre_save(void *opaque)
602 {
603     Error *local_err = NULL;
604 
605     if (kvm_irqchip_in_kernel()) {
606         kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
607         if (local_err) {
608             error_report_err(local_err);
609             return -1;
610         }
611     }
612 
613     return 0;
614 }
615 
616 static const VMStateDescription vmstate_xive_tctx = {
617     .name = TYPE_XIVE_TCTX,
618     .version_id = 1,
619     .minimum_version_id = 1,
620     .pre_save = vmstate_xive_tctx_pre_save,
621     .post_load = NULL, /* handled by the sPAPRxive model */
622     .fields = (VMStateField[]) {
623         VMSTATE_BUFFER(regs, XiveTCTX),
624         VMSTATE_END_OF_LIST()
625     },
626 };
627 
628 static void xive_tctx_class_init(ObjectClass *klass, void *data)
629 {
630     DeviceClass *dc = DEVICE_CLASS(klass);
631 
632     dc->desc = "XIVE Interrupt Thread Context";
633     dc->realize = xive_tctx_realize;
634     dc->unrealize = xive_tctx_unrealize;
635     dc->vmsd = &vmstate_xive_tctx;
636 }
637 
638 static const TypeInfo xive_tctx_info = {
639     .name          = TYPE_XIVE_TCTX,
640     .parent        = TYPE_DEVICE,
641     .instance_size = sizeof(XiveTCTX),
642     .class_init    = xive_tctx_class_init,
643 };
644 
645 Object *xive_tctx_create(Object *cpu, XiveRouter *xrtr, Error **errp)
646 {
647     Error *local_err = NULL;
648     Object *obj;
649 
650     obj = object_new(TYPE_XIVE_TCTX);
651     object_property_add_child(cpu, TYPE_XIVE_TCTX, obj, &error_abort);
652     object_unref(obj);
653     object_property_add_const_link(obj, "cpu", cpu, &error_abort);
654     object_property_set_bool(obj, true, "realized", &local_err);
655     if (local_err) {
656         goto error;
657     }
658 
659     return obj;
660 
661 error:
662     object_unparent(obj);
663     error_propagate(errp, local_err);
664     return NULL;
665 }
666 
667 /*
668  * XIVE ESB helpers
669  */
670 
671 static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
672 {
673     uint8_t old_pq = *pq & 0x3;
674 
675     *pq &= ~0x3;
676     *pq |= value & 0x3;
677 
678     return old_pq;
679 }
680 
681 static bool xive_esb_trigger(uint8_t *pq)
682 {
683     uint8_t old_pq = *pq & 0x3;
684 
685     switch (old_pq) {
686     case XIVE_ESB_RESET:
687         xive_esb_set(pq, XIVE_ESB_PENDING);
688         return true;
689     case XIVE_ESB_PENDING:
690     case XIVE_ESB_QUEUED:
691         xive_esb_set(pq, XIVE_ESB_QUEUED);
692         return false;
693     case XIVE_ESB_OFF:
694         xive_esb_set(pq, XIVE_ESB_OFF);
695         return false;
696     default:
697          g_assert_not_reached();
698     }
699 }
700 
701 static bool xive_esb_eoi(uint8_t *pq)
702 {
703     uint8_t old_pq = *pq & 0x3;
704 
705     switch (old_pq) {
706     case XIVE_ESB_RESET:
707     case XIVE_ESB_PENDING:
708         xive_esb_set(pq, XIVE_ESB_RESET);
709         return false;
710     case XIVE_ESB_QUEUED:
711         xive_esb_set(pq, XIVE_ESB_PENDING);
712         return true;
713     case XIVE_ESB_OFF:
714         xive_esb_set(pq, XIVE_ESB_OFF);
715         return false;
716     default:
717          g_assert_not_reached();
718     }
719 }
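
/*
 * Summary of the PQ state machine implemented by the two helpers
 * above: a trigger moves RESET -> PENDING and asks for a notification,
 * moves PENDING or QUEUED -> QUEUED without notifying, and leaves OFF
 * unchanged. An EOI moves RESET or PENDING -> RESET, moves
 * QUEUED -> PENDING with a new notification, and also leaves OFF
 * unchanged.
 */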
720 
721 /*
722  * XIVE Interrupt Source (or IVSE)
723  */
724 
725 uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
726 {
727     assert(srcno < xsrc->nr_irqs);
728 
729     return xsrc->status[srcno] & 0x3;
730 }
731 
732 uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
733 {
734     assert(srcno < xsrc->nr_irqs);
735 
736     return xive_esb_set(&xsrc->status[srcno], pq);
737 }
738 
739 /*
740  * Returns whether the event notification should be forwarded.
741  */
742 static bool xive_source_lsi_trigger(XiveSource *xsrc, uint32_t srcno)
743 {
744     uint8_t old_pq = xive_source_esb_get(xsrc, srcno);
745 
746     xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
747 
748     switch (old_pq) {
749     case XIVE_ESB_RESET:
750         xive_source_esb_set(xsrc, srcno, XIVE_ESB_PENDING);
751         return true;
752     default:
753         return false;
754     }
755 }
756 
757 /*
758  * Returns whether the event notification should be forwarded.
759  */
760 static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
761 {
762     bool ret;
763 
764     assert(srcno < xsrc->nr_irqs);
765 
766     ret = xive_esb_trigger(&xsrc->status[srcno]);
767 
768     if (xive_source_irq_is_lsi(xsrc, srcno) &&
769         xive_source_esb_get(xsrc, srcno) == XIVE_ESB_QUEUED) {
770         qemu_log_mask(LOG_GUEST_ERROR,
771                       "XIVE: queued an event on LSI IRQ %d\n", srcno);
772     }
773 
774     return ret;
775 }
776 
777 /*
778  * Returns whether the event notification should be forwarded.
779  */
780 static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
781 {
782     bool ret;
783 
784     assert(srcno < xsrc->nr_irqs);
785 
786     ret = xive_esb_eoi(&xsrc->status[srcno]);
787 
788     /*
789      * LSI sources do not set the Q bit but they can still be
790      * asserted, in which case we should forward a new event
791      * notification
792      */
793     if (xive_source_irq_is_lsi(xsrc, srcno) &&
794         xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
795         ret = xive_source_lsi_trigger(xsrc, srcno);
796     }
797 
798     return ret;
799 }
800 
801 /*
802  * Forward the source event notification to the Router
803  */
804 static void xive_source_notify(XiveSource *xsrc, int srcno)
805 {
806     XiveNotifierClass *xnc = XIVE_NOTIFIER_GET_CLASS(xsrc->xive);
807 
808     if (xnc->notify) {
809         xnc->notify(xsrc->xive, srcno);
810     }
811 }
812 
813 /*
814  * In a two-page ESB MMIO setting, the even page is the trigger page
815  * and the odd page is for management
816  */
817 static inline bool addr_is_even(hwaddr addr, uint32_t shift)
818 {
819     return !((addr >> shift) & 1);
820 }
821 
822 static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
823 {
824     return xive_source_esb_has_2page(xsrc) &&
825         addr_is_even(addr, xsrc->esb_shift - 1);
826 }
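
/*
 * Worked example (illustrative, assuming the 2-page 64k setting where
 * esb_shift is 17): interrupt N owns the ESB range starting at N << 17;
 * its lower 64k page has bit 16 clear and is the trigger page, while
 * the upper 64k page has bit 16 set and is the management page used
 * for EOIs and PQ manipulations.
 */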
827 
828 /*
829  * ESB MMIO loads
830  *                      Trigger page    Management/EOI page
831  *
832  * ESB MMIO setting     2 pages         1 or 2 pages
833  *
834  * 0x000 .. 0x3FF       -1              EOI and return 0|1
835  * 0x400 .. 0x7FF       -1              EOI and return 0|1
836  * 0x800 .. 0xBFF       -1              return PQ
837  * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
838  * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
839  * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
840  * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
841  */
842 static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
843 {
844     XiveSource *xsrc = XIVE_SOURCE(opaque);
845     uint32_t offset = addr & 0xFFF;
846     uint32_t srcno = addr >> xsrc->esb_shift;
847     uint64_t ret = -1;
848 
849     /* In a two-page ESB MMIO setting, the trigger page should not be read */
850     if (xive_source_is_trigger_page(xsrc, addr)) {
851         qemu_log_mask(LOG_GUEST_ERROR,
852                       "XIVE: invalid load on IRQ %d trigger page at "
853                       "0x%"HWADDR_PRIx"\n", srcno, addr);
854         return -1;
855     }
856 
857     switch (offset) {
858     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
859         ret = xive_source_esb_eoi(xsrc, srcno);
860 
861         /* Forward the source event notification for routing */
862         if (ret) {
863             xive_source_notify(xsrc, srcno);
864         }
865         break;
866 
867     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
868         ret = xive_source_esb_get(xsrc, srcno);
869         break;
870 
871     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
872     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
873     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
874     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
875         ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
876         break;
877     default:
878         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
879                       offset);
880     }
881 
882     return ret;
883 }
884 
885 /*
886  * ESB MMIO stores
887  *                      Trigger page    Management/EOI page
888  *
889  * ESB MMIO setting     2 pages         1 or 2 pages
890  *
891  * 0x000 .. 0x3FF       Trigger         Trigger
892  * 0x400 .. 0x7FF       Trigger         EOI
893  * 0x800 .. 0xBFF       Trigger         undefined
894  * 0xC00 .. 0xCFF       Trigger         PQ=00
895  * 0xD00 .. 0xDFF       Trigger         PQ=01
896  * 0xE00 .. 0xEFF       Trigger         PQ=10
897  * 0xF00 .. 0xFFF       Trigger         PQ=11
898  */
899 static void xive_source_esb_write(void *opaque, hwaddr addr,
900                                   uint64_t value, unsigned size)
901 {
902     XiveSource *xsrc = XIVE_SOURCE(opaque);
903     uint32_t offset = addr & 0xFFF;
904     uint32_t srcno = addr >> xsrc->esb_shift;
905     bool notify = false;
906 
907     /* In a two-page ESB MMIO setting, the trigger page only triggers */
908     if (xive_source_is_trigger_page(xsrc, addr)) {
909         notify = xive_source_esb_trigger(xsrc, srcno);
910         goto out;
911     }
912 
913     switch (offset) {
914     case 0 ... 0x3FF:
915         notify = xive_source_esb_trigger(xsrc, srcno);
916         break;
917 
918     case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
919         if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
920             qemu_log_mask(LOG_GUEST_ERROR,
921                           "XIVE: invalid Store EOI for IRQ %d\n", srcno);
922             return;
923         }
924 
925         notify = xive_source_esb_eoi(xsrc, srcno);
926         break;
927 
928     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
929     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
930     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
931     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
932         xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
933         break;
934 
935     default:
936         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
937                       offset);
938         return;
939     }
940 
941 out:
942     /* Forward the source event notification for routing */
943     if (notify) {
944         xive_source_notify(xsrc, srcno);
945     }
946 }
947 
948 static const MemoryRegionOps xive_source_esb_ops = {
949     .read = xive_source_esb_read,
950     .write = xive_source_esb_write,
951     .endianness = DEVICE_BIG_ENDIAN,
952     .valid = {
953         .min_access_size = 8,
954         .max_access_size = 8,
955     },
956     .impl = {
957         .min_access_size = 8,
958         .max_access_size = 8,
959     },
960 };
961 
962 void xive_source_set_irq(void *opaque, int srcno, int val)
963 {
964     XiveSource *xsrc = XIVE_SOURCE(opaque);
965     bool notify = false;
966 
967     if (xive_source_irq_is_lsi(xsrc, srcno)) {
968         if (val) {
969             notify = xive_source_lsi_trigger(xsrc, srcno);
970         } else {
971             xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
972         }
973     } else {
974         if (val) {
975             notify = xive_source_esb_trigger(xsrc, srcno);
976         }
977     }
978 
979     /* Forward the source event notification for routing */
980     if (notify) {
981         xive_source_notify(xsrc, srcno);
982     }
983 }
984 
985 void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
986 {
987     int i;
988 
989     for (i = 0; i < xsrc->nr_irqs; i++) {
990         uint8_t pq = xive_source_esb_get(xsrc, i);
991 
992         if (pq == XIVE_ESB_OFF) {
993             continue;
994         }
995 
996         monitor_printf(mon, "  %08x %s %c%c%c\n", i + offset,
997                        xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI",
998                        pq & XIVE_ESB_VAL_P ? 'P' : '-',
999                        pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
1000                        xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ');
1001     }
1002 }
1003 
1004 static void xive_source_reset(void *dev)
1005 {
1006     XiveSource *xsrc = XIVE_SOURCE(dev);
1007 
1008     /* Do not clear the LSI bitmap */
1009 
1010     /* PQs are initialized to 0b01 (Q=1) which corresponds to "ints off" */
1011     memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
1012 }
1013 
1014 static void xive_source_realize(DeviceState *dev, Error **errp)
1015 {
1016     XiveSource *xsrc = XIVE_SOURCE(dev);
1017     Object *obj;
1018     Error *local_err = NULL;
1019 
1020     obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
1021     if (!obj) {
1022         error_propagate(errp, local_err);
1023         error_prepend(errp, "required link 'xive' not found: ");
1024         return;
1025     }
1026 
1027     xsrc->xive = XIVE_NOTIFIER(obj);
1028 
1029     if (!xsrc->nr_irqs) {
1030         error_setg(errp, "Number of interrupt needs to be greater than 0");
1031         return;
1032     }
1033 
1034     if (xsrc->esb_shift != XIVE_ESB_4K &&
1035         xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
1036         xsrc->esb_shift != XIVE_ESB_64K &&
1037         xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
1038         error_setg(errp, "Invalid ESB shift setting");
1039         return;
1040     }
1041 
1042     xsrc->status = g_malloc0(xsrc->nr_irqs);
1043     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
1044 
1045     if (!kvm_irqchip_in_kernel()) {
1046         memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
1047                               &xive_source_esb_ops, xsrc, "xive.esb",
1048                               (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
1049     }
1050 
1051     qemu_register_reset(xive_source_reset, dev);
1052 }
1053 
1054 static const VMStateDescription vmstate_xive_source = {
1055     .name = TYPE_XIVE_SOURCE,
1056     .version_id = 1,
1057     .minimum_version_id = 1,
1058     .fields = (VMStateField[]) {
1059         VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
1060         VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
1061         VMSTATE_END_OF_LIST()
1062     },
1063 };
1064 
1065 /*
1066  * The default XIVE interrupt source setting for the ESB MMIOs is two
1067  * 64k pages without Store EOI, to be in sync with KVM.
1068  */
1069 static Property xive_source_properties[] = {
1070     DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
1071     DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
1072     DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
1073     DEFINE_PROP_END_OF_LIST(),
1074 };
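
/*
 * A minimal instantiation sketch (illustrative only; it follows the
 * same QOM pattern as xive_tctx_create() above, with "xive" standing
 * for the notifier object and the property values being arbitrary):
 *
 *     Object *obj = object_new(TYPE_XIVE_SOURCE);
 *
 *     object_property_set_int(obj, 1024, "nr-irqs", &error_abort);
 *     object_property_set_int(obj, XIVE_ESB_64K_2PAGE, "shift",
 *                             &error_abort);
 *     object_property_add_const_link(obj, "xive", xive, &error_abort);
 *     object_property_set_bool(obj, true, "realized", &error_fatal);
 */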
1075 
1076 static void xive_source_class_init(ObjectClass *klass, void *data)
1077 {
1078     DeviceClass *dc = DEVICE_CLASS(klass);
1079 
1080     dc->desc    = "XIVE Interrupt Source";
1081     dc->props   = xive_source_properties;
1082     dc->realize = xive_source_realize;
1083     dc->vmsd    = &vmstate_xive_source;
1084 }
1085 
1086 static const TypeInfo xive_source_info = {
1087     .name          = TYPE_XIVE_SOURCE,
1088     .parent        = TYPE_DEVICE,
1089     .instance_size = sizeof(XiveSource),
1090     .class_init    = xive_source_class_init,
1091 };
1092 
1093 /*
1094  * XiveEND helpers
1095  */
1096 
1097 void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon)
1098 {
1099     uint64_t qaddr_base = xive_end_qaddr(end);
1100     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1101     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1102     uint32_t qentries = 1 << (qsize + 10);
1103     int i;
1104 
1105     /*
1106      * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
1107      */
1108     monitor_printf(mon, " [ ");
1109     qindex = (qindex - (width - 1)) & (qentries - 1);
1110     for (i = 0; i < width; i++) {
1111         uint64_t qaddr = qaddr_base + (qindex << 2);
1112         uint32_t qdata = -1;
1113 
1114         if (dma_memory_read(&address_space_memory, qaddr, &qdata,
1115                             sizeof(qdata))) {
1116             qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
1117                           HWADDR_PRIx "\n", qaddr);
1118             return;
1119         }
1120         monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
1121                        be32_to_cpu(qdata));
1122         qindex = (qindex + 1) & (qentries - 1);
1123     }
1124 }
1125 
1126 void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
1127 {
1128     uint64_t qaddr_base = xive_end_qaddr(end);
1129     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1130     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1131     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1132     uint32_t qentries = 1 << (qsize + 10);
1133 
1134     uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6);
1135     uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7);
1136 
1137     if (!xive_end_is_valid(end)) {
1138         return;
1139     }
1140 
1141     monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
1142                    "% 6d/%5d ^%d", end_idx,
1143                    xive_end_is_valid(end)    ? 'v' : '-',
1144                    xive_end_is_enqueue(end)  ? 'q' : '-',
1145                    xive_end_is_notify(end)   ? 'n' : '-',
1146                    xive_end_is_backlog(end)  ? 'b' : '-',
1147                    xive_end_is_escalate(end) ? 'e' : '-',
1148                    priority, nvt, qaddr_base, qindex, qentries, qgen);
1149 
1150     xive_end_queue_pic_print_info(end, 6, mon);
1151     monitor_printf(mon, "]\n");
1152 }
1153 
1154 static void xive_end_enqueue(XiveEND *end, uint32_t data)
1155 {
1156     uint64_t qaddr_base = xive_end_qaddr(end);
1157     uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0);
1158     uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
1159     uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1);
1160 
1161     uint64_t qaddr = qaddr_base + (qindex << 2);
1162     uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
1163     uint32_t qentries = 1 << (qsize + 10);
1164 
1165     if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
1166         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
1167                       HWADDR_PRIx "\n", qaddr);
1168         return;
1169     }
1170 
1171     qindex = (qindex + 1) & (qentries - 1);
1172     if (qindex == 0) {
1173         qgen ^= 1;
1174         end->w1 = xive_set_field32(END_W1_GENERATION, end->w1, qgen);
1175     }
1176     end->w1 = xive_set_field32(END_W1_PAGE_OFF, end->w1, qindex);
1177 }
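
/*
 * Worked example (illustrative): with W0 QSIZE = 0, the queue holds
 * 1 << (0 + 10) = 1024 four-byte entries, i.e. one 4K page. Each
 * enqueue stores the generation bit in the top bit of the event data
 * word and bumps PAGE_OFF; when the index wraps around to 0, the
 * generation bit in W1 is toggled so that stale and fresh entries can
 * be told apart.
 */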
1178 
1179 /*
1180  * XIVE Router (aka. Virtualization Controller or IVRE)
1181  */
1182 
1183 int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
1184                         XiveEAS *eas)
1185 {
1186     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1187 
1188     return xrc->get_eas(xrtr, eas_blk, eas_idx, eas);
1189 }
1190 
1191 int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1192                         XiveEND *end)
1193 {
1194    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1195 
1196    return xrc->get_end(xrtr, end_blk, end_idx, end);
1197 }
1198 
1199 int xive_router_write_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
1200                           XiveEND *end, uint8_t word_number)
1201 {
1202    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1203 
1204    return xrc->write_end(xrtr, end_blk, end_idx, end, word_number);
1205 }
1206 
1207 int xive_router_get_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1208                         XiveNVT *nvt)
1209 {
1210    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1211 
1212    return xrc->get_nvt(xrtr, nvt_blk, nvt_idx, nvt);
1213 }
1214 
1215 int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
1216                         XiveNVT *nvt, uint8_t word_number)
1217 {
1218    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1219 
1220    return xrc->write_nvt(xrtr, nvt_blk, nvt_idx, nvt, word_number);
1221 }
1222 
1223 XiveTCTX *xive_router_get_tctx(XiveRouter *xrtr, CPUState *cs)
1224 {
1225     XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
1226 
1227     return xrc->get_tctx(xrtr, cs);
1228 }
1229 
1230 /*
1231  * By default on P9, the HW CAM line (23bits) is hardwired to :
1232  *
1233  *   0x000||0b1||4Bit chip number||7Bit Thread number.
1234  *
1235  * When the block grouping is enabled, the CAM line is changed to :
1236  *
1237  *   4Bit chip number||0x001||7Bit Thread number.
1238  */
1239 static uint32_t hw_cam_line(uint8_t chip_id, uint8_t tid)
1240 {
1241     return 1 << 11 | (chip_id & 0xf) << 7 | (tid & 0x7f);
1242 }
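
/*
 * Worked example (illustrative): chip 2, thread 5 gives
 * 1 << 11 | 2 << 7 | 5 = 0x905, which matches the
 * 0x000||0b1||chip||thread layout described above.
 */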
1243 
1244 static bool xive_presenter_tctx_match_hw(XiveTCTX *tctx,
1245                                          uint8_t nvt_blk, uint32_t nvt_idx)
1246 {
1247     CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
1248     uint32_t pir = env->spr_cb[SPR_PIR].default_value;
1249 
1250     return hw_cam_line((pir >> 8) & 0xf, pir & 0x7f) ==
1251         hw_cam_line(nvt_blk, nvt_idx);
1252 }
1253 
1254 /*
1255  * The thread context register words are in big-endian format.
1256  */
1257 static int xive_presenter_tctx_match(XiveTCTX *tctx, uint8_t format,
1258                                      uint8_t nvt_blk, uint32_t nvt_idx,
1259                                      bool cam_ignore, uint32_t logic_serv)
1260 {
1261     uint32_t cam = xive_nvt_cam_line(nvt_blk, nvt_idx);
1262     uint32_t qw3w2 = xive_tctx_word2(&tctx->regs[TM_QW3_HV_PHYS]);
1263     uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
1264     uint32_t qw1w2 = xive_tctx_word2(&tctx->regs[TM_QW1_OS]);
1265     uint32_t qw0w2 = xive_tctx_word2(&tctx->regs[TM_QW0_USER]);
1266 
1267     /*
1268      * TODO (PowerNV): ignore mode. The low order bits of the NVT
1269      * identifier are ignored in the "CAM" match.
1270      */
1271 
1272     if (format == 0) {
1273         if (cam_ignore == true) {
1274             /*
1275              * F=0 & i=1: Logical server notification (bits ignored at
1276              * the end of the NVT identifier)
1277              */
1278             qemu_log_mask(LOG_UNIMP, "XIVE: no support for LS NVT %x/%x\n",
1279                           nvt_blk, nvt_idx);
1280              return -1;
1281         }
1282 
1283         /* F=0 & i=0: Specific NVT notification */
1284 
1285         /* PHYS ring */
1286         if ((be32_to_cpu(qw3w2) & TM_QW3W2_VT) &&
1287             xive_presenter_tctx_match_hw(tctx, nvt_blk, nvt_idx)) {
1288             return TM_QW3_HV_PHYS;
1289         }
1290 
1291         /* HV POOL ring */
1292         if ((be32_to_cpu(qw2w2) & TM_QW2W2_VP) &&
1293             cam == xive_get_field32(TM_QW2W2_POOL_CAM, qw2w2)) {
1294             return TM_QW2_HV_POOL;
1295         }
1296 
1297         /* OS ring */
1298         if ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1299             cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) {
1300             return TM_QW1_OS;
1301         }
1302     } else {
1303         /* F=1 : User level Event-Based Branch (EBB) notification */
1304 
1305         /* USER ring */
1306         if  ((be32_to_cpu(qw1w2) & TM_QW1W2_VO) &&
1307              (cam == xive_get_field32(TM_QW1W2_OS_CAM, qw1w2)) &&
1308              (be32_to_cpu(qw0w2) & TM_QW0W2_VU) &&
1309              (logic_serv == xive_get_field32(TM_QW0W2_LOGIC_SERV, qw0w2))) {
1310             return TM_QW0_USER;
1311         }
1312     }
1313     return -1;
1314 }
1315 
1316 typedef struct XiveTCTXMatch {
1317     XiveTCTX *tctx;
1318     uint8_t ring;
1319 } XiveTCTXMatch;
1320 
1321 static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format,
1322                                  uint8_t nvt_blk, uint32_t nvt_idx,
1323                                  bool cam_ignore, uint8_t priority,
1324                                  uint32_t logic_serv, XiveTCTXMatch *match)
1325 {
1326     CPUState *cs;
1327 
1328     /*
1329      * TODO (PowerNV): handle chip_id overwrite of block field for
1330      * hardwired CAM compares
1331      */
1332 
1333     CPU_FOREACH(cs) {
1334         XiveTCTX *tctx = xive_router_get_tctx(xrtr, cs);
1335         int ring;
1336 
1337         /*
1338          * HW checks that the CPU is enabled in the Physical Thread
1339          * Enable Register (PTER).
1340          */
1341 
1342         /*
1343          * Check the thread context CAM lines and record matches. We
1344          * will handle CPU exception delivery later
1345          */
1346         ring = xive_presenter_tctx_match(tctx, format, nvt_blk, nvt_idx,
1347                                          cam_ignore, logic_serv);
1348         /*
1349          * Save the context and follow on to catch duplicates, which we
1350          * don't support yet.
1351          */
1352         if (ring != -1) {
1353             if (match->tctx) {
1354                 qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
1355                               "context NVT %x/%x\n", nvt_blk, nvt_idx);
1356                 return false;
1357             }
1358 
1359             match->ring = ring;
1360             match->tctx = tctx;
1361         }
1362     }
1363 
1364     if (!match->tctx) {
1365         qemu_log_mask(LOG_UNIMP, "XIVE: NVT %x/%x is not dispatched\n",
1366                       nvt_blk, nvt_idx);
1367         return false;
1368     }
1369 
1370     return true;
1371 }
1372 
1373 /*
1374  * This is our simple Xive Presenter Engine model. It is merged in the
1375  * Router as it does not require an extra object.
1376  *
1377  * It receives notification requests sent by the IVRE to find one
1378  * matching NVT (or more) dispatched on the processor threads. In case
1379  * of a single NVT notification, the process is abbreviated and the
1380  * thread is signaled if a match is found. In case of a logical server
1381  * notification (bits ignored at the end of the NVT identifier), the
1382  * IVPE and IVRE select a winning thread using different filters. This
1383  * involves 2 or 3 exchanges on the PowerBus that the model does not
1384  * support.
1385  *
1386  * The parameters represent what is sent on the PowerBus
1387  */
1388 static void xive_presenter_notify(XiveRouter *xrtr, uint8_t format,
1389                                   uint8_t nvt_blk, uint32_t nvt_idx,
1390                                   bool cam_ignore, uint8_t priority,
1391                                   uint32_t logic_serv)
1392 {
1393     XiveNVT nvt;
1394     XiveTCTXMatch match = { .tctx = NULL, .ring = 0 };
1395     bool found;
1396 
1397     /* NVT cache lookup */
1398     if (xive_router_get_nvt(xrtr, nvt_blk, nvt_idx, &nvt)) {
1399         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no NVT %x/%x\n",
1400                       nvt_blk, nvt_idx);
1401         return;
1402     }
1403 
1404     if (!xive_nvt_is_valid(&nvt)) {
1405         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is invalid\n",
1406                       nvt_blk, nvt_idx);
1407         return;
1408     }
1409 
1410     found = xive_presenter_match(xrtr, format, nvt_blk, nvt_idx, cam_ignore,
1411                                  priority, logic_serv, &match);
1412     if (found) {
1413         ipb_update(&match.tctx->regs[match.ring], priority);
1414         xive_tctx_notify(match.tctx, match.ring);
1415         return;
1416     }
1417 
1418     /* Record the IPB in the associated NVT structure */
1419     ipb_update((uint8_t *) &nvt.w4, priority);
1420     xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4);
1421 
1422     /*
1423      * If no matching NVT is dispatched on a HW thread :
1424      * - update the NVT structure if backlog is activated
1425      * - escalate (ESe PQ bits and EAS in w4-5) if escalation is
1426      *   activated
1427      */
1428 }
1429 
1430 /*
1431  * An END trigger can come from an event trigger (IPI or HW) or from
1432  * another chip. We don't model the PowerBus but the END trigger
1433  * message has the same parameters as the function below.
1434  */
1435 static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
1436                                    uint32_t end_idx, uint32_t end_data)
1437 {
1438     XiveEND end;
1439     uint8_t priority;
1440     uint8_t format;
1441 
1442     /* END cache lookup */
1443     if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
1444         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1445                       end_idx);
1446         return;
1447     }
1448 
1449     if (!xive_end_is_valid(&end)) {
1450         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1451                       end_blk, end_idx);
1452         return;
1453     }
1454 
1455     if (xive_end_is_enqueue(&end)) {
1456         xive_end_enqueue(&end, end_data);
1457         /* Enqueuing event data modifies the EQ toggle and index */
1458         xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1459     }
1460 
1461     /*
1462      * The W7 format depends on the F bit in W6. It defines the type
1463      * of the notification :
1464      *
1465      *   F=0 : single or multiple NVT notification
1466      *   F=1 : User level Event-Based Branch (EBB) notification, no
1467      *         priority
1468      */
1469     format = xive_get_field32(END_W6_FORMAT_BIT, end.w6);
1470     priority = xive_get_field32(END_W7_F0_PRIORITY, end.w7);
1471 
1472     /* The END is masked */
1473     if (format == 0 && priority == 0xff) {
1474         return;
1475     }
1476 
1477     /*
1478      * Check the END ESn (Event State Buffer for notification) for
1479      * even further coalescing in the Router
1480      */
1481     if (!xive_end_is_notify(&end)) {
1482         uint8_t pq = xive_get_field32(END_W1_ESn, end.w1);
1483         bool notify = xive_esb_trigger(&pq);
1484 
1485         if (pq != xive_get_field32(END_W1_ESn, end.w1)) {
1486             end.w1 = xive_set_field32(END_W1_ESn, end.w1, pq);
1487             xive_router_write_end(xrtr, end_blk, end_idx, &end, 1);
1488         }
1489 
1490         /* ESn[Q]=1 : end of notification */
1491         if (!notify) {
1492             return;
1493         }
1494     }
1495 
1496     /*
1497      * Follows IVPE notification
1498      */
1499     xive_presenter_notify(xrtr, format,
1500                           xive_get_field32(END_W6_NVT_BLOCK, end.w6),
1501                           xive_get_field32(END_W6_NVT_INDEX, end.w6),
1502                           xive_get_field32(END_W7_F0_IGNORE, end.w7),
1503                           priority,
1504                           xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7));
1505 
1506     /* TODO: Auto EOI. */
1507 }
1508 
1509 void xive_router_notify(XiveNotifier *xn, uint32_t lisn)
1510 {
1511     XiveRouter *xrtr = XIVE_ROUTER(xn);
1512     uint8_t eas_blk = XIVE_SRCNO_BLOCK(lisn);
1513     uint32_t eas_idx = XIVE_SRCNO_INDEX(lisn);
1514     XiveEAS eas;
1515 
1516     /* EAS cache lookup */
1517     if (xive_router_get_eas(xrtr, eas_blk, eas_idx, &eas)) {
1518         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Unknown LISN %x\n", lisn);
1519         return;
1520     }
1521 
1522     /*
1523      * The IVRE checks the State Bit Cache at this point. We skip the
1524      * SBC lookup because the state bits of the sources are modeled
1525      * internally in QEMU.
1526      */
1527 
1528     if (!xive_eas_is_valid(&eas)) {
1529         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid LISN %x\n", lisn);
1530         return;
1531     }
1532 
1533     if (xive_eas_is_masked(&eas)) {
1534         /* Notification completed */
1535         return;
1536     }
1537 
1538     /*
1539      * The event trigger becomes an END trigger
1540      */
1541     xive_router_end_notify(xrtr,
1542                            xive_get_field64(EAS_END_BLOCK, eas.w),
1543                            xive_get_field64(EAS_END_INDEX, eas.w),
1544                            xive_get_field64(EAS_END_DATA,  eas.w));
1545 }
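
/*
 * Putting the pieces together (derived from the code above): a source
 * trigger flips its PQ bits, xive_router_notify() resolves the LISN to
 * an EAS, the END trigger optionally enqueues the event data and
 * checks the ESn buffer for further coalescing, and
 * xive_presenter_notify() finally looks for a dispatched thread
 * context whose IPB and NSR are updated to raise the exception line.
 */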
1546 
1547 static void xive_router_class_init(ObjectClass *klass, void *data)
1548 {
1549     DeviceClass *dc = DEVICE_CLASS(klass);
1550     XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
1551 
1552     dc->desc    = "XIVE Router Engine";
1553     xnc->notify = xive_router_notify;
1554 }
1555 
1556 static const TypeInfo xive_router_info = {
1557     .name          = TYPE_XIVE_ROUTER,
1558     .parent        = TYPE_SYS_BUS_DEVICE,
1559     .abstract      = true,
1560     .class_size    = sizeof(XiveRouterClass),
1561     .class_init    = xive_router_class_init,
1562     .interfaces    = (InterfaceInfo[]) {
1563         { TYPE_XIVE_NOTIFIER },
1564         { }
1565     }
1566 };
1567 
1568 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon)
1569 {
1570     if (!xive_eas_is_valid(eas)) {
1571         return;
1572     }
1573 
1574     monitor_printf(mon, "  %08x %s end:%02x/%04x data:%08x\n",
1575                    lisn, xive_eas_is_masked(eas) ? "M" : " ",
1576                    (uint8_t)  xive_get_field64(EAS_END_BLOCK, eas->w),
1577                    (uint32_t) xive_get_field64(EAS_END_INDEX, eas->w),
1578                    (uint32_t) xive_get_field64(EAS_END_DATA, eas->w));
1579 }
1580 
1581 /*
1582  * END ESB MMIO loads
1583  */
1584 static uint64_t xive_end_source_read(void *opaque, hwaddr addr, unsigned size)
1585 {
1586     XiveENDSource *xsrc = XIVE_END_SOURCE(opaque);
1587     uint32_t offset = addr & 0xFFF;
1588     uint8_t end_blk;
1589     uint32_t end_idx;
1590     XiveEND end;
1591     uint32_t end_esmask;
1592     uint8_t pq;
1593     uint64_t ret = -1;
1594 
1595     end_blk = xsrc->block_id;
1596     end_idx = addr >> (xsrc->esb_shift + 1);
1597 
1598     if (xive_router_get_end(xsrc->xrtr, end_blk, end_idx, &end)) {
1599         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
1600                       end_idx);
1601         return -1;
1602     }
1603 
1604     if (!xive_end_is_valid(&end)) {
1605         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
1606                       end_blk, end_idx);
1607         return -1;
1608     }
1609 
1610     end_esmask = addr_is_even(addr, xsrc->esb_shift) ? END_W1_ESn : END_W1_ESe;
1611     pq = xive_get_field32(end_esmask, end.w1);
1612 
1613     switch (offset) {
1614     case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
1615         ret = xive_esb_eoi(&pq);
1616 
1617         /* Forward the source event notification for routing ?? */
1618         break;
1619 
1620     case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
1621         ret = pq;
1622         break;
1623 
1624     case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
1625     case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
1626     case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
1627     case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
1628         ret = xive_esb_set(&pq, (offset >> 8) & 0x3);
1629         break;
1630     default:
1631         qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid END ESB load addr %d\n",
1632                       offset);
1633         return -1;
1634     }
1635 
1636     if (pq != xive_get_field32(end_esmask, end.w1)) {
1637         end.w1 = xive_set_field32(end_esmask, end.w1, pq);
1638         xive_router_write_end(xsrc->xrtr, end_blk, end_idx, &end, 1);
1639     }
1640 
1641     return ret;
1642 }
1643 
1644 /*
1645  * END ESB MMIO stores are invalid
1646  */
1647 static void xive_end_source_write(void *opaque, hwaddr addr,
1648                                   uint64_t value, unsigned size)
1649 {
1650     qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr 0x%"
1651                   HWADDR_PRIx"\n", addr);
1652 }
1653 
1654 static const MemoryRegionOps xive_end_source_ops = {
1655     .read = xive_end_source_read,
1656     .write = xive_end_source_write,
1657     .endianness = DEVICE_BIG_ENDIAN,
1658     .valid = {
1659         .min_access_size = 8,
1660         .max_access_size = 8,
1661     },
1662     .impl = {
1663         .min_access_size = 8,
1664         .max_access_size = 8,
1665     },
1666 };
1667 
1668 static void xive_end_source_realize(DeviceState *dev, Error **errp)
1669 {
1670     XiveENDSource *xsrc = XIVE_END_SOURCE(dev);
1671     Object *obj;
1672     Error *local_err = NULL;
1673 
1674     obj = object_property_get_link(OBJECT(dev), "xive", &local_err);
1675     if (!obj) {
1676         error_propagate(errp, local_err);
1677         error_prepend(errp, "required link 'xive' not found: ");
1678         return;
1679     }
1680 
1681     xsrc->xrtr = XIVE_ROUTER(obj);
1682 
1683     if (!xsrc->nr_ends) {
1684         error_setg(errp, "Number of interrupt needs to be greater than 0");
1685         return;
1686     }
1687 
1688     if (xsrc->esb_shift != XIVE_ESB_4K &&
1689         xsrc->esb_shift != XIVE_ESB_64K) {
1690         error_setg(errp, "Invalid ESB shift setting");
1691         return;
1692     }
1693 
1694     /*
1695      * Each END is assigned an even/odd pair of MMIO pages, the even page
1696      * manages the ESn field while the odd page manages the ESe field.
1697      */
1698     memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
1699                           &xive_end_source_ops, xsrc, "xive.end",
1700                           (1ull << (xsrc->esb_shift + 1)) * xsrc->nr_ends);
1701 }
1702 
1703 static Property xive_end_source_properties[] = {
1704     DEFINE_PROP_UINT8("block-id", XiveENDSource, block_id, 0),
1705     DEFINE_PROP_UINT32("nr-ends", XiveENDSource, nr_ends, 0),
1706     DEFINE_PROP_UINT32("shift", XiveENDSource, esb_shift, XIVE_ESB_64K),
1707     DEFINE_PROP_END_OF_LIST(),
1708 };
1709 
1710 static void xive_end_source_class_init(ObjectClass *klass, void *data)
1711 {
1712     DeviceClass *dc = DEVICE_CLASS(klass);
1713 
1714     dc->desc    = "XIVE END Source";
1715     dc->props   = xive_end_source_properties;
1716     dc->realize = xive_end_source_realize;
1717 }
1718 
1719 static const TypeInfo xive_end_source_info = {
1720     .name          = TYPE_XIVE_END_SOURCE,
1721     .parent        = TYPE_DEVICE,
1722     .instance_size = sizeof(XiveENDSource),
1723     .class_init    = xive_end_source_class_init,
1724 };
1725 
1726 /*
1727  * XIVE Notifier
1728  */
1729 static const TypeInfo xive_notifier_info = {
1730     .name = TYPE_XIVE_NOTIFIER,
1731     .parent = TYPE_INTERFACE,
1732     .class_size = sizeof(XiveNotifierClass),
1733 };
1734 
1735 static void xive_register_types(void)
1736 {
1737     type_register_static(&xive_source_info);
1738     type_register_static(&xive_notifier_info);
1739     type_register_static(&xive_router_info);
1740     type_register_static(&xive_end_source_info);
1741     type_register_static(&xive_tctx_info);
1742 }
1743 
1744 type_init(xive_register_types)
1745