xref: /openbmc/qemu/hw/ppc/spapr_nested.c (revision 806ab537ac4705bcf0c577382f0e3f90c6edcd14)
1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "exec/exec-all.h"
4 #include "helper_regs.h"
5 #include "hw/ppc/ppc.h"
6 #include "hw/ppc/spapr.h"
7 #include "hw/ppc/spapr_cpu_core.h"
8 #include "hw/ppc/spapr_nested.h"
9 #include "mmu-book3s-v3.h"
10 #include "cpu-models.h"
11 #include "qemu/log.h"
12 
13 void spapr_nested_reset(SpaprMachineState *spapr)
14 {
15     if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
16         spapr_unregister_nested_hv();
17         spapr_register_nested_hv();
18     } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
19         spapr->nested.capabilities_set = false;
20         spapr_unregister_nested_papr();
21         spapr_register_nested_papr();
22         spapr_nested_gsb_init();
23     } else {
24         spapr->nested.api = 0;
25     }
26 }
27 
28 uint8_t spapr_nested_api(SpaprMachineState *spapr)
29 {
30     return spapr->nested.api;
31 }
32 
33 #ifdef CONFIG_TCG
34 
35 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
36                               target_ulong lpid, ppc_v3_pate_t *entry)
37 {
38     uint64_t patb, pats;
39 
40     assert(lpid != 0);
41 
42     patb = spapr->nested.ptcr & PTCR_PATB;
43     pats = spapr->nested.ptcr & PTCR_PATS;
44 
45     /* Check if partition table is properly aligned */
46     if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
47         return false;
48     }
49 
50     /* Calculate number of entries */
51     pats = 1ull << (pats + 12 - 4);
52     if (pats <= lpid) {
53         return false;
54     }
55 
56     /* Grab entry */
57     patb += 16 * lpid;
58     entry->dw0 = ldq_phys(CPU(cpu)->as, patb);
59     entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8);
60     return true;
61 }
62 
63 static
64 SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr,
65                                                      target_ulong guestid)
66 {
67     SpaprMachineStateNestedGuest *guest;
68 
69     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
70     return guest;
71 }
72 
73 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
74                                 target_ulong lpid, ppc_v3_pate_t *entry)
75 {
76     SpaprMachineStateNestedGuest *guest;
77     assert(lpid != 0);
78     guest = spapr_get_nested_guest(spapr, lpid);
79     if (!guest) {
80         return false;
81     }
82 
83     entry->dw0 = guest->parttbl[0];
84     entry->dw1 = guest->parttbl[1];
85     return true;
86 }
87 
88 #define PRTS_MASK      0x1f
89 
90 static target_ulong h_set_ptbl(PowerPCCPU *cpu,
91                                SpaprMachineState *spapr,
92                                target_ulong opcode,
93                                target_ulong *args)
94 {
95     target_ulong ptcr = args[0];
96 
97     if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
98         return H_FUNCTION;
99     }
100 
101     if ((ptcr & PRTS_MASK) + 12 - 4 > 12) {
102         return H_PARAMETER;
103     }
104 
105     spapr->nested.ptcr = ptcr; /* Save new partition table */
106 
107     return H_SUCCESS;
108 }
109 
110 static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
111                                      SpaprMachineState *spapr,
112                                      target_ulong opcode,
113                                      target_ulong *args)
114 {
115     /*
116      * The spapr virtual hypervisor nested HV implementation retains no L2
117      * translation state except for TLB. And the TLB is always invalidated
118      * across L1<->L2 transitions, so nothing is required here.
119      */
120 
121     return H_SUCCESS;
122 }
123 
124 static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
125                                         SpaprMachineState *spapr,
126                                         target_ulong opcode,
127                                         target_ulong *args)
128 {
129     /*
130      * This HCALL is not required, L1 KVM will take a slow path and walk the
131      * page tables manually to do the data copy.
132      */
133     return H_FUNCTION;
134 }
135 
136 static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu)
137 {
138     CPUPPCState *env = &cpu->env;
139     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
140 
141     memcpy(save->gpr, env->gpr, sizeof(save->gpr));
142 
143     save->lr = env->lr;
144     save->ctr = env->ctr;
145     save->cfar = env->cfar;
146     save->msr = env->msr;
147     save->nip = env->nip;
148 
149     save->cr = ppc_get_cr(env);
150     save->xer = cpu_read_xer(env);
151 
152     save->lpcr = env->spr[SPR_LPCR];
153     save->lpidr = env->spr[SPR_LPIDR];
154     save->pcr = env->spr[SPR_PCR];
155     save->dpdes = env->spr[SPR_DPDES];
156     save->hfscr = env->spr[SPR_HFSCR];
157     save->srr0 = env->spr[SPR_SRR0];
158     save->srr1 = env->spr[SPR_SRR1];
159     save->sprg0 = env->spr[SPR_SPRG0];
160     save->sprg1 = env->spr[SPR_SPRG1];
161     save->sprg2 = env->spr[SPR_SPRG2];
162     save->sprg3 = env->spr[SPR_SPRG3];
163     save->pidr = env->spr[SPR_BOOKS_PID];
164     save->ppr = env->spr[SPR_PPR];
165 
166     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
167         save->amor = env->spr[SPR_AMOR];
168         save->dawr0 = env->spr[SPR_DAWR0];
169         save->dawrx0 = env->spr[SPR_DAWRX0];
170         save->ciabr = env->spr[SPR_CIABR];
171         save->purr = env->spr[SPR_PURR];
172         save->spurr = env->spr[SPR_SPURR];
173         save->ic = env->spr[SPR_IC];
174         save->vtb = env->spr[SPR_VTB];
175         save->hdar = env->spr[SPR_HDAR];
176         save->hdsisr = env->spr[SPR_HDSISR];
177         save->heir = env->spr[SPR_HEIR];
178         save->asdr = env->spr[SPR_ASDR];
179         save->dawr1 = env->spr[SPR_DAWR1];
180         save->dawrx1 = env->spr[SPR_DAWRX1];
181         save->dexcr = env->spr[SPR_DEXCR];
182         save->hdexcr = env->spr[SPR_HDEXCR];
183         save->hashkeyr = env->spr[SPR_HASHKEYR];
184         save->hashpkeyr = env->spr[SPR_HASHPKEYR];
185         memcpy(save->vsr, env->vsr, sizeof(save->vsr));
186         save->ebbhr = env->spr[SPR_EBBHR];
187         save->tar = env->spr[SPR_TAR];
188         save->ebbrr = env->spr[SPR_EBBRR];
189         save->bescr = env->spr[SPR_BESCR];
190         save->iamr = env->spr[SPR_IAMR];
191         save->amr = env->spr[SPR_AMR];
192         save->uamor = env->spr[SPR_UAMOR];
193         save->dscr = env->spr[SPR_DSCR];
194         save->fscr = env->spr[SPR_FSCR];
195         save->pspb = env->spr[SPR_PSPB];
196         save->ctrl = env->spr[SPR_CTRL];
197         save->vrsave = env->spr[SPR_VRSAVE];
198         save->dar = env->spr[SPR_DAR];
199         save->dsisr = env->spr[SPR_DSISR];
200         save->pmc1 = env->spr[SPR_POWER_PMC1];
201         save->pmc2 = env->spr[SPR_POWER_PMC2];
202         save->pmc3 = env->spr[SPR_POWER_PMC3];
203         save->pmc4 = env->spr[SPR_POWER_PMC4];
204         save->pmc5 = env->spr[SPR_POWER_PMC5];
205         save->pmc6 = env->spr[SPR_POWER_PMC6];
206         save->mmcr0 = env->spr[SPR_POWER_MMCR0];
207         save->mmcr1 = env->spr[SPR_POWER_MMCR1];
208         save->mmcr2 = env->spr[SPR_POWER_MMCR2];
209         save->mmcra = env->spr[SPR_POWER_MMCRA];
210         save->sdar = env->spr[SPR_POWER_SDAR];
211         save->siar = env->spr[SPR_POWER_SIAR];
212         save->sier = env->spr[SPR_POWER_SIER];
213         save->vscr = ppc_get_vscr(env);
214         save->fpscr = env->fpscr;
215     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
216         save->tb_offset = env->tb_env->tb_offset;
217     }
218 }
219 
/*
 * Fix-ups to run right after nested_load_state() has replaced the register
 * state in @env: recompute derived state, flush the TLB of @cs and drop
 * any outstanding reservation.
 */
static void nested_post_load_state(CPUPPCState *env, CPUState *cs)
{
    /*
     * compute hflags and possible interrupts.
     */
    hreg_compute_hflags(env);
    ppc_maybe_interrupt(env);
    /*
     * Nested HV does not tag TLB entries between L1 and L2, so must
     * flush on transition.
     */
    tlb_flush(cs);
    env->reserve_addr = -1; /* Reset the reservation */
}
234 
235 static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load)
236 {
237     CPUPPCState *env = &cpu->env;
238     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
239 
240     memcpy(env->gpr, load->gpr, sizeof(env->gpr));
241 
242     env->lr = load->lr;
243     env->ctr = load->ctr;
244     env->cfar = load->cfar;
245     env->msr = load->msr;
246     env->nip = load->nip;
247 
248     ppc_set_cr(env, load->cr);
249     cpu_write_xer(env, load->xer);
250 
251     env->spr[SPR_LPCR] = load->lpcr;
252     env->spr[SPR_LPIDR] = load->lpidr;
253     env->spr[SPR_PCR] = load->pcr;
254     env->spr[SPR_DPDES] = load->dpdes;
255     env->spr[SPR_HFSCR] = load->hfscr;
256     env->spr[SPR_SRR0] = load->srr0;
257     env->spr[SPR_SRR1] = load->srr1;
258     env->spr[SPR_SPRG0] = load->sprg0;
259     env->spr[SPR_SPRG1] = load->sprg1;
260     env->spr[SPR_SPRG2] = load->sprg2;
261     env->spr[SPR_SPRG3] = load->sprg3;
262     env->spr[SPR_BOOKS_PID] = load->pidr;
263     env->spr[SPR_PPR] = load->ppr;
264 
265     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
266         env->spr[SPR_AMOR] = load->amor;
267         env->spr[SPR_DAWR0] = load->dawr0;
268         env->spr[SPR_DAWRX0] = load->dawrx0;
269         env->spr[SPR_CIABR] = load->ciabr;
270         env->spr[SPR_PURR] = load->purr;
271         env->spr[SPR_SPURR] = load->purr;
272         env->spr[SPR_IC] = load->ic;
273         env->spr[SPR_VTB] = load->vtb;
274         env->spr[SPR_HDAR] = load->hdar;
275         env->spr[SPR_HDSISR] = load->hdsisr;
276         env->spr[SPR_HEIR] = load->heir;
277         env->spr[SPR_ASDR] = load->asdr;
278         env->spr[SPR_DAWR1] = load->dawr1;
279         env->spr[SPR_DAWRX1] = load->dawrx1;
280         env->spr[SPR_DEXCR] = load->dexcr;
281         env->spr[SPR_HDEXCR] = load->hdexcr;
282         env->spr[SPR_HASHKEYR] = load->hashkeyr;
283         env->spr[SPR_HASHPKEYR] = load->hashpkeyr;
284         memcpy(env->vsr, load->vsr, sizeof(env->vsr));
285         env->spr[SPR_EBBHR] = load->ebbhr;
286         env->spr[SPR_TAR] = load->tar;
287         env->spr[SPR_EBBRR] = load->ebbrr;
288         env->spr[SPR_BESCR] = load->bescr;
289         env->spr[SPR_IAMR] = load->iamr;
290         env->spr[SPR_AMR] = load->amr;
291         env->spr[SPR_UAMOR] = load->uamor;
292         env->spr[SPR_DSCR] = load->dscr;
293         env->spr[SPR_FSCR] = load->fscr;
294         env->spr[SPR_PSPB] = load->pspb;
295         env->spr[SPR_CTRL] = load->ctrl;
296         env->spr[SPR_VRSAVE] = load->vrsave;
297         env->spr[SPR_DAR] = load->dar;
298         env->spr[SPR_DSISR] = load->dsisr;
299         env->spr[SPR_POWER_PMC1] = load->pmc1;
300         env->spr[SPR_POWER_PMC2] = load->pmc2;
301         env->spr[SPR_POWER_PMC3] = load->pmc3;
302         env->spr[SPR_POWER_PMC4] = load->pmc4;
303         env->spr[SPR_POWER_PMC5] = load->pmc5;
304         env->spr[SPR_POWER_PMC6] = load->pmc6;
305         env->spr[SPR_POWER_MMCR0] = load->mmcr0;
306         env->spr[SPR_POWER_MMCR1] = load->mmcr1;
307         env->spr[SPR_POWER_MMCR2] = load->mmcr2;
308         env->spr[SPR_POWER_MMCRA] = load->mmcra;
309         env->spr[SPR_POWER_SDAR] = load->sdar;
310         env->spr[SPR_POWER_SIAR] = load->siar;
311         env->spr[SPR_POWER_SIER] = load->sier;
312         ppc_store_vscr(env, load->vscr);
313         ppc_store_fpscr(env, load->fpscr);
314     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
315         env->tb_env->tb_offset = load->tb_offset;
316     }
317 }
318 
319 /*
320  * When this handler returns, the environment is switched to the L2 guest
321  * and TCG begins running that. spapr_exit_nested() performs the switch from
322  * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
323  */
324 static target_ulong h_enter_nested(PowerPCCPU *cpu,
325                                    SpaprMachineState *spapr,
326                                    target_ulong opcode,
327                                    target_ulong *args)
328 {
329     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
330     CPUPPCState *env = &cpu->env;
331     CPUState *cs = CPU(cpu);
332     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
333     struct nested_ppc_state l2_state;
334     target_ulong hv_ptr = args[0];
335     target_ulong regs_ptr = args[1];
336     target_ulong hdec, now = cpu_ppc_load_tbl(env);
337     target_ulong lpcr, lpcr_mask;
338     struct kvmppc_hv_guest_state *hvstate;
339     struct kvmppc_hv_guest_state hv_state;
340     struct kvmppc_pt_regs *regs;
341     hwaddr len;
342 
343     if (spapr->nested.ptcr == 0) {
344         return H_NOT_AVAILABLE;
345     }
346 
347     len = sizeof(*hvstate);
348     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
349                                 MEMTXATTRS_UNSPECIFIED);
350     if (len != sizeof(*hvstate)) {
351         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
352         return H_PARAMETER;
353     }
354 
355     memcpy(&hv_state, hvstate, len);
356 
357     address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
358 
359     /*
360      * We accept versions 1 and 2. Version 2 fields are unused because TCG
361      * does not implement DAWR*.
362      */
363     if (hv_state.version > HV_GUEST_STATE_VERSION) {
364         return H_PARAMETER;
365     }
366 
367     if (hv_state.lpid == 0) {
368         return H_PARAMETER;
369     }
370 
371     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
372     if (!spapr_cpu->nested_host_state) {
373         return H_NO_MEM;
374     }
375 
376     assert(env->spr[SPR_LPIDR] == 0);
377     assert(env->spr[SPR_DPDES] == 0);
378     nested_save_state(spapr_cpu->nested_host_state, cpu);
379 
380     len = sizeof(*regs);
381     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
382                                 MEMTXATTRS_UNSPECIFIED);
383     if (!regs || len != sizeof(*regs)) {
384         address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
385         g_free(spapr_cpu->nested_host_state);
386         return H_P2;
387     }
388 
389     len = sizeof(l2_state.gpr);
390     assert(len == sizeof(regs->gpr));
391     memcpy(l2_state.gpr, regs->gpr, len);
392 
393     l2_state.lr = regs->link;
394     l2_state.ctr = regs->ctr;
395     l2_state.xer = regs->xer;
396     l2_state.cr = regs->ccr;
397     l2_state.msr = regs->msr;
398     l2_state.nip = regs->nip;
399 
400     address_space_unmap(CPU(cpu)->as, regs, len, len, false);
401 
402     l2_state.cfar = hv_state.cfar;
403     l2_state.lpidr = hv_state.lpid;
404 
405     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
406     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
407     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
408     lpcr &= ~LPCR_LPES0;
409     l2_state.lpcr = lpcr & pcc->lpcr_mask;
410 
411     l2_state.pcr = hv_state.pcr;
412     /* hv_state.amor is not used */
413     l2_state.dpdes = hv_state.dpdes;
414     l2_state.hfscr = hv_state.hfscr;
415     /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
416     l2_state.srr0 = hv_state.srr0;
417     l2_state.srr1 = hv_state.srr1;
418     l2_state.sprg0 = hv_state.sprg[0];
419     l2_state.sprg1 = hv_state.sprg[1];
420     l2_state.sprg2 = hv_state.sprg[2];
421     l2_state.sprg3 = hv_state.sprg[3];
422     l2_state.pidr = hv_state.pidr;
423     l2_state.ppr = hv_state.ppr;
424     l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
425 
426     /*
427      * Switch to the nested guest environment and start the "hdec" timer.
428      */
429     nested_load_state(cpu, &l2_state);
430     nested_post_load_state(env, cs);
431 
432     hdec = hv_state.hdec_expiry - now;
433     cpu_ppc_hdecr_init(env);
434     cpu_ppc_store_hdecr(env, hdec);
435 
436     /*
437      * The hv_state.vcpu_token is not needed. It is used by the KVM
438      * implementation to remember which L2 vCPU last ran on which physical
439      * CPU so as to invalidate process scope translations if it is moved
440      * between physical CPUs. For now TLBs are always flushed on L1<->L2
441      * transitions so this is not a problem.
442      *
443      * Could validate that the same vcpu_token does not attempt to run on
444      * different L1 vCPUs at the same time, but that would be a L1 KVM bug
445      * and it's not obviously worth a new data structure to do it.
446      */
447 
448     spapr_cpu->in_nested = true;
449 
450     /*
451      * The spapr hcall helper sets env->gpr[3] to the return value, but at
452      * this point the L1 is not returning from the hcall but rather we
453      * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
454      * to leave it unchanged.
455      */
456     return env->gpr[3];
457 }
458 
459 static void spapr_exit_nested_hv(PowerPCCPU *cpu, int excp)
460 {
461     CPUPPCState *env = &cpu->env;
462     CPUState *cs = CPU(cpu);
463     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
464     struct nested_ppc_state l2_state;
465     target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
466     target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
467     target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
468     struct kvmppc_hv_guest_state *hvstate;
469     struct kvmppc_pt_regs *regs;
470     hwaddr len;
471 
472     nested_save_state(&l2_state, cpu);
473     hsrr0 = env->spr[SPR_HSRR0];
474     hsrr1 = env->spr[SPR_HSRR1];
475     hdar = env->spr[SPR_HDAR];
476     hdsisr = env->spr[SPR_HDSISR];
477     asdr = env->spr[SPR_ASDR];
478 
479     /*
480      * Switch back to the host environment (including for any error).
481      */
482     assert(env->spr[SPR_LPIDR] != 0);
483     nested_load_state(cpu, spapr_cpu->nested_host_state);
484     nested_post_load_state(env, cs);
485     env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
486 
487     cpu_ppc_hdecr_exit(env);
488 
489     spapr_cpu->in_nested = false;
490 
491     g_free(spapr_cpu->nested_host_state);
492     spapr_cpu->nested_host_state = NULL;
493 
494     len = sizeof(*hvstate);
495     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
496                                 MEMTXATTRS_UNSPECIFIED);
497     if (len != sizeof(*hvstate)) {
498         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
499         env->gpr[3] = H_PARAMETER;
500         return;
501     }
502 
503     hvstate->cfar = l2_state.cfar;
504     hvstate->lpcr = l2_state.lpcr;
505     hvstate->pcr = l2_state.pcr;
506     hvstate->dpdes = l2_state.dpdes;
507     hvstate->hfscr = l2_state.hfscr;
508 
509     if (excp == POWERPC_EXCP_HDSI) {
510         hvstate->hdar = hdar;
511         hvstate->hdsisr = hdsisr;
512         hvstate->asdr = asdr;
513     } else if (excp == POWERPC_EXCP_HISI) {
514         hvstate->asdr = asdr;
515     }
516 
517     /* HEIR should be implemented for HV mode and saved here. */
518     hvstate->srr0 = l2_state.srr0;
519     hvstate->srr1 = l2_state.srr1;
520     hvstate->sprg[0] = l2_state.sprg0;
521     hvstate->sprg[1] = l2_state.sprg1;
522     hvstate->sprg[2] = l2_state.sprg2;
523     hvstate->sprg[3] = l2_state.sprg3;
524     hvstate->pidr = l2_state.pidr;
525     hvstate->ppr = l2_state.ppr;
526 
527     /* Is it okay to specify write length larger than actual data written? */
528     address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
529 
530     len = sizeof(*regs);
531     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
532                                 MEMTXATTRS_UNSPECIFIED);
533     if (!regs || len != sizeof(*regs)) {
534         address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
535         env->gpr[3] = H_P2;
536         return;
537     }
538 
539     len = sizeof(env->gpr);
540     assert(len == sizeof(regs->gpr));
541     memcpy(regs->gpr, l2_state.gpr, len);
542 
543     regs->link = l2_state.lr;
544     regs->ctr = l2_state.ctr;
545     regs->xer = l2_state.xer;
546     regs->ccr = l2_state.cr;
547 
548     if (excp == POWERPC_EXCP_MCHECK ||
549         excp == POWERPC_EXCP_RESET ||
550         excp == POWERPC_EXCP_SYSCALL) {
551         regs->nip = l2_state.srr0;
552         regs->msr = l2_state.srr1 & env->msr_mask;
553     } else {
554         regs->nip = hsrr0;
555         regs->msr = hsrr1 & env->msr_mask;
556     }
557 
558     /* Is it okay to specify write length larger than actual data written? */
559     address_space_unmap(CPU(cpu)->as, regs, len, len, true);
560 }
561 
562 static bool spapr_nested_vcpu_check(SpaprMachineStateNestedGuest *guest,
563                                     target_ulong vcpuid, bool inoutbuf)
564 {
565     struct SpaprMachineStateNestedGuestVcpu *vcpu;
566     /*
567      * Perform sanity checks for the provided vcpuid of a guest.
568      * For now, ensure its valid, allocated and enabled for use.
569      */
570 
571     if (vcpuid >= PAPR_NESTED_GUEST_VCPU_MAX) {
572         return false;
573     }
574 
575     if (!(vcpuid < guest->nr_vcpus)) {
576         return false;
577     }
578 
579     vcpu = &guest->vcpus[vcpuid];
580     if (!vcpu->enabled) {
581         return false;
582     }
583 
584     if (!inoutbuf) {
585         return true;
586     }
587 
588     /* Check to see if the in/out buffers are registered */
589     if (vcpu->runbufin.addr && vcpu->runbufout.addr) {
590         return true;
591     }
592 
593     return false;
594 }
595 
596 static void *get_vcpu_state_ptr(SpaprMachineStateNestedGuest *guest,
597                               target_ulong vcpuid)
598 {
599     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
600     return &guest->vcpus[vcpuid].state;
601 }
602 
603 static void *get_vcpu_ptr(SpaprMachineStateNestedGuest *guest,
604                                    target_ulong vcpuid)
605 {
606     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
607     return &guest->vcpus[vcpuid];
608 }
609 
610 static void *get_guest_ptr(SpaprMachineStateNestedGuest *guest,
611                            target_ulong vcpuid)
612 {
613     return guest; /* for GSBE_NESTED */
614 }
615 
/*
 * Copy helpers for guest state elements.
 *
 * set=1 means the L1 is trying to set some state
 * set=0 means the L1 is trying to get some state
 */

/*
 * 8-byte element <-> 8-byte internal field.
 * @a is the internal (host-endian) field, @b the big-endian element buffer.
 */
static void copy_state_8to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (!set) {
        /* get: publish the internal value in big-endian form */
        *element = cpu_to_be64(*internal);
        return;
    }

    /* set: take the big-endian element and store it internally */
    *internal = be64_to_cpu(*element);
}
630 
/*
 * 4-byte element <-> 4-byte internal field.
 * @a is the internal (host-endian) field, @b the big-endian element buffer;
 * @set selects the direction (true: @b -> @a, false: @a -> @b).
 */
static void copy_state_4to4(void *a, void *b, bool set)
{
    uint32_t *internal = a;
    uint32_t *element = b;

    if (!set) {
        *element = cpu_to_be32(*internal);
        return;
    }

    *internal = be32_to_cpu(*element);
}
639 
/*
 * 16-byte element <-> 16-byte internal field, handled as two doublewords.
 * @a is the internal field, @b the big-endian element buffer; @set selects
 * the direction (true: @b -> @a, false: @a -> @b).
 *
 * In both directions each doubleword is byte-swapped as needed and the two
 * doublewords trade places (index 0 <-> index 1).
 */
static void copy_state_16to16(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (set) {
        internal[1] = be64_to_cpu(element[0]);
        internal[0] = be64_to_cpu(element[1]);
        return;
    }

    element[1] = cpu_to_be64(internal[0]);
    element[0] = cpu_to_be64(internal[1]);
}
658 
/*
 * 4-byte element <-> 8-byte internal field.
 * @a is the internal 64-bit field, @b the big-endian 32-bit element buffer.
 *
 * set:  the element is zero-extended into the 64-bit field.
 * get:  the low 32 bits of the field are written to the element.
 */
static void copy_state_4to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint32_t *element = b;

    if (!set) {
        uint32_t low_word = (uint32_t)*internal;

        *element = cpu_to_be32(low_word);
        return;
    }

    *internal = (uint64_t)be32_to_cpu(*element);
}
667 
668 static void copy_state_pagetbl(void *a, void *b, bool set)
669 {
670     uint64_t *pagetbl;
671     uint64_t *buf; /* 3 double words */
672     uint64_t rts;
673 
674     assert(set);
675 
676     pagetbl = a;
677     buf = b;
678 
679     *pagetbl = be64_to_cpu(buf[0]);
680     /* as per ISA section 6.7.6.1 */
681     *pagetbl |= PATE0_HR; /* Host Radix bit is 1 */
682 
683     /* RTS */
684     rts = be64_to_cpu(buf[1]);
685     assert(rts == 52);
686     rts = rts - 31; /* since radix tree size = 2^(RTS+31) */
687     *pagetbl |=  ((rts & 0x7) << 5); /* RTS2 is bit 56:58 */
688     *pagetbl |=  (((rts >> 3) & 0x3) << 61); /* RTS1 is bit 1:2 */
689 
690     /* RPDS {Size = 2^(RPDS+3) , RPDS >=5} */
691     *pagetbl |= 63 - clz64(be64_to_cpu(buf[2])) - 3;
692 }
693 
/*
 * Build a process-table doubleword from a 2-doubleword big-endian element.
 * Set-only (asserted).
 *
 * @a: destination uint64_t that receives the composed value
 * @b: element buffer: buf[0] is the process table base, buf[1] its size
 *     in bytes; only 2^12 and 2^24 are accepted, anything else aborts
 */
static void copy_state_proctbl(void *a, void *b, bool set)
{
    uint64_t *proctbl = a;
    uint64_t *buf = b; /* 2 double words */
    uint64_t entry;

    assert(set);

    /* PRTB: Process Table Base */
    entry = be64_to_cpu(buf[0]);

    /* PRTS: Process Table Size = 2^(12+PRTS) */
    switch (be64_to_cpu(buf[1])) {
    case 1ULL << 12:
        entry |= 0;
        break;
    case 1ULL << 24:
        entry |= 12;
        break;
    default:
        g_assert_not_reached();
    }

    *proctbl = entry;
}
714 
715 static void copy_state_runbuf(void *a, void *b, bool set)
716 {
717     uint64_t *buf; /* 2 double words */
718     struct SpaprMachineStateNestedGuestVcpuRunBuf *runbuf;
719 
720     assert(set);
721 
722     runbuf = a;
723     buf = b;
724 
725     runbuf->addr = be64_to_cpu(buf[0]);
726     assert(runbuf->addr);
727 
728     /* per spec */
729     assert(be64_to_cpu(buf[1]) <= 16384);
730 
731     /*
732      * This will also hit in the input buffer but should be fine for
733      * now. If not we can split this function.
734      */
735     assert(be64_to_cpu(buf[1]) >= VCPU_OUT_BUF_MIN_SZ);
736 
737     runbuf->size = be64_to_cpu(buf[1]);
738 }
739 
740 /* tell the L1 how big we want the output vcpu run buffer */
741 static void out_buf_min_size(void *a, void *b, bool set)
742 {
743     uint64_t *buf; /* 1 double word */
744 
745     assert(!set);
746 
747     buf = b;
748 
749     buf[0] = cpu_to_be64(VCPU_OUT_BUF_MIN_SZ);
750 }
751 
752 static void copy_logical_pvr(void *a, void *b, bool set)
753 {
754     SpaprMachineStateNestedGuest *guest;
755     uint32_t *buf; /* 1 word */
756     uint32_t *pvr_logical_ptr;
757     uint32_t pvr_logical;
758     target_ulong pcr = 0;
759 
760     pvr_logical_ptr = a;
761     buf = b;
762 
763     if (!set) {
764         buf[0] = cpu_to_be32(*pvr_logical_ptr);
765         return;
766     }
767 
768     pvr_logical = be32_to_cpu(buf[0]);
769 
770     *pvr_logical_ptr = pvr_logical;
771 
772     if (*pvr_logical_ptr) {
773         switch (*pvr_logical_ptr) {
774         case CPU_POWERPC_LOGICAL_3_10_P11:
775         case CPU_POWERPC_LOGICAL_3_10:
776             pcr = PCR_COMPAT_3_10 | PCR_COMPAT_3_00;
777             break;
778         case CPU_POWERPC_LOGICAL_3_00:
779             pcr = PCR_COMPAT_3_00;
780             break;
781         default:
782             qemu_log_mask(LOG_GUEST_ERROR,
783                           "Could not set PCR for LPVR=0x%08x\n",
784                           *pvr_logical_ptr);
785             return;
786         }
787     }
788 
789     guest = container_of(pvr_logical_ptr,
790                          struct SpaprMachineStateNestedGuest,
791                          pvr_logical);
792     for (int i = 0; i < guest->nr_vcpus; i++) {
793         guest->vcpus[i].state.pcr = ~pcr | HVMASK_PCR;
794     }
795 }
796 
797 static void copy_tb_offset(void *a, void *b, bool set)
798 {
799     SpaprMachineStateNestedGuest *guest;
800     uint64_t *buf; /* 1 double word */
801     uint64_t *tb_offset_ptr;
802     uint64_t tb_offset;
803 
804     tb_offset_ptr = a;
805     buf = b;
806 
807     if (!set) {
808         buf[0] = cpu_to_be64(*tb_offset_ptr);
809         return;
810     }
811 
812     tb_offset = be64_to_cpu(buf[0]);
813     /* need to copy this to the individual tb_offset for each vcpu */
814     guest = container_of(tb_offset_ptr,
815                          struct SpaprMachineStateNestedGuest,
816                          tb_offset);
817     for (int i = 0; i < guest->nr_vcpus; i++) {
818         guest->vcpus[i].tb_offset = tb_offset;
819     }
820 }
821 
/*
 * Get or set the hypervisor decrementer expiry value.
 *
 * @a: the internal uint64_t expiry field
 * @b: element buffer holding one big-endian doubleword
 * @set: true copies @b into @a, false copies @a into @b
 */
static void copy_state_hdecr(void *a, void *b, bool set)
{
    uint64_t *hdecr_expiry_tb = a;
    uint64_t *buf = b; /* 1 double word */

    if (set) {
        *hdecr_expiry_tb = be64_to_cpu(buf[0]);
    } else {
        buf[0] = cpu_to_be64(*hdecr_expiry_tb);
    }
}
837 
838 struct guest_state_element_type guest_state_element_types[] = {
839     GUEST_STATE_ELEMENT_NOP(GSB_HV_VCPU_IGNORED_ID, 0),
840     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR0,  gpr[0]),
841     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR1,  gpr[1]),
842     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR2,  gpr[2]),
843     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR3,  gpr[3]),
844     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR4,  gpr[4]),
845     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR5,  gpr[5]),
846     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR6,  gpr[6]),
847     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR7,  gpr[7]),
848     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR8,  gpr[8]),
849     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR9,  gpr[9]),
850     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR10, gpr[10]),
851     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR11, gpr[11]),
852     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR12, gpr[12]),
853     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR13, gpr[13]),
854     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR14, gpr[14]),
855     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR15, gpr[15]),
856     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR16, gpr[16]),
857     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR17, gpr[17]),
858     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR18, gpr[18]),
859     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR19, gpr[19]),
860     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR20, gpr[20]),
861     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR21, gpr[21]),
862     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR22, gpr[22]),
863     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR23, gpr[23]),
864     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR24, gpr[24]),
865     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR25, gpr[25]),
866     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR26, gpr[26]),
867     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR27, gpr[27]),
868     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR28, gpr[28]),
869     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR29, gpr[29]),
870     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR30, gpr[30]),
871     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR31, gpr[31]),
872     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_NIA, nip),
873     GSE_ENV_DWM(GSB_VCPU_SPR_MSR, msr, HVMASK_MSR),
874     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTR, ctr),
875     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_LR, lr),
876     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_XER, xer),
877     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_CR, cr),
878     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_MMCR3),
879     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER2),
880     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER3),
881     GUEST_STATE_ELEMENT_NOP_W(GSB_VCPU_SPR_WORT),
882     GSE_ENV_DWM(GSB_VCPU_SPR_LPCR, lpcr, HVMASK_LPCR),
883     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMOR, amor),
884     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HFSCR, hfscr),
885     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR0, dawr0),
886     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX0, dawrx0),
887     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CIABR, ciabr),
888     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PURR,  purr),
889     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPURR, spurr),
890     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IC,    ic),
891     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_VTB,   vtb),
892     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HDAR,  hdar),
893     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HDSISR, hdsisr),
894     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HEIR,   heir),
895     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_ASDR,  asdr),
896     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR0,  srr0),
897     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR1,  srr1),
898     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG0, sprg0),
899     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG1, sprg1),
900     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG2, sprg2),
901     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG3, sprg3),
902     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PIDR,   pidr),
903     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CFAR,  cfar),
904     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PPR,   ppr),
905     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR1, dawr1),
906     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX1, dawrx1),
907     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DEXCR, dexcr),
908     GSE_ENV_DWM(GSB_VCPU_SPR_HDEXCR, hdexcr, HVMASK_HDEXCR),
909     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHKEYR, hashkeyr),
910     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHPKEYR, hashpkeyr),
911     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR0, vsr[0]),
912     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR1, vsr[1]),
913     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR2, vsr[2]),
914     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR3, vsr[3]),
915     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR4, vsr[4]),
916     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR5, vsr[5]),
917     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR6, vsr[6]),
918     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR7, vsr[7]),
919     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR8, vsr[8]),
920     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR9, vsr[9]),
921     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR10, vsr[10]),
922     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR11, vsr[11]),
923     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR12, vsr[12]),
924     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR13, vsr[13]),
925     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR14, vsr[14]),
926     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR15, vsr[15]),
927     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR16, vsr[16]),
928     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR17, vsr[17]),
929     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR18, vsr[18]),
930     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR19, vsr[19]),
931     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR20, vsr[20]),
932     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR21, vsr[21]),
933     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR22, vsr[22]),
934     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR23, vsr[23]),
935     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR24, vsr[24]),
936     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR25, vsr[25]),
937     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR26, vsr[26]),
938     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR27, vsr[27]),
939     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR28, vsr[28]),
940     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR29, vsr[29]),
941     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR30, vsr[30]),
942     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR31, vsr[31]),
943     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR32, vsr[32]),
944     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR33, vsr[33]),
945     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR34, vsr[34]),
946     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR35, vsr[35]),
947     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR36, vsr[36]),
948     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR37, vsr[37]),
949     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR38, vsr[38]),
950     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR39, vsr[39]),
951     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR40, vsr[40]),
952     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR41, vsr[41]),
953     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR42, vsr[42]),
954     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR43, vsr[43]),
955     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR44, vsr[44]),
956     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR45, vsr[45]),
957     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR46, vsr[46]),
958     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR47, vsr[47]),
959     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR48, vsr[48]),
960     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR49, vsr[49]),
961     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR50, vsr[50]),
962     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR51, vsr[51]),
963     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR52, vsr[52]),
964     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR53, vsr[53]),
965     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR54, vsr[54]),
966     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR55, vsr[55]),
967     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR56, vsr[56]),
968     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR57, vsr[57]),
969     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR58, vsr[58]),
970     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR59, vsr[59]),
971     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR60, vsr[60]),
972     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR61, vsr[61]),
973     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR62, vsr[62]),
974     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR63, vsr[63]),
975     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBHR, ebbhr),
976     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_TAR,   tar),
977     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBRR, ebbrr),
978     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_BESCR, bescr),
979     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IAMR,  iamr),
980     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMR,   amr),
981     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_UAMOR, uamor),
982     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DSCR,  dscr),
983     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FSCR,  fscr),
984     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PSPB,   pspb),
985     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTRL,  ctrl),
986     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DPDES, dpdes),
987     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_VRSAVE, vrsave),
988     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAR,   dar),
989     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DSISR,  dsisr),
990     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC1,   pmc1),
991     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC2,   pmc2),
992     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC3,   pmc3),
993     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC4,   pmc4),
994     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC5,   pmc5),
995     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC6,   pmc6),
996     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR0, mmcr0),
997     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR1, mmcr1),
998     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR2, mmcr2),
999     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCRA, mmcra),
1000     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SDAR , sdar),
1001     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIAR , siar),
1002     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIER , sier),
1003     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_VSCR,  vscr),
1004     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FPSCR, fpscr),
1005     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_DEC_EXPIRE_TB, dec_expiry_tb),
1006     GSBE_NESTED(GSB_PART_SCOPED_PAGETBL, 0x18, parttbl[0],  copy_state_pagetbl),
1007     GSBE_NESTED(GSB_PROCESS_TBL,         0x10, parttbl[1],  copy_state_proctbl),
1008     GSBE_NESTED(GSB_VCPU_LPVR,           0x4,  pvr_logical, copy_logical_pvr),
1009     GSBE_NESTED_MSK(GSB_TB_OFFSET, 0x8, tb_offset, copy_tb_offset,
1010                     HVMASK_TB_OFFSET),
1011     GSBE_NESTED_VCPU(GSB_VCPU_IN_BUFFER, 0x10, runbufin,    copy_state_runbuf),
1012     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout,   copy_state_runbuf),
1013     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size),
1014     GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb,
1015                      copy_state_hdecr)
1016 };
1017 
1018 void spapr_nested_gsb_init(void)
1019 {
1020     struct guest_state_element_type *type;
1021 
1022     /* Init the guest state elements lookup table, flags for now */
1023     for (int i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) {
1024         type = &guest_state_element_types[i];
1025 
1026         assert(type->id <= GSB_LAST);
1027         if (type->id >= GSB_VCPU_SPR_HDAR)
1028             /* 0xf000 - 0xf005 Thread + RO */
1029             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY;
1030         else if (type->id >= GSB_VCPU_IN_BUFFER)
1031             /* 0x0c00 - 0xf000 Thread + RW */
1032             type->flags = 0;
1033         else if (type->id >= GSB_VCPU_LPVR)
1034             /* 0x0003 - 0x0bff Guest + RW */
1035             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1036         else if (type->id >= GSB_HV_VCPU_STATE_SIZE)
1037             /* 0x0001 - 0x0002 Guest + RO */
1038             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY |
1039                           GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1040     }
1041 }
1042 
1043 static struct guest_state_element *guest_state_element_next(
1044     struct guest_state_element *element,
1045     int64_t *len,
1046     int64_t *num_elements)
1047 {
1048     uint16_t size;
1049 
1050     /* size is of element->value[] only. Not whole guest_state_element */
1051     size = be16_to_cpu(element->size);
1052 
1053     if (len) {
1054         *len -= size + offsetof(struct guest_state_element, value);
1055     }
1056 
1057     if (num_elements) {
1058         *num_elements -= 1;
1059     }
1060 
1061     return (struct guest_state_element *)(element->value + size);
1062 }
1063 
1064 static
1065 struct guest_state_element_type *guest_state_element_type_find(uint16_t id)
1066 {
1067     int i;
1068 
1069     for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++)
1070         if (id == guest_state_element_types[i].id) {
1071             return &guest_state_element_types[i];
1072         }
1073 
1074     return NULL;
1075 }
1076 
1077 static void log_element(struct guest_state_element *element,
1078                         struct guest_state_request *gsr)
1079 {
1080     qemu_log_mask(LOG_GUEST_ERROR, "h_guest_%s_state id:0x%04x size:0x%04x",
1081                   gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get",
1082                   be16_to_cpu(element->id), be16_to_cpu(element->size));
1083     qemu_log_mask(LOG_GUEST_ERROR, "buf:0x%016"PRIx64" ...\n",
1084                   be64_to_cpu(*(uint64_t *)element->value));
1085 }
1086 
1087 static bool guest_state_request_check(struct guest_state_request *gsr)
1088 {
1089     int64_t num_elements, len = gsr->len;
1090     struct guest_state_buffer *gsb = gsr->gsb;
1091     struct guest_state_element *element;
1092     struct guest_state_element_type *type;
1093     uint16_t id, size;
1094 
1095     /* gsb->num_elements = 0 == 32 bits long */
1096     assert(len >= 4);
1097 
1098     num_elements = be32_to_cpu(gsb->num_elements);
1099     element = gsb->elements;
1100     len -= sizeof(gsb->num_elements);
1101 
1102     /* Walk the buffer to validate the length */
1103     while (num_elements) {
1104 
1105         id = be16_to_cpu(element->id);
1106         size = be16_to_cpu(element->size);
1107 
1108         if (false) {
1109             log_element(element, gsr);
1110         }
1111         /* buffer size too small */
1112         if (len < 0) {
1113             return false;
1114         }
1115 
1116         type = guest_state_element_type_find(id);
1117         if (!type) {
1118             qemu_log_mask(LOG_GUEST_ERROR, "Element ID %04x unknown\n", id);
1119             log_element(element, gsr);
1120             return false;
1121         }
1122 
1123         if (id == GSB_HV_VCPU_IGNORED_ID) {
1124             goto next_element;
1125         }
1126 
1127         if (size != type->size) {
1128             qemu_log_mask(LOG_GUEST_ERROR, "Size mismatch. Element ID:%04x."
1129                           "Size Exp:%i Got:%i\n", id, type->size, size);
1130             log_element(element, gsr);
1131             return false;
1132         }
1133 
1134         if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) &&
1135             (gsr->flags & GUEST_STATE_REQUEST_SET)) {
1136             qemu_log_mask(LOG_GUEST_ERROR, "Trying to set a read-only Element "
1137                           "ID:%04x.\n", id);
1138             return false;
1139         }
1140 
1141         if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
1142             /* guest wide element type */
1143             if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) {
1144                 qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide "
1145                               "Element ID:%04x.\n", id);
1146                 return false;
1147             }
1148         } else {
1149             /* thread wide element type */
1150             if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) {
1151                 qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide "
1152                               "Element ID:%04x.\n", id);
1153                 return false;
1154             }
1155         }
1156 next_element:
1157         element = guest_state_element_next(element, &len, &num_elements);
1158 
1159     }
1160     return true;
1161 }
1162 
1163 static bool is_gsr_invalid(struct guest_state_request *gsr,
1164                                    struct guest_state_element *element,
1165                                    struct guest_state_element_type *type)
1166 {
1167     if ((gsr->flags & GUEST_STATE_REQUEST_SET) &&
1168         (*(uint64_t *)(element->value) & ~(type->mask))) {
1169         log_element(element, gsr);
1170         qemu_log_mask(LOG_GUEST_ERROR, "L1 can't set reserved bits "
1171                       "(allowed mask: 0x%08"PRIx64")\n", type->mask);
1172         return true;
1173     }
1174     return false;
1175 }
1176 
1177 static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu,
1178                                              SpaprMachineState *spapr,
1179                                              target_ulong opcode,
1180                                              target_ulong *args)
1181 {
1182     CPUPPCState *env = &cpu->env;
1183     target_ulong flags = args[0];
1184 
1185     if (flags) { /* don't handle any flags capabilities for now */
1186         return H_PARAMETER;
1187     }
1188 
1189     /* P11 capabilities */
1190     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10_P11, 0,
1191         spapr->max_compat_pvr)) {
1192         env->gpr[4] |= H_GUEST_CAPABILITIES_P11_MODE;
1193     }
1194 
1195     /* P10 capabilities */
1196     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1197         spapr->max_compat_pvr)) {
1198         env->gpr[4] |= H_GUEST_CAPABILITIES_P10_MODE;
1199     }
1200 
1201     /* P9 capabilities */
1202     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1203         spapr->max_compat_pvr)) {
1204         env->gpr[4] |= H_GUEST_CAPABILITIES_P9_MODE;
1205     }
1206 
1207     return H_SUCCESS;
1208 }
1209 
1210 static target_ulong h_guest_set_capabilities(PowerPCCPU *cpu,
1211                                              SpaprMachineState *spapr,
1212                                              target_ulong opcode,
1213                                               target_ulong *args)
1214 {
1215     CPUPPCState *env = &cpu->env;
1216     target_ulong flags = args[0];
1217     target_ulong capabilities = args[1];
1218     env->gpr[4] = 0;
1219 
1220     if (flags) { /* don't handle any flags capabilities for now */
1221         return H_PARAMETER;
1222     }
1223 
1224     if (capabilities & H_GUEST_CAPABILITIES_COPY_MEM) {
1225         env->gpr[4] = 1;
1226         return H_P2; /* isn't supported */
1227     }
1228 
1229     /*
1230      * If there are no capabilities configured, set the R5 to the index of
1231      * the first supported Power Processor Mode
1232      */
1233     if (!capabilities) {
1234         env->gpr[4] = 1;
1235 
1236         /* set R5 to the first supported Power Processor Mode */
1237         if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10_P11, 0,
1238                              spapr->max_compat_pvr)) {
1239             env->gpr[5] = H_GUEST_CAP_P11_MODE_BMAP;
1240         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1241                              spapr->max_compat_pvr)) {
1242             env->gpr[5] = H_GUEST_CAP_P10_MODE_BMAP;
1243         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1244                                     spapr->max_compat_pvr)) {
1245             env->gpr[5] = H_GUEST_CAP_P9_MODE_BMAP;
1246         }
1247 
1248         return H_P2;
1249     }
1250 
1251     /*
1252      * If an invalid capability is set, R5 should contain the index of the
1253      * invalid capability bit
1254      */
1255     if (capabilities & ~H_GUEST_CAP_VALID_MASK) {
1256         env->gpr[4] = 1;
1257 
1258         /* Set R5 to the index of the invalid capability */
1259         env->gpr[5] = 63 - ctz64(capabilities);
1260 
1261         return H_P2;
1262     }
1263 
1264     if (!spapr->nested.capabilities_set) {
1265         spapr->nested.capabilities_set = true;
1266         spapr->nested.pvr_base = env->spr[SPR_PVR];
1267         return H_SUCCESS;
1268     } else {
1269         return H_STATE;
1270     }
1271 }
1272 
1273 static void
1274 destroy_guest_helper(gpointer value)
1275 {
1276     struct SpaprMachineStateNestedGuest *guest = value;
1277     g_free(guest->vcpus);
1278     g_free(guest);
1279 }
1280 
1281 static target_ulong h_guest_create(PowerPCCPU *cpu,
1282                                    SpaprMachineState *spapr,
1283                                    target_ulong opcode,
1284                                    target_ulong *args)
1285 {
1286     CPUPPCState *env = &cpu->env;
1287     target_ulong flags = args[0];
1288     target_ulong continue_token = args[1];
1289     uint64_t guestid;
1290     int nguests = 0;
1291     struct SpaprMachineStateNestedGuest *guest;
1292 
1293     if (flags) { /* don't handle any flags for now */
1294         return H_UNSUPPORTED_FLAG;
1295     }
1296 
1297     if (continue_token != -1) {
1298         return H_P2;
1299     }
1300 
1301     if (!spapr->nested.capabilities_set) {
1302         return H_STATE;
1303     }
1304 
1305     if (!spapr->nested.guests) {
1306         spapr->nested.guests = g_hash_table_new_full(NULL,
1307                                                      NULL,
1308                                                      NULL,
1309                                                      destroy_guest_helper);
1310     }
1311 
1312     nguests = g_hash_table_size(spapr->nested.guests);
1313 
1314     if (nguests == PAPR_NESTED_GUEST_MAX) {
1315         return H_NO_MEM;
1316     }
1317 
1318     /* Lookup for available guestid */
1319     for (guestid = 1; guestid < PAPR_NESTED_GUEST_MAX; guestid++) {
1320         if (!(g_hash_table_lookup(spapr->nested.guests,
1321                                   GINT_TO_POINTER(guestid)))) {
1322             break;
1323         }
1324     }
1325 
1326     if (guestid == PAPR_NESTED_GUEST_MAX) {
1327         return H_NO_MEM;
1328     }
1329 
1330     guest = g_try_new0(struct SpaprMachineStateNestedGuest, 1);
1331     if (!guest) {
1332         return H_NO_MEM;
1333     }
1334 
1335     guest->pvr_logical = spapr->nested.pvr_base;
1336     g_hash_table_insert(spapr->nested.guests, GINT_TO_POINTER(guestid), guest);
1337     env->gpr[4] = guestid;
1338 
1339     return H_SUCCESS;
1340 }
1341 
1342 static target_ulong h_guest_delete(PowerPCCPU *cpu,
1343                                    SpaprMachineState *spapr,
1344                                    target_ulong opcode,
1345                                    target_ulong *args)
1346 {
1347     target_ulong flags = args[0];
1348     target_ulong guestid = args[1];
1349     struct SpaprMachineStateNestedGuest *guest;
1350 
1351     /*
1352      * handle flag deleteAllGuests, if set:
1353      * guestid is ignored and all guests are deleted
1354      *
1355      */
1356     if (flags & ~H_GUEST_DELETE_ALL_FLAG) {
1357         return H_UNSUPPORTED_FLAG; /* other flag bits reserved */
1358     } else if (flags & H_GUEST_DELETE_ALL_FLAG) {
1359         g_hash_table_destroy(spapr->nested.guests);
1360         return H_SUCCESS;
1361     }
1362 
1363     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
1364     if (!guest) {
1365         return H_P2;
1366     }
1367 
1368     g_hash_table_remove(spapr->nested.guests, GINT_TO_POINTER(guestid));
1369 
1370     return H_SUCCESS;
1371 }
1372 
1373 static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
1374                                         SpaprMachineState *spapr,
1375                                         target_ulong opcode,
1376                                         target_ulong *args)
1377 {
1378     target_ulong flags = args[0];
1379     target_ulong guestid = args[1];
1380     target_ulong vcpuid = args[2];
1381     SpaprMachineStateNestedGuest *guest;
1382 
1383     if (flags) { /* don't handle any flags for now */
1384         return H_UNSUPPORTED_FLAG;
1385     }
1386 
1387     guest = spapr_get_nested_guest(spapr, guestid);
1388     if (!guest) {
1389         return H_P2;
1390     }
1391 
1392     if (vcpuid < guest->nr_vcpus) {
1393         qemu_log_mask(LOG_UNIMP, "vcpuid " TARGET_FMT_ld " already in use.",
1394                       vcpuid);
1395         return H_IN_USE;
1396     }
1397     /* linear vcpuid allocation only */
1398     assert(vcpuid == guest->nr_vcpus);
1399 
1400     if (guest->nr_vcpus >= PAPR_NESTED_GUEST_VCPU_MAX) {
1401         return H_P3;
1402     }
1403 
1404     SpaprMachineStateNestedGuestVcpu *vcpus, *curr_vcpu;
1405     vcpus = g_try_renew(struct SpaprMachineStateNestedGuestVcpu,
1406                         guest->vcpus,
1407                         guest->nr_vcpus + 1);
1408     if (!vcpus) {
1409         return H_NO_MEM;
1410     }
1411     guest->vcpus = vcpus;
1412     curr_vcpu = &vcpus[guest->nr_vcpus];
1413     memset(curr_vcpu, 0, sizeof(SpaprMachineStateNestedGuestVcpu));
1414 
1415     curr_vcpu->enabled = true;
1416     guest->nr_vcpus++;
1417 
1418     return H_SUCCESS;
1419 }
1420 
1421 static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
1422                                  uint64_t vcpuid,
1423                                  struct guest_state_request *gsr)
1424 {
1425     void *ptr;
1426     uint16_t id;
1427     struct guest_state_element *element;
1428     struct guest_state_element_type *type;
1429     int64_t lenleft, num_elements;
1430 
1431     lenleft = gsr->len;
1432 
1433     if (!guest_state_request_check(gsr)) {
1434         return H_P3;
1435     }
1436 
1437     num_elements = be32_to_cpu(gsr->gsb->num_elements);
1438     element = gsr->gsb->elements;
1439     /* Process the elements */
1440     while (num_elements) {
1441         type = NULL;
1442         /* log_element(element, gsr); */
1443 
1444         id = be16_to_cpu(element->id);
1445         if (id == GSB_HV_VCPU_IGNORED_ID) {
1446             goto next_element;
1447         }
1448 
1449         type = guest_state_element_type_find(id);
1450         assert(type);
1451 
1452         /* Get pointer to guest data to get/set */
1453         if (type->location && type->copy) {
1454             ptr = type->location(guest, vcpuid);
1455             assert(ptr);
1456             if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) {
1457                 return H_INVALID_ELEMENT_VALUE;
1458             }
1459             type->copy(ptr + type->offset, element->value,
1460                        gsr->flags & GUEST_STATE_REQUEST_SET ? true : false);
1461         }
1462 
1463 next_element:
1464         element = guest_state_element_next(element, &lenleft, &num_elements);
1465     }
1466 
1467     return H_SUCCESS;
1468 }
1469 
1470 static target_ulong map_and_getset_state(PowerPCCPU *cpu,
1471                                          SpaprMachineStateNestedGuest *guest,
1472                                          uint64_t vcpuid,
1473                                          struct guest_state_request *gsr)
1474 {
1475     target_ulong rc;
1476     int64_t len;
1477     bool is_write;
1478 
1479     len = gsr->len;
1480     /* only get_state would require write access to the provided buffer */
1481     is_write = (gsr->flags & GUEST_STATE_REQUEST_SET) ? false : true;
1482     gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len,
1483                                  is_write, MEMTXATTRS_UNSPECIFIED);
1484     if (!gsr->gsb) {
1485         rc = H_P3;
1486         goto out1;
1487     }
1488 
1489     if (len != gsr->len) {
1490         rc = H_P3;
1491         goto out1;
1492     }
1493 
1494     rc = getset_state(guest, vcpuid, gsr);
1495 
1496 out1:
1497     address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len);
1498     return rc;
1499 }
1500 
1501 static target_ulong h_guest_getset_state(PowerPCCPU *cpu,
1502                                          SpaprMachineState *spapr,
1503                                          target_ulong *args,
1504                                          bool set)
1505 {
1506     target_ulong flags = args[0];
1507     target_ulong lpid = args[1];
1508     target_ulong vcpuid = args[2];
1509     target_ulong buf = args[3];
1510     target_ulong buflen = args[4];
1511     struct guest_state_request gsr;
1512     SpaprMachineStateNestedGuest *guest;
1513 
1514     guest = spapr_get_nested_guest(spapr, lpid);
1515     if (!guest) {
1516         return H_P2;
1517     }
1518     gsr.buf = buf;
1519     assert(buflen <= GSB_MAX_BUF_SIZE);
1520     gsr.len = buflen;
1521     gsr.flags = 0;
1522     if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
1523         gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE;
1524     }
1525     if (flags & ~H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
1526         return H_PARAMETER; /* flag not supported yet */
1527     }
1528 
1529     if (set) {
1530         gsr.flags |= GUEST_STATE_REQUEST_SET;
1531     }
1532     return map_and_getset_state(cpu, guest, vcpuid, &gsr);
1533 }
1534 
1535 static target_ulong h_guest_set_state(PowerPCCPU *cpu,
1536                                       SpaprMachineState *spapr,
1537                                       target_ulong opcode,
1538                                       target_ulong *args)
1539 {
1540     return h_guest_getset_state(cpu, spapr, args, true);
1541 }
1542 
1543 static target_ulong h_guest_get_state(PowerPCCPU *cpu,
1544                                       SpaprMachineState *spapr,
1545                                       target_ulong opcode,
1546                                       target_ulong *args)
1547 {
1548     return h_guest_getset_state(cpu, spapr, args, false);
1549 }
1550 
1551 static void exit_nested_store_l2(PowerPCCPU *cpu, int excp,
1552                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1553 {
1554     CPUPPCState *env = &cpu->env;
1555     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1556     target_ulong now, hdar, hdsisr, asdr;
1557 
1558     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr)); /* sanity check */
1559 
1560     now = cpu_ppc_load_tbl(env); /* L2 timebase */
1561     now -= vcpu->tb_offset; /* L1 timebase */
1562     vcpu->state.dec_expiry_tb = now - cpu_ppc_load_decr(env);
1563     cpu_ppc_store_decr(env, spapr_cpu->nested_host_state->dec_expiry_tb - now);
1564     /* backup hdar, hdsisr, asdr if reqd later below */
1565     hdar   = vcpu->state.hdar;
1566     hdsisr = vcpu->state.hdsisr;
1567     asdr   = vcpu->state.asdr;
1568 
1569     nested_save_state(&vcpu->state, cpu);
1570 
1571     if (excp == POWERPC_EXCP_MCHECK ||
1572         excp == POWERPC_EXCP_RESET ||
1573         excp == POWERPC_EXCP_SYSCALL) {
1574         vcpu->state.nip = env->spr[SPR_SRR0];
1575         vcpu->state.msr = env->spr[SPR_SRR1] & env->msr_mask;
1576     } else {
1577         vcpu->state.nip = env->spr[SPR_HSRR0];
1578         vcpu->state.msr = env->spr[SPR_HSRR1] & env->msr_mask;
1579     }
1580 
1581     /* hdar, hdsisr, asdr should be retained unless certain exceptions */
1582     if ((excp != POWERPC_EXCP_HDSI) && (excp != POWERPC_EXCP_HISI)) {
1583         vcpu->state.asdr = asdr;
1584     } else if (excp != POWERPC_EXCP_HDSI) {
1585         vcpu->state.hdar   = hdar;
1586         vcpu->state.hdsisr = hdsisr;
1587     }
1588 }
1589 
1590 static int get_exit_ids(uint64_t srr0, uint16_t ids[16])
1591 {
1592     int nr;
1593 
1594     switch (srr0) {
1595     case 0xc00:
1596         nr = 10;
1597         ids[0] = GSB_VCPU_GPR3;
1598         ids[1] = GSB_VCPU_GPR4;
1599         ids[2] = GSB_VCPU_GPR5;
1600         ids[3] = GSB_VCPU_GPR6;
1601         ids[4] = GSB_VCPU_GPR7;
1602         ids[5] = GSB_VCPU_GPR8;
1603         ids[6] = GSB_VCPU_GPR9;
1604         ids[7] = GSB_VCPU_GPR10;
1605         ids[8] = GSB_VCPU_GPR11;
1606         ids[9] = GSB_VCPU_GPR12;
1607         break;
1608     case 0xe00:
1609         nr = 5;
1610         ids[0] = GSB_VCPU_SPR_HDAR;
1611         ids[1] = GSB_VCPU_SPR_HDSISR;
1612         ids[2] = GSB_VCPU_SPR_ASDR;
1613         ids[3] = GSB_VCPU_SPR_NIA;
1614         ids[4] = GSB_VCPU_SPR_MSR;
1615         break;
1616     case 0xe20:
1617         nr = 4;
1618         ids[0] = GSB_VCPU_SPR_HDAR;
1619         ids[1] = GSB_VCPU_SPR_ASDR;
1620         ids[2] = GSB_VCPU_SPR_NIA;
1621         ids[3] = GSB_VCPU_SPR_MSR;
1622         break;
1623     case 0xe40:
1624         nr = 3;
1625         ids[0] = GSB_VCPU_SPR_HEIR;
1626         ids[1] = GSB_VCPU_SPR_NIA;
1627         ids[2] = GSB_VCPU_SPR_MSR;
1628         break;
1629     case 0xf80:
1630         nr = 3;
1631         ids[0] = GSB_VCPU_SPR_HFSCR;
1632         ids[1] = GSB_VCPU_SPR_NIA;
1633         ids[2] = GSB_VCPU_SPR_MSR;
1634         break;
1635     default:
1636         nr = 0;
1637         break;
1638     }
1639 
1640     return nr;
1641 }
1642 
1643 static void exit_process_output_buffer(PowerPCCPU *cpu,
1644                                        SpaprMachineStateNestedGuest *guest,
1645                                        target_ulong vcpuid,
1646                                        target_ulong *r3)
1647 {
1648     SpaprMachineStateNestedGuestVcpu *vcpu = &guest->vcpus[vcpuid];
1649     struct guest_state_request gsr;
1650     struct guest_state_buffer *gsb;
1651     struct guest_state_element *element;
1652     struct guest_state_element_type *type;
1653     int exit_id_count = 0;
1654     uint16_t exit_cause_ids[16];
1655     hwaddr len;
1656 
1657     len = vcpu->runbufout.size;
1658     gsb = address_space_map(CPU(cpu)->as, vcpu->runbufout.addr, &len, true,
1659                             MEMTXATTRS_UNSPECIFIED);
1660     if (!gsb || len != vcpu->runbufout.size) {
1661         address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1662         *r3 = H_P2;
1663         return;
1664     }
1665 
1666     exit_id_count = get_exit_ids(*r3, exit_cause_ids);
1667 
1668     /* Create a buffer of elements to send back */
1669     gsb->num_elements = cpu_to_be32(exit_id_count);
1670     element = gsb->elements;
1671     for (int i = 0; i < exit_id_count; i++) {
1672         type = guest_state_element_type_find(exit_cause_ids[i]);
1673         assert(type);
1674         element->id = cpu_to_be16(exit_cause_ids[i]);
1675         element->size = cpu_to_be16(type->size);
1676         element = guest_state_element_next(element, NULL, NULL);
1677     }
1678     gsr.gsb = gsb;
1679     gsr.len = VCPU_OUT_BUF_MIN_SZ;
1680     gsr.flags = 0; /* get + never guest wide */
1681     getset_state(guest, vcpuid, &gsr);
1682 
1683     address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1684     return;
1685 }
1686 
1687 static
1688 void spapr_exit_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, int excp)
1689 {
1690     CPUPPCState *env = &cpu->env;
1691     CPUState *cs = CPU(cpu);
1692     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1693     target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */
1694     target_ulong lpid = 0, vcpuid = 0;
1695     struct SpaprMachineStateNestedGuestVcpu *vcpu = NULL;
1696     struct SpaprMachineStateNestedGuest *guest = NULL;
1697 
1698     lpid = spapr_cpu->nested_host_state->gpr[5];
1699     vcpuid = spapr_cpu->nested_host_state->gpr[6];
1700     guest = spapr_get_nested_guest(spapr, lpid);
1701     assert(guest);
1702     spapr_nested_vcpu_check(guest, vcpuid, false);
1703     vcpu = &guest->vcpus[vcpuid];
1704 
1705     exit_nested_store_l2(cpu, excp, vcpu);
1706     /* do the output buffer for run_vcpu*/
1707     exit_process_output_buffer(cpu, guest, vcpuid, &r3_return);
1708 
1709     assert(env->spr[SPR_LPIDR] != 0);
1710     nested_load_state(cpu, spapr_cpu->nested_host_state);
1711     cpu_ppc_decrease_tb_by_offset(env, vcpu->tb_offset);
1712     env->gpr[3] = H_SUCCESS;
1713     env->gpr[4] = r3_return;
1714     nested_post_load_state(env, cs);
1715     cpu_ppc_hdecr_exit(env);
1716 
1717     spapr_cpu->in_nested = false;
1718     g_free(spapr_cpu->nested_host_state);
1719     spapr_cpu->nested_host_state = NULL;
1720 }
1721 
1722 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1723 {
1724     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1725     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1726 
1727     assert(spapr_cpu->in_nested);
1728     if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
1729         spapr_exit_nested_hv(cpu, excp);
1730     } else if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
1731         spapr_exit_nested_papr(spapr, cpu, excp);
1732     } else {
1733         g_assert_not_reached();
1734     }
1735 }
1736 
1737 static void nested_papr_load_l2(PowerPCCPU *cpu,
1738                                 CPUPPCState *env,
1739                                 SpaprMachineStateNestedGuestVcpu *vcpu,
1740                                 target_ulong now)
1741 {
1742     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
1743     target_ulong lpcr, lpcr_mask, hdec;
1744     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
1745 
1746     assert(vcpu);
1747     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr));
1748     nested_load_state(cpu, &vcpu->state);
1749     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) |
1750            (vcpu->state.lpcr & lpcr_mask);
1751     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
1752     lpcr &= ~LPCR_LPES0;
1753     env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask;
1754 
1755     hdec = vcpu->hdecr_expiry_tb - now;
1756     cpu_ppc_store_decr(env, vcpu->state.dec_expiry_tb - now);
1757     cpu_ppc_hdecr_init(env);
1758     cpu_ppc_store_hdecr(env, hdec);
1759 
1760     cpu_ppc_increase_tb_by_offset(env, vcpu->tb_offset);
1761 }
1762 
1763 static void nested_papr_run_vcpu(PowerPCCPU *cpu,
1764                                  uint64_t lpid,
1765                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1766 {
1767     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1768     CPUPPCState *env = &cpu->env;
1769     CPUState *cs = CPU(cpu);
1770     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1771     target_ulong now = cpu_ppc_load_tbl(env);
1772 
1773     assert(env->spr[SPR_LPIDR] == 0);
1774     assert(spapr->nested.api); /* ensure API version is initialized */
1775     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
1776     assert(spapr_cpu->nested_host_state);
1777     nested_save_state(spapr_cpu->nested_host_state, cpu);
1778     spapr_cpu->nested_host_state->dec_expiry_tb = now - cpu_ppc_load_decr(env);
1779     nested_papr_load_l2(cpu, env, vcpu, now);
1780     env->spr[SPR_LPIDR] = lpid; /* post load l2 */
1781 
1782     spapr_cpu->in_nested = true;
1783     nested_post_load_state(env, cs);
1784 }
1785 
1786 static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu,
1787                                      SpaprMachineState *spapr,
1788                                      target_ulong opcode,
1789                                      target_ulong *args)
1790 {
1791     CPUPPCState *env = &cpu->env;
1792     target_ulong flags = args[0];
1793     target_ulong lpid = args[1];
1794     target_ulong vcpuid = args[2];
1795     struct SpaprMachineStateNestedGuestVcpu *vcpu;
1796     struct guest_state_request gsr;
1797     SpaprMachineStateNestedGuest *guest;
1798     target_ulong rc;
1799 
1800     if (flags) /* don't handle any flags for now */
1801         return H_PARAMETER;
1802 
1803     guest = spapr_get_nested_guest(spapr, lpid);
1804     if (!guest) {
1805         return H_P2;
1806     }
1807     if (!spapr_nested_vcpu_check(guest, vcpuid, true)) {
1808         return H_P3;
1809     }
1810 
1811     if (guest->parttbl[0] == 0) {
1812         /* At least need a partition scoped radix tree */
1813         return H_NOT_AVAILABLE;
1814     }
1815 
1816     vcpu = &guest->vcpus[vcpuid];
1817 
1818     /* Read run_vcpu input buffer to update state */
1819     gsr.buf = vcpu->runbufin.addr;
1820     gsr.len = vcpu->runbufin.size;
1821     gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */
1822     rc = map_and_getset_state(cpu, guest, vcpuid, &gsr);
1823     if (rc == H_SUCCESS) {
1824         nested_papr_run_vcpu(cpu, lpid, vcpu);
1825     } else {
1826         env->gpr[3] = rc;
1827     }
1828     return env->gpr[3];
1829 }
1830 
1831 void spapr_register_nested_hv(void)
1832 {
1833     spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
1834     spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
1835     spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
1836     spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
1837 }
1838 
1839 void spapr_unregister_nested_hv(void)
1840 {
1841     spapr_unregister_hypercall(KVMPPC_H_SET_PARTITION_TABLE);
1842     spapr_unregister_hypercall(KVMPPC_H_ENTER_NESTED);
1843     spapr_unregister_hypercall(KVMPPC_H_TLB_INVALIDATE);
1844     spapr_unregister_hypercall(KVMPPC_H_COPY_TOFROM_GUEST);
1845 }
1846 
1847 void spapr_register_nested_papr(void)
1848 {
1849     spapr_register_hypercall(H_GUEST_GET_CAPABILITIES,
1850                              h_guest_get_capabilities);
1851     spapr_register_hypercall(H_GUEST_SET_CAPABILITIES,
1852                              h_guest_set_capabilities);
1853     spapr_register_hypercall(H_GUEST_CREATE, h_guest_create);
1854     spapr_register_hypercall(H_GUEST_DELETE, h_guest_delete);
1855     spapr_register_hypercall(H_GUEST_CREATE_VCPU, h_guest_create_vcpu);
1856     spapr_register_hypercall(H_GUEST_SET_STATE, h_guest_set_state);
1857     spapr_register_hypercall(H_GUEST_GET_STATE, h_guest_get_state);
1858     spapr_register_hypercall(H_GUEST_RUN_VCPU, h_guest_run_vcpu);
1859 }
1860 
1861 void spapr_unregister_nested_papr(void)
1862 {
1863     spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
1864     spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
1865     spapr_unregister_hypercall(H_GUEST_CREATE);
1866     spapr_unregister_hypercall(H_GUEST_DELETE);
1867     spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
1868     spapr_unregister_hypercall(H_GUEST_SET_STATE);
1869     spapr_unregister_hypercall(H_GUEST_GET_STATE);
1870     spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
1871 }
1872 
1873 #else
1874 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1875 {
1876     g_assert_not_reached();
1877 }
1878 
/* Stub for builds without CONFIG_TCG. */
void spapr_register_nested_hv(void)
{
    /* Intentionally empty: no hypercalls are registered here */
}
1883 
/* Stub for builds without CONFIG_TCG. */
void spapr_unregister_nested_hv(void)
{
    /* Intentionally empty: no hypercalls are unregistered here */
}
1888 
1889 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
1890                               target_ulong lpid, ppc_v3_pate_t *entry)
1891 {
1892     return false;
1893 }
1894 
1895 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
1896                                 target_ulong lpid, ppc_v3_pate_t *entry)
1897 {
1898     return false;
1899 }
1900 
/* Stub for builds without CONFIG_TCG. */
void spapr_register_nested_papr(void)
{
    /* Intentionally empty: no hypercalls are registered here */
}
1905 
/* Stub for builds without CONFIG_TCG. */
void spapr_unregister_nested_papr(void)
{
    /* Intentionally empty: no hypercalls are unregistered here */
}
1910 
/* Stub for builds without CONFIG_TCG. */
void spapr_nested_gsb_init(void)
{
    /* Intentionally empty: nothing to initialise here */
}
1915 
1916 #endif
1917