xref: /openbmc/qemu/hw/ppc/spapr_nested.c (revision 497711070905393a4614e87c3fe4a4445c369bce)
1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "exec/exec-all.h"
4 #include "helper_regs.h"
5 #include "hw/ppc/ppc.h"
6 #include "hw/ppc/spapr.h"
7 #include "hw/ppc/spapr_cpu_core.h"
8 #include "hw/ppc/spapr_nested.h"
9 #include "mmu-book3s-v3.h"
10 #include "cpu-models.h"
11 #include "qemu/log.h"
12 
13 void spapr_nested_reset(SpaprMachineState *spapr)
14 {
15     if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
16         spapr->nested.api = NESTED_API_KVM_HV;
17         spapr_unregister_nested_hv();
18         spapr_register_nested_hv();
19     } else {
20         spapr->nested.api = 0;
21         spapr->nested.capabilities_set = false;
22         spapr_nested_gsb_init();
23     }
24 }
25 
26 uint8_t spapr_nested_api(SpaprMachineState *spapr)
27 {
28     return spapr->nested.api;
29 }
30 
31 #ifdef CONFIG_TCG
32 
33 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
34                               target_ulong lpid, ppc_v3_pate_t *entry)
35 {
36     uint64_t patb, pats;
37 
38     assert(lpid != 0);
39 
40     patb = spapr->nested.ptcr & PTCR_PATB;
41     pats = spapr->nested.ptcr & PTCR_PATS;
42 
43     /* Check if partition table is properly aligned */
44     if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
45         return false;
46     }
47 
48     /* Calculate number of entries */
49     pats = 1ull << (pats + 12 - 4);
50     if (pats <= lpid) {
51         return false;
52     }
53 
54     /* Grab entry */
55     patb += 16 * lpid;
56     entry->dw0 = ldq_phys(CPU(cpu)->as, patb);
57     entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8);
58     return true;
59 }
60 
61 static
62 SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr,
63                                                      target_ulong guestid)
64 {
65     SpaprMachineStateNestedGuest *guest;
66 
67     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
68     return guest;
69 }
70 
71 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
72                                 target_ulong lpid, ppc_v3_pate_t *entry)
73 {
74     SpaprMachineStateNestedGuest *guest;
75     assert(lpid != 0);
76     guest = spapr_get_nested_guest(spapr, lpid);
77     if (!guest) {
78         return false;
79     }
80 
81     entry->dw0 = guest->parttbl[0];
82     entry->dw1 = guest->parttbl[1];
83     return true;
84 }
85 
86 #define PRTS_MASK      0x1f
87 
88 static target_ulong h_set_ptbl(PowerPCCPU *cpu,
89                                SpaprMachineState *spapr,
90                                target_ulong opcode,
91                                target_ulong *args)
92 {
93     target_ulong ptcr = args[0];
94 
95     if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
96         return H_FUNCTION;
97     }
98 
99     if ((ptcr & PRTS_MASK) + 12 - 4 > 12) {
100         return H_PARAMETER;
101     }
102 
103     spapr->nested.ptcr = ptcr; /* Save new partition table */
104 
105     return H_SUCCESS;
106 }
107 
108 static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
109                                      SpaprMachineState *spapr,
110                                      target_ulong opcode,
111                                      target_ulong *args)
112 {
113     /*
114      * The spapr virtual hypervisor nested HV implementation retains no L2
115      * translation state except for TLB. And the TLB is always invalidated
116      * across L1<->L2 transitions, so nothing is required here.
117      */
118 
119     return H_SUCCESS;
120 }
121 
122 static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
123                                         SpaprMachineState *spapr,
124                                         target_ulong opcode,
125                                         target_ulong *args)
126 {
127     /*
128      * This HCALL is not required, L1 KVM will take a slow path and walk the
129      * page tables manually to do the data copy.
130      */
131     return H_FUNCTION;
132 }
133 
134 static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu)
135 {
136     CPUPPCState *env = &cpu->env;
137     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
138 
139     memcpy(save->gpr, env->gpr, sizeof(save->gpr));
140 
141     save->lr = env->lr;
142     save->ctr = env->ctr;
143     save->cfar = env->cfar;
144     save->msr = env->msr;
145     save->nip = env->nip;
146 
147     save->cr = ppc_get_cr(env);
148     save->xer = cpu_read_xer(env);
149 
150     save->lpcr = env->spr[SPR_LPCR];
151     save->lpidr = env->spr[SPR_LPIDR];
152     save->pcr = env->spr[SPR_PCR];
153     save->dpdes = env->spr[SPR_DPDES];
154     save->hfscr = env->spr[SPR_HFSCR];
155     save->srr0 = env->spr[SPR_SRR0];
156     save->srr1 = env->spr[SPR_SRR1];
157     save->sprg0 = env->spr[SPR_SPRG0];
158     save->sprg1 = env->spr[SPR_SPRG1];
159     save->sprg2 = env->spr[SPR_SPRG2];
160     save->sprg3 = env->spr[SPR_SPRG3];
161     save->pidr = env->spr[SPR_BOOKS_PID];
162     save->ppr = env->spr[SPR_PPR];
163 
164     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
165         save->amor = env->spr[SPR_AMOR];
166         save->dawr0 = env->spr[SPR_DAWR0];
167         save->dawrx0 = env->spr[SPR_DAWRX0];
168         save->ciabr = env->spr[SPR_CIABR];
169         save->purr = env->spr[SPR_PURR];
170         save->spurr = env->spr[SPR_SPURR];
171         save->ic = env->spr[SPR_IC];
172         save->vtb = env->spr[SPR_VTB];
173         save->hdar = env->spr[SPR_HDAR];
174         save->hdsisr = env->spr[SPR_HDSISR];
175         save->heir = env->spr[SPR_HEIR];
176         save->asdr = env->spr[SPR_ASDR];
177         save->dawr1 = env->spr[SPR_DAWR1];
178         save->dawrx1 = env->spr[SPR_DAWRX1];
179         save->dexcr = env->spr[SPR_DEXCR];
180         save->hdexcr = env->spr[SPR_HDEXCR];
181         save->hashkeyr = env->spr[SPR_HASHKEYR];
182         save->hashpkeyr = env->spr[SPR_HASHPKEYR];
183         memcpy(save->vsr, env->vsr, sizeof(save->vsr));
184         save->ebbhr = env->spr[SPR_EBBHR];
185         save->tar = env->spr[SPR_TAR];
186         save->ebbrr = env->spr[SPR_EBBRR];
187         save->bescr = env->spr[SPR_BESCR];
188         save->iamr = env->spr[SPR_IAMR];
189         save->amr = env->spr[SPR_AMR];
190         save->uamor = env->spr[SPR_UAMOR];
191         save->dscr = env->spr[SPR_DSCR];
192         save->fscr = env->spr[SPR_FSCR];
193         save->pspb = env->spr[SPR_PSPB];
194         save->ctrl = env->spr[SPR_CTRL];
195         save->vrsave = env->spr[SPR_VRSAVE];
196         save->dar = env->spr[SPR_DAR];
197         save->dsisr = env->spr[SPR_DSISR];
198         save->pmc1 = env->spr[SPR_POWER_PMC1];
199         save->pmc2 = env->spr[SPR_POWER_PMC2];
200         save->pmc3 = env->spr[SPR_POWER_PMC3];
201         save->pmc4 = env->spr[SPR_POWER_PMC4];
202         save->pmc5 = env->spr[SPR_POWER_PMC5];
203         save->pmc6 = env->spr[SPR_POWER_PMC6];
204         save->mmcr0 = env->spr[SPR_POWER_MMCR0];
205         save->mmcr1 = env->spr[SPR_POWER_MMCR1];
206         save->mmcr2 = env->spr[SPR_POWER_MMCR2];
207         save->mmcra = env->spr[SPR_POWER_MMCRA];
208         save->sdar = env->spr[SPR_POWER_SDAR];
209         save->siar = env->spr[SPR_POWER_SIAR];
210         save->sier = env->spr[SPR_POWER_SIER];
211         save->vscr = ppc_get_vscr(env);
212         save->fpscr = env->fpscr;
213     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
214         save->tb_offset = env->tb_env->tb_offset;
215     }
216 }
217 
/*
 * Fix-ups to run after a register state has been loaded into @env:
 * recompute hflags, re-evaluate pending interrupts, flush the TLB of @cs
 * and drop any outstanding reservation.
 */
static void nested_post_load_state(CPUPPCState *env, CPUState *cs)
{
    /*
     * compute hflags and possible interrupts.
     */
    hreg_compute_hflags(env);
    ppc_maybe_interrupt(env);
    /*
     * Nested HV does not tag TLB entries between L1 and L2, so must
     * flush on transition.
     */
    tlb_flush(cs);
    env->reserve_addr = -1; /* Reset the reservation */
}
232 
233 static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load)
234 {
235     CPUPPCState *env = &cpu->env;
236     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
237 
238     memcpy(env->gpr, load->gpr, sizeof(env->gpr));
239 
240     env->lr = load->lr;
241     env->ctr = load->ctr;
242     env->cfar = load->cfar;
243     env->msr = load->msr;
244     env->nip = load->nip;
245 
246     ppc_set_cr(env, load->cr);
247     cpu_write_xer(env, load->xer);
248 
249     env->spr[SPR_LPCR] = load->lpcr;
250     env->spr[SPR_LPIDR] = load->lpidr;
251     env->spr[SPR_PCR] = load->pcr;
252     env->spr[SPR_DPDES] = load->dpdes;
253     env->spr[SPR_HFSCR] = load->hfscr;
254     env->spr[SPR_SRR0] = load->srr0;
255     env->spr[SPR_SRR1] = load->srr1;
256     env->spr[SPR_SPRG0] = load->sprg0;
257     env->spr[SPR_SPRG1] = load->sprg1;
258     env->spr[SPR_SPRG2] = load->sprg2;
259     env->spr[SPR_SPRG3] = load->sprg3;
260     env->spr[SPR_BOOKS_PID] = load->pidr;
261     env->spr[SPR_PPR] = load->ppr;
262 
263     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
264         env->spr[SPR_AMOR] = load->amor;
265         env->spr[SPR_DAWR0] = load->dawr0;
266         env->spr[SPR_DAWRX0] = load->dawrx0;
267         env->spr[SPR_CIABR] = load->ciabr;
268         env->spr[SPR_PURR] = load->purr;
269         env->spr[SPR_SPURR] = load->purr;
270         env->spr[SPR_IC] = load->ic;
271         env->spr[SPR_VTB] = load->vtb;
272         env->spr[SPR_HDAR] = load->hdar;
273         env->spr[SPR_HDSISR] = load->hdsisr;
274         env->spr[SPR_HEIR] = load->heir;
275         env->spr[SPR_ASDR] = load->asdr;
276         env->spr[SPR_DAWR1] = load->dawr1;
277         env->spr[SPR_DAWRX1] = load->dawrx1;
278         env->spr[SPR_DEXCR] = load->dexcr;
279         env->spr[SPR_HDEXCR] = load->hdexcr;
280         env->spr[SPR_HASHKEYR] = load->hashkeyr;
281         env->spr[SPR_HASHPKEYR] = load->hashpkeyr;
282         memcpy(env->vsr, load->vsr, sizeof(env->vsr));
283         env->spr[SPR_EBBHR] = load->ebbhr;
284         env->spr[SPR_TAR] = load->tar;
285         env->spr[SPR_EBBRR] = load->ebbrr;
286         env->spr[SPR_BESCR] = load->bescr;
287         env->spr[SPR_IAMR] = load->iamr;
288         env->spr[SPR_AMR] = load->amr;
289         env->spr[SPR_UAMOR] = load->uamor;
290         env->spr[SPR_DSCR] = load->dscr;
291         env->spr[SPR_FSCR] = load->fscr;
292         env->spr[SPR_PSPB] = load->pspb;
293         env->spr[SPR_CTRL] = load->ctrl;
294         env->spr[SPR_VRSAVE] = load->vrsave;
295         env->spr[SPR_DAR] = load->dar;
296         env->spr[SPR_DSISR] = load->dsisr;
297         env->spr[SPR_POWER_PMC1] = load->pmc1;
298         env->spr[SPR_POWER_PMC2] = load->pmc2;
299         env->spr[SPR_POWER_PMC3] = load->pmc3;
300         env->spr[SPR_POWER_PMC4] = load->pmc4;
301         env->spr[SPR_POWER_PMC5] = load->pmc5;
302         env->spr[SPR_POWER_PMC6] = load->pmc6;
303         env->spr[SPR_POWER_MMCR0] = load->mmcr0;
304         env->spr[SPR_POWER_MMCR1] = load->mmcr1;
305         env->spr[SPR_POWER_MMCR2] = load->mmcr2;
306         env->spr[SPR_POWER_MMCRA] = load->mmcra;
307         env->spr[SPR_POWER_SDAR] = load->sdar;
308         env->spr[SPR_POWER_SIAR] = load->siar;
309         env->spr[SPR_POWER_SIER] = load->sier;
310         ppc_store_vscr(env, load->vscr);
311         ppc_store_fpscr(env, load->fpscr);
312     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
313         env->tb_env->tb_offset = load->tb_offset;
314     }
315 }
316 
317 /*
318  * When this handler returns, the environment is switched to the L2 guest
319  * and TCG begins running that. spapr_exit_nested() performs the switch from
320  * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
321  */
322 static target_ulong h_enter_nested(PowerPCCPU *cpu,
323                                    SpaprMachineState *spapr,
324                                    target_ulong opcode,
325                                    target_ulong *args)
326 {
327     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
328     CPUPPCState *env = &cpu->env;
329     CPUState *cs = CPU(cpu);
330     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
331     struct nested_ppc_state l2_state;
332     target_ulong hv_ptr = args[0];
333     target_ulong regs_ptr = args[1];
334     target_ulong hdec, now = cpu_ppc_load_tbl(env);
335     target_ulong lpcr, lpcr_mask;
336     struct kvmppc_hv_guest_state *hvstate;
337     struct kvmppc_hv_guest_state hv_state;
338     struct kvmppc_pt_regs *regs;
339     hwaddr len;
340 
341     if (spapr->nested.ptcr == 0) {
342         return H_NOT_AVAILABLE;
343     }
344 
345     len = sizeof(*hvstate);
346     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
347                                 MEMTXATTRS_UNSPECIFIED);
348     if (len != sizeof(*hvstate)) {
349         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
350         return H_PARAMETER;
351     }
352 
353     memcpy(&hv_state, hvstate, len);
354 
355     address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
356 
357     /*
358      * We accept versions 1 and 2. Version 2 fields are unused because TCG
359      * does not implement DAWR*.
360      */
361     if (hv_state.version > HV_GUEST_STATE_VERSION) {
362         return H_PARAMETER;
363     }
364 
365     if (hv_state.lpid == 0) {
366         return H_PARAMETER;
367     }
368 
369     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
370     if (!spapr_cpu->nested_host_state) {
371         return H_NO_MEM;
372     }
373 
374     assert(env->spr[SPR_LPIDR] == 0);
375     assert(env->spr[SPR_DPDES] == 0);
376     nested_save_state(spapr_cpu->nested_host_state, cpu);
377 
378     len = sizeof(*regs);
379     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
380                                 MEMTXATTRS_UNSPECIFIED);
381     if (!regs || len != sizeof(*regs)) {
382         address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
383         g_free(spapr_cpu->nested_host_state);
384         return H_P2;
385     }
386 
387     len = sizeof(l2_state.gpr);
388     assert(len == sizeof(regs->gpr));
389     memcpy(l2_state.gpr, regs->gpr, len);
390 
391     l2_state.lr = regs->link;
392     l2_state.ctr = regs->ctr;
393     l2_state.xer = regs->xer;
394     l2_state.cr = regs->ccr;
395     l2_state.msr = regs->msr;
396     l2_state.nip = regs->nip;
397 
398     address_space_unmap(CPU(cpu)->as, regs, len, len, false);
399 
400     l2_state.cfar = hv_state.cfar;
401     l2_state.lpidr = hv_state.lpid;
402 
403     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
404     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
405     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
406     lpcr &= ~LPCR_LPES0;
407     l2_state.lpcr = lpcr & pcc->lpcr_mask;
408 
409     l2_state.pcr = hv_state.pcr;
410     /* hv_state.amor is not used */
411     l2_state.dpdes = hv_state.dpdes;
412     l2_state.hfscr = hv_state.hfscr;
413     /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
414     l2_state.srr0 = hv_state.srr0;
415     l2_state.srr1 = hv_state.srr1;
416     l2_state.sprg0 = hv_state.sprg[0];
417     l2_state.sprg1 = hv_state.sprg[1];
418     l2_state.sprg2 = hv_state.sprg[2];
419     l2_state.sprg3 = hv_state.sprg[3];
420     l2_state.pidr = hv_state.pidr;
421     l2_state.ppr = hv_state.ppr;
422     l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
423 
424     /*
425      * Switch to the nested guest environment and start the "hdec" timer.
426      */
427     nested_load_state(cpu, &l2_state);
428     nested_post_load_state(env, cs);
429 
430     hdec = hv_state.hdec_expiry - now;
431     cpu_ppc_hdecr_init(env);
432     cpu_ppc_store_hdecr(env, hdec);
433 
434     /*
435      * The hv_state.vcpu_token is not needed. It is used by the KVM
436      * implementation to remember which L2 vCPU last ran on which physical
437      * CPU so as to invalidate process scope translations if it is moved
438      * between physical CPUs. For now TLBs are always flushed on L1<->L2
439      * transitions so this is not a problem.
440      *
441      * Could validate that the same vcpu_token does not attempt to run on
442      * different L1 vCPUs at the same time, but that would be a L1 KVM bug
443      * and it's not obviously worth a new data structure to do it.
444      */
445 
446     spapr_cpu->in_nested = true;
447 
448     /*
449      * The spapr hcall helper sets env->gpr[3] to the return value, but at
450      * this point the L1 is not returning from the hcall but rather we
451      * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
452      * to leave it unchanged.
453      */
454     return env->gpr[3];
455 }
456 
457 static void spapr_exit_nested_hv(PowerPCCPU *cpu, int excp)
458 {
459     CPUPPCState *env = &cpu->env;
460     CPUState *cs = CPU(cpu);
461     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
462     struct nested_ppc_state l2_state;
463     target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
464     target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
465     target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
466     struct kvmppc_hv_guest_state *hvstate;
467     struct kvmppc_pt_regs *regs;
468     hwaddr len;
469 
470     nested_save_state(&l2_state, cpu);
471     hsrr0 = env->spr[SPR_HSRR0];
472     hsrr1 = env->spr[SPR_HSRR1];
473     hdar = env->spr[SPR_HDAR];
474     hdsisr = env->spr[SPR_HDSISR];
475     asdr = env->spr[SPR_ASDR];
476 
477     /*
478      * Switch back to the host environment (including for any error).
479      */
480     assert(env->spr[SPR_LPIDR] != 0);
481     nested_load_state(cpu, spapr_cpu->nested_host_state);
482     nested_post_load_state(env, cs);
483     env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
484 
485     cpu_ppc_hdecr_exit(env);
486 
487     spapr_cpu->in_nested = false;
488 
489     g_free(spapr_cpu->nested_host_state);
490     spapr_cpu->nested_host_state = NULL;
491 
492     len = sizeof(*hvstate);
493     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
494                                 MEMTXATTRS_UNSPECIFIED);
495     if (len != sizeof(*hvstate)) {
496         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
497         env->gpr[3] = H_PARAMETER;
498         return;
499     }
500 
501     hvstate->cfar = l2_state.cfar;
502     hvstate->lpcr = l2_state.lpcr;
503     hvstate->pcr = l2_state.pcr;
504     hvstate->dpdes = l2_state.dpdes;
505     hvstate->hfscr = l2_state.hfscr;
506 
507     if (excp == POWERPC_EXCP_HDSI) {
508         hvstate->hdar = hdar;
509         hvstate->hdsisr = hdsisr;
510         hvstate->asdr = asdr;
511     } else if (excp == POWERPC_EXCP_HISI) {
512         hvstate->asdr = asdr;
513     }
514 
515     /* HEIR should be implemented for HV mode and saved here. */
516     hvstate->srr0 = l2_state.srr0;
517     hvstate->srr1 = l2_state.srr1;
518     hvstate->sprg[0] = l2_state.sprg0;
519     hvstate->sprg[1] = l2_state.sprg1;
520     hvstate->sprg[2] = l2_state.sprg2;
521     hvstate->sprg[3] = l2_state.sprg3;
522     hvstate->pidr = l2_state.pidr;
523     hvstate->ppr = l2_state.ppr;
524 
525     /* Is it okay to specify write length larger than actual data written? */
526     address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
527 
528     len = sizeof(*regs);
529     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
530                                 MEMTXATTRS_UNSPECIFIED);
531     if (!regs || len != sizeof(*regs)) {
532         address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
533         env->gpr[3] = H_P2;
534         return;
535     }
536 
537     len = sizeof(env->gpr);
538     assert(len == sizeof(regs->gpr));
539     memcpy(regs->gpr, l2_state.gpr, len);
540 
541     regs->link = l2_state.lr;
542     regs->ctr = l2_state.ctr;
543     regs->xer = l2_state.xer;
544     regs->ccr = l2_state.cr;
545 
546     if (excp == POWERPC_EXCP_MCHECK ||
547         excp == POWERPC_EXCP_RESET ||
548         excp == POWERPC_EXCP_SYSCALL) {
549         regs->nip = l2_state.srr0;
550         regs->msr = l2_state.srr1 & env->msr_mask;
551     } else {
552         regs->nip = hsrr0;
553         regs->msr = hsrr1 & env->msr_mask;
554     }
555 
556     /* Is it okay to specify write length larger than actual data written? */
557     address_space_unmap(CPU(cpu)->as, regs, len, len, true);
558 }
559 
560 static bool spapr_nested_vcpu_check(SpaprMachineStateNestedGuest *guest,
561                                     target_ulong vcpuid, bool inoutbuf)
562 {
563     struct SpaprMachineStateNestedGuestVcpu *vcpu;
564     /*
565      * Perform sanity checks for the provided vcpuid of a guest.
566      * For now, ensure its valid, allocated and enabled for use.
567      */
568 
569     if (vcpuid >= PAPR_NESTED_GUEST_VCPU_MAX) {
570         return false;
571     }
572 
573     if (!(vcpuid < guest->nr_vcpus)) {
574         return false;
575     }
576 
577     vcpu = &guest->vcpus[vcpuid];
578     if (!vcpu->enabled) {
579         return false;
580     }
581 
582     if (!inoutbuf) {
583         return true;
584     }
585 
586     /* Check to see if the in/out buffers are registered */
587     if (vcpu->runbufin.addr && vcpu->runbufout.addr) {
588         return true;
589     }
590 
591     return false;
592 }
593 
594 static void *get_vcpu_state_ptr(SpaprMachineStateNestedGuest *guest,
595                               target_ulong vcpuid)
596 {
597     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
598     return &guest->vcpus[vcpuid].state;
599 }
600 
601 static void *get_vcpu_ptr(SpaprMachineStateNestedGuest *guest,
602                                    target_ulong vcpuid)
603 {
604     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
605     return &guest->vcpus[vcpuid];
606 }
607 
608 static void *get_guest_ptr(SpaprMachineStateNestedGuest *guest,
609                            target_ulong vcpuid)
610 {
611     return guest; /* for GSBE_NESTED */
612 }
613 
614 /*
615  * set=1 means the L1 is trying to set some state
616  * set=0 means the L1 is trying to get some state
617  */
/*
 * Transfer one 64-bit value between the internal field @a (host endian)
 * and the element buffer @b (big endian). @set copies @b into @a,
 * otherwise @a is copied out to @b.
 */
static void copy_state_8to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (!set) {
        *element = cpu_to_be64(*internal);
        return;
    }

    /* set takes from the Big endian element_buf and sets internal buffer */
    *internal = be64_to_cpu(*element);
}
628 
/*
 * Transfer one 32-bit value between the internal field @a (host endian)
 * and the element buffer @b (big endian). @set copies @b into @a,
 * otherwise @a is copied out to @b.
 */
static void copy_state_4to4(void *a, void *b, bool set)
{
    uint32_t *internal = a;
    uint32_t *element = b;

    if (!set) {
        *element = cpu_to_be32(*internal);
        return;
    }

    *internal = be32_to_cpu(*element);
}
637 
/*
 * Transfer one 128-bit value between the internal field @a and the big
 * endian element buffer @b. Each 64-bit half is byte swapped as needed and
 * the two halves trade places: doubleword 0 on one side corresponds to
 * doubleword 1 on the other. @set copies @b into @a, otherwise @a is copied
 * out to @b.
 */
static void copy_state_16to16(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (!set) {
        element[1] = cpu_to_be64(internal[0]);
        element[0] = cpu_to_be64(internal[1]);
        return;
    }

    internal[1] = be64_to_cpu(element[0]);
    internal[0] = be64_to_cpu(element[1]);
}
656 
/*
 * Transfer a value between a 64-bit internal field @a (host endian) and a
 * 32-bit big endian element in @b. On @set the element is zero-extended
 * into @a; otherwise the low 32 bits of @a are written to @b.
 */
static void copy_state_4to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint32_t *element = b;

    if (!set) {
        uint32_t low = (uint32_t) *internal;

        *element = cpu_to_be32(low);
        return;
    }

    *internal = (uint64_t) be32_to_cpu(*element);
}
665 
666 static void copy_state_pagetbl(void *a, void *b, bool set)
667 {
668     uint64_t *pagetbl;
669     uint64_t *buf; /* 3 double words */
670     uint64_t rts;
671 
672     assert(set);
673 
674     pagetbl = a;
675     buf = b;
676 
677     *pagetbl = be64_to_cpu(buf[0]);
678     /* as per ISA section 6.7.6.1 */
679     *pagetbl |= PATE0_HR; /* Host Radix bit is 1 */
680 
681     /* RTS */
682     rts = be64_to_cpu(buf[1]);
683     assert(rts == 52);
684     rts = rts - 31; /* since radix tree size = 2^(RTS+31) */
685     *pagetbl |=  ((rts & 0x7) << 5); /* RTS2 is bit 56:58 */
686     *pagetbl |=  (((rts >> 3) & 0x3) << 61); /* RTS1 is bit 1:2 */
687 
688     /* RPDS {Size = 2^(RPDS+3) , RPDS >=5} */
689     *pagetbl |= 63 - clz64(be64_to_cpu(buf[2])) - 3;
690 }
691 
/*
 * Build the process table doubleword in @a from the two big endian
 * doublewords in @b: buf[0] is the table base, buf[1] the table size in
 * bytes. Set-only (asserted). Only sizes of 2^12 and 2^24 bytes are
 * accepted; anything else hits g_assert_not_reached().
 */
static void copy_state_proctbl(void *a, void *b, bool set)
{
    uint64_t *proctbl = a;
    uint64_t *buf = b; /* 2 double words */

    assert(set);

    /* PRTB: Process Table Base */
    *proctbl = be64_to_cpu(buf[0]);

    /* PRTS: Process Table Size = 2^(12+PRTS) */
    switch (be64_to_cpu(buf[1])) {
    case 1ULL << 12:
        *proctbl |= 0;
        break;
    case 1ULL << 24:
        *proctbl |= 12;
        break;
    default:
        g_assert_not_reached();
    }
}
712 
713 static void copy_state_runbuf(void *a, void *b, bool set)
714 {
715     uint64_t *buf; /* 2 double words */
716     struct SpaprMachineStateNestedGuestVcpuRunBuf *runbuf;
717 
718     assert(set);
719 
720     runbuf = a;
721     buf = b;
722 
723     runbuf->addr = be64_to_cpu(buf[0]);
724     assert(runbuf->addr);
725 
726     /* per spec */
727     assert(be64_to_cpu(buf[1]) <= 16384);
728 
729     /*
730      * This will also hit in the input buffer but should be fine for
731      * now. If not we can split this function.
732      */
733     assert(be64_to_cpu(buf[1]) >= VCPU_OUT_BUF_MIN_SZ);
734 
735     runbuf->size = be64_to_cpu(buf[1]);
736 }
737 
738 /* tell the L1 how big we want the output vcpu run buffer */
739 static void out_buf_min_size(void *a, void *b, bool set)
740 {
741     uint64_t *buf; /* 1 double word */
742 
743     assert(!set);
744 
745     buf = b;
746 
747     buf[0] = cpu_to_be64(VCPU_OUT_BUF_MIN_SZ);
748 }
749 
750 static void copy_logical_pvr(void *a, void *b, bool set)
751 {
752     SpaprMachineStateNestedGuest *guest;
753     uint32_t *buf; /* 1 word */
754     uint32_t *pvr_logical_ptr;
755     uint32_t pvr_logical;
756     target_ulong pcr = 0;
757 
758     pvr_logical_ptr = a;
759     buf = b;
760 
761     if (!set) {
762         buf[0] = cpu_to_be32(*pvr_logical_ptr);
763         return;
764     }
765 
766     pvr_logical = be32_to_cpu(buf[0]);
767 
768     *pvr_logical_ptr = pvr_logical;
769 
770     if (*pvr_logical_ptr) {
771         switch (*pvr_logical_ptr) {
772         case CPU_POWERPC_LOGICAL_3_10:
773             pcr = PCR_COMPAT_3_10 | PCR_COMPAT_3_00;
774             break;
775         case CPU_POWERPC_LOGICAL_3_00:
776             pcr = PCR_COMPAT_3_00;
777             break;
778         default:
779             qemu_log_mask(LOG_GUEST_ERROR,
780                           "Could not set PCR for LPVR=0x%08x\n",
781                           *pvr_logical_ptr);
782             return;
783         }
784     }
785 
786     guest = container_of(pvr_logical_ptr,
787                          struct SpaprMachineStateNestedGuest,
788                          pvr_logical);
789     for (int i = 0; i < guest->nr_vcpus; i++) {
790         guest->vcpus[i].state.pcr = ~pcr | HVMASK_PCR;
791     }
792 }
793 
794 static void copy_tb_offset(void *a, void *b, bool set)
795 {
796     SpaprMachineStateNestedGuest *guest;
797     uint64_t *buf; /* 1 double word */
798     uint64_t *tb_offset_ptr;
799     uint64_t tb_offset;
800 
801     tb_offset_ptr = a;
802     buf = b;
803 
804     if (!set) {
805         buf[0] = cpu_to_be64(*tb_offset_ptr);
806         return;
807     }
808 
809     tb_offset = be64_to_cpu(buf[0]);
810     /* need to copy this to the individual tb_offset for each vcpu */
811     guest = container_of(tb_offset_ptr,
812                          struct SpaprMachineStateNestedGuest,
813                          tb_offset);
814     for (int i = 0; i < guest->nr_vcpus; i++) {
815         guest->vcpus[i].tb_offset = tb_offset;
816     }
817 }
818 
/*
 * Transfer the 64-bit value at @a (named hdecr_expiry_tb here) to or from
 * the big endian doubleword at @b. @set copies @b into @a, otherwise @a is
 * copied out to @b.
 */
static void copy_state_hdecr(void *a, void *b, bool set)
{
    uint64_t *hdecr_expiry_tb = a;
    uint64_t *buf = b; /* 1 double word */

    if (set) {
        *hdecr_expiry_tb = be64_to_cpu(buf[0]);
    } else {
        buf[0] = cpu_to_be64(*hdecr_expiry_tb);
    }
}
834 
835 struct guest_state_element_type guest_state_element_types[] = {
836     GUEST_STATE_ELEMENT_NOP(GSB_HV_VCPU_IGNORED_ID, 0),
837     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR0,  gpr[0]),
838     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR1,  gpr[1]),
839     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR2,  gpr[2]),
840     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR3,  gpr[3]),
841     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR4,  gpr[4]),
842     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR5,  gpr[5]),
843     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR6,  gpr[6]),
844     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR7,  gpr[7]),
845     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR8,  gpr[8]),
846     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR9,  gpr[9]),
847     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR10, gpr[10]),
848     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR11, gpr[11]),
849     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR12, gpr[12]),
850     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR13, gpr[13]),
851     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR14, gpr[14]),
852     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR15, gpr[15]),
853     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR16, gpr[16]),
854     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR17, gpr[17]),
855     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR18, gpr[18]),
856     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR19, gpr[19]),
857     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR20, gpr[20]),
858     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR21, gpr[21]),
859     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR22, gpr[22]),
860     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR23, gpr[23]),
861     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR24, gpr[24]),
862     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR25, gpr[25]),
863     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR26, gpr[26]),
864     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR27, gpr[27]),
865     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR28, gpr[28]),
866     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR29, gpr[29]),
867     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR30, gpr[30]),
868     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR31, gpr[31]),
869     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_NIA, nip),
870     GSE_ENV_DWM(GSB_VCPU_SPR_MSR, msr, HVMASK_MSR),
871     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTR, ctr),
872     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_LR, lr),
873     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_XER, xer),
874     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_CR, cr),
875     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_MMCR3),
876     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER2),
877     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER3),
878     GUEST_STATE_ELEMENT_NOP_W(GSB_VCPU_SPR_WORT),
879     GSE_ENV_DWM(GSB_VCPU_SPR_LPCR, lpcr, HVMASK_LPCR),
880     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMOR, amor),
881     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HFSCR, hfscr),
882     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR0, dawr0),
883     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX0, dawrx0),
884     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CIABR, ciabr),
885     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PURR,  purr),
886     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPURR, spurr),
887     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IC,    ic),
888     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_VTB,   vtb),
889     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HDAR,  hdar),
890     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HDSISR, hdsisr),
891     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HEIR,   heir),
892     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_ASDR,  asdr),
893     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR0,  srr0),
894     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR1,  srr1),
895     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG0, sprg0),
896     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG1, sprg1),
897     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG2, sprg2),
898     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG3, sprg3),
899     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PIDR,   pidr),
900     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CFAR,  cfar),
901     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PPR,   ppr),
902     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR1, dawr1),
903     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX1, dawrx1),
904     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DEXCR, dexcr),
905     GSE_ENV_DWM(GSB_VCPU_SPR_HDEXCR, hdexcr, HVMASK_HDEXCR),
906     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHKEYR, hashkeyr),
907     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHPKEYR, hashpkeyr),
908     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR0, vsr[0]),
909     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR1, vsr[1]),
910     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR2, vsr[2]),
911     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR3, vsr[3]),
912     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR4, vsr[4]),
913     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR5, vsr[5]),
914     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR6, vsr[6]),
915     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR7, vsr[7]),
916     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR8, vsr[8]),
917     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR9, vsr[9]),
918     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR10, vsr[10]),
919     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR11, vsr[11]),
920     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR12, vsr[12]),
921     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR13, vsr[13]),
922     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR14, vsr[14]),
923     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR15, vsr[15]),
924     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR16, vsr[16]),
925     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR17, vsr[17]),
926     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR18, vsr[18]),
927     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR19, vsr[19]),
928     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR20, vsr[20]),
929     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR21, vsr[21]),
930     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR22, vsr[22]),
931     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR23, vsr[23]),
932     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR24, vsr[24]),
933     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR25, vsr[25]),
934     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR26, vsr[26]),
935     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR27, vsr[27]),
936     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR28, vsr[28]),
937     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR29, vsr[29]),
938     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR30, vsr[30]),
939     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR31, vsr[31]),
940     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR32, vsr[32]),
941     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR33, vsr[33]),
942     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR34, vsr[34]),
943     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR35, vsr[35]),
944     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR36, vsr[36]),
945     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR37, vsr[37]),
946     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR38, vsr[38]),
947     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR39, vsr[39]),
948     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR40, vsr[40]),
949     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR41, vsr[41]),
950     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR42, vsr[42]),
951     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR43, vsr[43]),
952     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR44, vsr[44]),
953     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR45, vsr[45]),
954     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR46, vsr[46]),
955     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR47, vsr[47]),
956     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR48, vsr[48]),
957     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR49, vsr[49]),
958     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR50, vsr[50]),
959     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR51, vsr[51]),
960     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR52, vsr[52]),
961     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR53, vsr[53]),
962     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR54, vsr[54]),
963     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR55, vsr[55]),
964     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR56, vsr[56]),
965     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR57, vsr[57]),
966     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR58, vsr[58]),
967     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR59, vsr[59]),
968     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR60, vsr[60]),
969     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR61, vsr[61]),
970     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR62, vsr[62]),
971     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR63, vsr[63]),
972     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBHR, ebbhr),
973     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_TAR,   tar),
974     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBRR, ebbrr),
975     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_BESCR, bescr),
976     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IAMR,  iamr),
977     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMR,   amr),
978     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_UAMOR, uamor),
979     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DSCR,  dscr),
980     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FSCR,  fscr),
981     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PSPB,   pspb),
982     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTRL,  ctrl),
983     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_VRSAVE, vrsave),
984     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAR,   dar),
985     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DSISR,  dsisr),
986     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC1,   pmc1),
987     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC2,   pmc2),
988     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC3,   pmc3),
989     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC4,   pmc4),
990     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC5,   pmc5),
991     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC6,   pmc6),
992     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR0, mmcr0),
993     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR1, mmcr1),
994     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR2, mmcr2),
995     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCRA, mmcra),
996     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SDAR , sdar),
997     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIAR , siar),
998     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIER , sier),
999     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_VSCR,  vscr),
1000     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FPSCR, fpscr),
1001     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_DEC_EXPIRE_TB, dec_expiry_tb),
1002     GSBE_NESTED(GSB_PART_SCOPED_PAGETBL, 0x18, parttbl[0],  copy_state_pagetbl),
1003     GSBE_NESTED(GSB_PROCESS_TBL,         0x10, parttbl[1],  copy_state_proctbl),
1004     GSBE_NESTED(GSB_VCPU_LPVR,           0x4,  pvr_logical, copy_logical_pvr),
1005     GSBE_NESTED_MSK(GSB_TB_OFFSET, 0x8, tb_offset, copy_tb_offset,
1006                     HVMASK_TB_OFFSET),
1007     GSBE_NESTED_VCPU(GSB_VCPU_IN_BUFFER, 0x10, runbufin,    copy_state_runbuf),
1008     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout,   copy_state_runbuf),
1009     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size),
1010     GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb,
1011                      copy_state_hdecr)
1012 };
1013 
1014 void spapr_nested_gsb_init(void)
1015 {
1016     struct guest_state_element_type *type;
1017 
1018     /* Init the guest state elements lookup table, flags for now */
1019     for (int i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) {
1020         type = &guest_state_element_types[i];
1021 
1022         assert(type->id <= GSB_LAST);
1023         if (type->id >= GSB_VCPU_SPR_HDAR)
1024             /* 0xf000 - 0xf005 Thread + RO */
1025             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY;
1026         else if (type->id >= GSB_VCPU_IN_BUFFER)
1027             /* 0x0c00 - 0xf000 Thread + RW */
1028             type->flags = 0;
1029         else if (type->id >= GSB_VCPU_LPVR)
1030             /* 0x0003 - 0x0bff Guest + RW */
1031             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1032         else if (type->id >= GSB_HV_VCPU_STATE_SIZE)
1033             /* 0x0001 - 0x0002 Guest + RO */
1034             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY |
1035                           GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1036     }
1037 }
1038 
1039 static struct guest_state_element *guest_state_element_next(
1040     struct guest_state_element *element,
1041     int64_t *len,
1042     int64_t *num_elements)
1043 {
1044     uint16_t size;
1045 
1046     /* size is of element->value[] only. Not whole guest_state_element */
1047     size = be16_to_cpu(element->size);
1048 
1049     if (len) {
1050         *len -= size + offsetof(struct guest_state_element, value);
1051     }
1052 
1053     if (num_elements) {
1054         *num_elements -= 1;
1055     }
1056 
1057     return (struct guest_state_element *)(element->value + size);
1058 }
1059 
1060 static
1061 struct guest_state_element_type *guest_state_element_type_find(uint16_t id)
1062 {
1063     int i;
1064 
1065     for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++)
1066         if (id == guest_state_element_types[i].id) {
1067             return &guest_state_element_types[i];
1068         }
1069 
1070     return NULL;
1071 }
1072 
1073 static void log_element(struct guest_state_element *element,
1074                         struct guest_state_request *gsr)
1075 {
1076     qemu_log_mask(LOG_GUEST_ERROR, "h_guest_%s_state id:0x%04x size:0x%04x",
1077                   gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get",
1078                   be16_to_cpu(element->id), be16_to_cpu(element->size));
1079     qemu_log_mask(LOG_GUEST_ERROR, "buf:0x%016"PRIx64" ...\n",
1080                   be64_to_cpu(*(uint64_t *)element->value));
1081 }
1082 
1083 static bool guest_state_request_check(struct guest_state_request *gsr)
1084 {
1085     int64_t num_elements, len = gsr->len;
1086     struct guest_state_buffer *gsb = gsr->gsb;
1087     struct guest_state_element *element;
1088     struct guest_state_element_type *type;
1089     uint16_t id, size;
1090 
1091     /* gsb->num_elements = 0 == 32 bits long */
1092     assert(len >= 4);
1093 
1094     num_elements = be32_to_cpu(gsb->num_elements);
1095     element = gsb->elements;
1096     len -= sizeof(gsb->num_elements);
1097 
1098     /* Walk the buffer to validate the length */
1099     while (num_elements) {
1100 
1101         id = be16_to_cpu(element->id);
1102         size = be16_to_cpu(element->size);
1103 
1104         if (false) {
1105             log_element(element, gsr);
1106         }
1107         /* buffer size too small */
1108         if (len < 0) {
1109             return false;
1110         }
1111 
1112         type = guest_state_element_type_find(id);
1113         if (!type) {
1114             qemu_log_mask(LOG_GUEST_ERROR, "Element ID %04x unknown\n", id);
1115             log_element(element, gsr);
1116             return false;
1117         }
1118 
1119         if (id == GSB_HV_VCPU_IGNORED_ID) {
1120             goto next_element;
1121         }
1122 
1123         if (size != type->size) {
1124             qemu_log_mask(LOG_GUEST_ERROR, "Size mismatch. Element ID:%04x."
1125                           "Size Exp:%i Got:%i\n", id, type->size, size);
1126             log_element(element, gsr);
1127             return false;
1128         }
1129 
1130         if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) &&
1131             (gsr->flags & GUEST_STATE_REQUEST_SET)) {
1132             qemu_log_mask(LOG_GUEST_ERROR, "Trying to set a read-only Element "
1133                           "ID:%04x.\n", id);
1134             return false;
1135         }
1136 
1137         if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
1138             /* guest wide element type */
1139             if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) {
1140                 qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide "
1141                               "Element ID:%04x.\n", id);
1142                 return false;
1143             }
1144         } else {
1145             /* thread wide element type */
1146             if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) {
1147                 qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide "
1148                               "Element ID:%04x.\n", id);
1149                 return false;
1150             }
1151         }
1152 next_element:
1153         element = guest_state_element_next(element, &len, &num_elements);
1154 
1155     }
1156     return true;
1157 }
1158 
1159 static bool is_gsr_invalid(struct guest_state_request *gsr,
1160                                    struct guest_state_element *element,
1161                                    struct guest_state_element_type *type)
1162 {
1163     if ((gsr->flags & GUEST_STATE_REQUEST_SET) &&
1164         (*(uint64_t *)(element->value) & ~(type->mask))) {
1165         log_element(element, gsr);
1166         qemu_log_mask(LOG_GUEST_ERROR, "L1 can't set reserved bits "
1167                       "(allowed mask: 0x%08"PRIx64")\n", type->mask);
1168         return true;
1169     }
1170     return false;
1171 }
1172 
1173 static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu,
1174                                              SpaprMachineState *spapr,
1175                                              target_ulong opcode,
1176                                              target_ulong *args)
1177 {
1178     CPUPPCState *env = &cpu->env;
1179     target_ulong flags = args[0];
1180 
1181     if (flags) { /* don't handle any flags capabilities for now */
1182         return H_PARAMETER;
1183     }
1184 
1185     /* P10 capabilities */
1186     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1187         spapr->max_compat_pvr)) {
1188         env->gpr[4] |= H_GUEST_CAPABILITIES_P10_MODE;
1189     }
1190 
1191     /* P9 capabilities */
1192     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1193         spapr->max_compat_pvr)) {
1194         env->gpr[4] |= H_GUEST_CAPABILITIES_P9_MODE;
1195     }
1196 
1197     return H_SUCCESS;
1198 }
1199 
1200 static target_ulong h_guest_set_capabilities(PowerPCCPU *cpu,
1201                                              SpaprMachineState *spapr,
1202                                              target_ulong opcode,
1203                                               target_ulong *args)
1204 {
1205     CPUPPCState *env = &cpu->env;
1206     target_ulong flags = args[0];
1207     target_ulong capabilities = args[1];
1208     env->gpr[4] = 0;
1209 
1210     if (flags) { /* don't handle any flags capabilities for now */
1211         return H_PARAMETER;
1212     }
1213 
1214     if (capabilities & H_GUEST_CAPABILITIES_COPY_MEM) {
1215         env->gpr[4] = 1;
1216         return H_P2; /* isn't supported */
1217     }
1218 
1219     /*
1220      * If there are no capabilities configured, set the R5 to the index of
1221      * the first supported Power Processor Mode
1222      */
1223     if (!capabilities) {
1224         env->gpr[4] = 1;
1225 
1226         /* set R5 to the first supported Power Processor Mode */
1227         if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1228                              spapr->max_compat_pvr)) {
1229             env->gpr[5] = H_GUEST_CAP_P10_MODE_BMAP;
1230         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1231                                     spapr->max_compat_pvr)) {
1232             env->gpr[5] = H_GUEST_CAP_P9_MODE_BMAP;
1233         }
1234 
1235         return H_P2;
1236     }
1237 
1238     /*
1239      * If an invalid capability is set, R5 should contain the index of the
1240      * invalid capability bit
1241      */
1242     if (capabilities & ~H_GUEST_CAP_VALID_MASK) {
1243         env->gpr[4] = 1;
1244 
1245         /* Set R5 to the index of the invalid capability */
1246         env->gpr[5] = 63 - ctz64(capabilities);
1247 
1248         return H_P2;
1249     }
1250 
1251     if (!spapr->nested.capabilities_set) {
1252         spapr->nested.capabilities_set = true;
1253         spapr->nested.pvr_base = env->spr[SPR_PVR];
1254         return H_SUCCESS;
1255     } else {
1256         return H_STATE;
1257     }
1258 }
1259 
1260 static void
1261 destroy_guest_helper(gpointer value)
1262 {
1263     struct SpaprMachineStateNestedGuest *guest = value;
1264     g_free(guest->vcpus);
1265     g_free(guest);
1266 }
1267 
1268 static target_ulong h_guest_create(PowerPCCPU *cpu,
1269                                    SpaprMachineState *spapr,
1270                                    target_ulong opcode,
1271                                    target_ulong *args)
1272 {
1273     CPUPPCState *env = &cpu->env;
1274     target_ulong flags = args[0];
1275     target_ulong continue_token = args[1];
1276     uint64_t guestid;
1277     int nguests = 0;
1278     struct SpaprMachineStateNestedGuest *guest;
1279 
1280     if (flags) { /* don't handle any flags for now */
1281         return H_UNSUPPORTED_FLAG;
1282     }
1283 
1284     if (continue_token != -1) {
1285         return H_P2;
1286     }
1287 
1288     if (!spapr->nested.capabilities_set) {
1289         return H_STATE;
1290     }
1291 
1292     if (!spapr->nested.guests) {
1293         spapr->nested.guests = g_hash_table_new_full(NULL,
1294                                                      NULL,
1295                                                      NULL,
1296                                                      destroy_guest_helper);
1297     }
1298 
1299     nguests = g_hash_table_size(spapr->nested.guests);
1300 
1301     if (nguests == PAPR_NESTED_GUEST_MAX) {
1302         return H_NO_MEM;
1303     }
1304 
1305     /* Lookup for available guestid */
1306     for (guestid = 1; guestid < PAPR_NESTED_GUEST_MAX; guestid++) {
1307         if (!(g_hash_table_lookup(spapr->nested.guests,
1308                                   GINT_TO_POINTER(guestid)))) {
1309             break;
1310         }
1311     }
1312 
1313     if (guestid == PAPR_NESTED_GUEST_MAX) {
1314         return H_NO_MEM;
1315     }
1316 
1317     guest = g_try_new0(struct SpaprMachineStateNestedGuest, 1);
1318     if (!guest) {
1319         return H_NO_MEM;
1320     }
1321 
1322     guest->pvr_logical = spapr->nested.pvr_base;
1323     g_hash_table_insert(spapr->nested.guests, GINT_TO_POINTER(guestid), guest);
1324     env->gpr[4] = guestid;
1325 
1326     return H_SUCCESS;
1327 }
1328 
1329 static target_ulong h_guest_delete(PowerPCCPU *cpu,
1330                                    SpaprMachineState *spapr,
1331                                    target_ulong opcode,
1332                                    target_ulong *args)
1333 {
1334     target_ulong flags = args[0];
1335     target_ulong guestid = args[1];
1336     struct SpaprMachineStateNestedGuest *guest;
1337 
1338     /*
1339      * handle flag deleteAllGuests, if set:
1340      * guestid is ignored and all guests are deleted
1341      *
1342      */
1343     if (flags & ~H_GUEST_DELETE_ALL_FLAG) {
1344         return H_UNSUPPORTED_FLAG; /* other flag bits reserved */
1345     } else if (flags & H_GUEST_DELETE_ALL_FLAG) {
1346         g_hash_table_destroy(spapr->nested.guests);
1347         return H_SUCCESS;
1348     }
1349 
1350     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
1351     if (!guest) {
1352         return H_P2;
1353     }
1354 
1355     g_hash_table_remove(spapr->nested.guests, GINT_TO_POINTER(guestid));
1356 
1357     return H_SUCCESS;
1358 }
1359 
1360 static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
1361                                         SpaprMachineState *spapr,
1362                                         target_ulong opcode,
1363                                         target_ulong *args)
1364 {
1365     target_ulong flags = args[0];
1366     target_ulong guestid = args[1];
1367     target_ulong vcpuid = args[2];
1368     SpaprMachineStateNestedGuest *guest;
1369 
1370     if (flags) { /* don't handle any flags for now */
1371         return H_UNSUPPORTED_FLAG;
1372     }
1373 
1374     guest = spapr_get_nested_guest(spapr, guestid);
1375     if (!guest) {
1376         return H_P2;
1377     }
1378 
1379     if (vcpuid < guest->nr_vcpus) {
1380         qemu_log_mask(LOG_UNIMP, "vcpuid " TARGET_FMT_ld " already in use.",
1381                       vcpuid);
1382         return H_IN_USE;
1383     }
1384     /* linear vcpuid allocation only */
1385     assert(vcpuid == guest->nr_vcpus);
1386 
1387     if (guest->nr_vcpus >= PAPR_NESTED_GUEST_VCPU_MAX) {
1388         return H_P3;
1389     }
1390 
1391     SpaprMachineStateNestedGuestVcpu *vcpus, *curr_vcpu;
1392     vcpus = g_try_renew(struct SpaprMachineStateNestedGuestVcpu,
1393                         guest->vcpus,
1394                         guest->nr_vcpus + 1);
1395     if (!vcpus) {
1396         return H_NO_MEM;
1397     }
1398     guest->vcpus = vcpus;
1399     curr_vcpu = &vcpus[guest->nr_vcpus];
1400     memset(curr_vcpu, 0, sizeof(SpaprMachineStateNestedGuestVcpu));
1401 
1402     curr_vcpu->enabled = true;
1403     guest->nr_vcpus++;
1404 
1405     return H_SUCCESS;
1406 }
1407 
1408 static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
1409                                  uint64_t vcpuid,
1410                                  struct guest_state_request *gsr)
1411 {
1412     void *ptr;
1413     uint16_t id;
1414     struct guest_state_element *element;
1415     struct guest_state_element_type *type;
1416     int64_t lenleft, num_elements;
1417 
1418     lenleft = gsr->len;
1419 
1420     if (!guest_state_request_check(gsr)) {
1421         return H_P3;
1422     }
1423 
1424     num_elements = be32_to_cpu(gsr->gsb->num_elements);
1425     element = gsr->gsb->elements;
1426     /* Process the elements */
1427     while (num_elements) {
1428         type = NULL;
1429         /* log_element(element, gsr); */
1430 
1431         id = be16_to_cpu(element->id);
1432         if (id == GSB_HV_VCPU_IGNORED_ID) {
1433             goto next_element;
1434         }
1435 
1436         type = guest_state_element_type_find(id);
1437         assert(type);
1438 
1439         /* Get pointer to guest data to get/set */
1440         if (type->location && type->copy) {
1441             ptr = type->location(guest, vcpuid);
1442             assert(ptr);
1443             if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) {
1444                 return H_INVALID_ELEMENT_VALUE;
1445             }
1446             type->copy(ptr + type->offset, element->value,
1447                        gsr->flags & GUEST_STATE_REQUEST_SET ? true : false);
1448         }
1449 
1450 next_element:
1451         element = guest_state_element_next(element, &lenleft, &num_elements);
1452     }
1453 
1454     return H_SUCCESS;
1455 }
1456 
1457 static target_ulong map_and_getset_state(PowerPCCPU *cpu,
1458                                          SpaprMachineStateNestedGuest *guest,
1459                                          uint64_t vcpuid,
1460                                          struct guest_state_request *gsr)
1461 {
1462     target_ulong rc;
1463     int64_t len;
1464     bool is_write;
1465 
1466     len = gsr->len;
1467     /* only get_state would require write access to the provided buffer */
1468     is_write = (gsr->flags & GUEST_STATE_REQUEST_SET) ? false : true;
1469     gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len,
1470                                  is_write, MEMTXATTRS_UNSPECIFIED);
1471     if (!gsr->gsb) {
1472         rc = H_P3;
1473         goto out1;
1474     }
1475 
1476     if (len != gsr->len) {
1477         rc = H_P3;
1478         goto out1;
1479     }
1480 
1481     rc = getset_state(guest, vcpuid, gsr);
1482 
1483 out1:
1484     address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len);
1485     return rc;
1486 }
1487 
1488 static target_ulong h_guest_getset_state(PowerPCCPU *cpu,
1489                                          SpaprMachineState *spapr,
1490                                          target_ulong *args,
1491                                          bool set)
1492 {
1493     target_ulong flags = args[0];
1494     target_ulong lpid = args[1];
1495     target_ulong vcpuid = args[2];
1496     target_ulong buf = args[3];
1497     target_ulong buflen = args[4];
1498     struct guest_state_request gsr;
1499     SpaprMachineStateNestedGuest *guest;
1500 
1501     guest = spapr_get_nested_guest(spapr, lpid);
1502     if (!guest) {
1503         return H_P2;
1504     }
1505     gsr.buf = buf;
1506     assert(buflen <= GSB_MAX_BUF_SIZE);
1507     gsr.len = buflen;
1508     gsr.flags = 0;
1509     if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
1510         gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE;
1511     }
1512     if (flags & !H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
1513         return H_PARAMETER; /* flag not supported yet */
1514     }
1515 
1516     if (set) {
1517         gsr.flags |= GUEST_STATE_REQUEST_SET;
1518     }
1519     return map_and_getset_state(cpu, guest, vcpuid, &gsr);
1520 }
1521 
1522 static target_ulong h_guest_set_state(PowerPCCPU *cpu,
1523                                       SpaprMachineState *spapr,
1524                                       target_ulong opcode,
1525                                       target_ulong *args)
1526 {
1527     return h_guest_getset_state(cpu, spapr, args, true);
1528 }
1529 
1530 static target_ulong h_guest_get_state(PowerPCCPU *cpu,
1531                                       SpaprMachineState *spapr,
1532                                       target_ulong opcode,
1533                                       target_ulong *args)
1534 {
1535     return h_guest_getset_state(cpu, spapr, args, false);
1536 }
1537 
1538 static void exit_nested_store_l2(PowerPCCPU *cpu, int excp,
1539                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1540 {
1541     CPUPPCState *env = &cpu->env;
1542     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1543     target_ulong now, hdar, hdsisr, asdr;
1544 
1545     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr)); /* sanity check */
1546 
1547     now = cpu_ppc_load_tbl(env); /* L2 timebase */
1548     now -= vcpu->tb_offset; /* L1 timebase */
1549     vcpu->state.dec_expiry_tb = now - cpu_ppc_load_decr(env);
1550     cpu_ppc_store_decr(env, spapr_cpu->nested_host_state->dec_expiry_tb - now);
1551     /* backup hdar, hdsisr, asdr if reqd later below */
1552     hdar   = vcpu->state.hdar;
1553     hdsisr = vcpu->state.hdsisr;
1554     asdr   = vcpu->state.asdr;
1555 
1556     nested_save_state(&vcpu->state, cpu);
1557 
1558     if (excp == POWERPC_EXCP_MCHECK ||
1559         excp == POWERPC_EXCP_RESET ||
1560         excp == POWERPC_EXCP_SYSCALL) {
1561         vcpu->state.nip = env->spr[SPR_SRR0];
1562         vcpu->state.msr = env->spr[SPR_SRR1] & env->msr_mask;
1563     } else {
1564         vcpu->state.nip = env->spr[SPR_HSRR0];
1565         vcpu->state.msr = env->spr[SPR_HSRR1] & env->msr_mask;
1566     }
1567 
1568     /* hdar, hdsisr, asdr should be retained unless certain exceptions */
1569     if ((excp != POWERPC_EXCP_HDSI) && (excp != POWERPC_EXCP_HISI)) {
1570         vcpu->state.asdr = asdr;
1571     } else if (excp != POWERPC_EXCP_HDSI) {
1572         vcpu->state.hdar   = hdar;
1573         vcpu->state.hdsisr = hdsisr;
1574     }
1575 }
1576 
1577 static int get_exit_ids(uint64_t srr0, uint16_t ids[16])
1578 {
1579     int nr;
1580 
1581     switch (srr0) {
1582     case 0xc00:
1583         nr = 10;
1584         ids[0] = GSB_VCPU_GPR3;
1585         ids[1] = GSB_VCPU_GPR4;
1586         ids[2] = GSB_VCPU_GPR5;
1587         ids[3] = GSB_VCPU_GPR6;
1588         ids[4] = GSB_VCPU_GPR7;
1589         ids[5] = GSB_VCPU_GPR8;
1590         ids[6] = GSB_VCPU_GPR9;
1591         ids[7] = GSB_VCPU_GPR10;
1592         ids[8] = GSB_VCPU_GPR11;
1593         ids[9] = GSB_VCPU_GPR12;
1594         break;
1595     case 0xe00:
1596         nr = 5;
1597         ids[0] = GSB_VCPU_SPR_HDAR;
1598         ids[1] = GSB_VCPU_SPR_HDSISR;
1599         ids[2] = GSB_VCPU_SPR_ASDR;
1600         ids[3] = GSB_VCPU_SPR_NIA;
1601         ids[4] = GSB_VCPU_SPR_MSR;
1602         break;
1603     case 0xe20:
1604         nr = 4;
1605         ids[0] = GSB_VCPU_SPR_HDAR;
1606         ids[1] = GSB_VCPU_SPR_ASDR;
1607         ids[2] = GSB_VCPU_SPR_NIA;
1608         ids[3] = GSB_VCPU_SPR_MSR;
1609         break;
1610     case 0xe40:
1611         nr = 3;
1612         ids[0] = GSB_VCPU_SPR_HEIR;
1613         ids[1] = GSB_VCPU_SPR_NIA;
1614         ids[2] = GSB_VCPU_SPR_MSR;
1615         break;
1616     case 0xf80:
1617         nr = 3;
1618         ids[0] = GSB_VCPU_SPR_HFSCR;
1619         ids[1] = GSB_VCPU_SPR_NIA;
1620         ids[2] = GSB_VCPU_SPR_MSR;
1621         break;
1622     default:
1623         nr = 0;
1624         break;
1625     }
1626 
1627     return nr;
1628 }
1629 
1630 static void exit_process_output_buffer(PowerPCCPU *cpu,
1631                                        SpaprMachineStateNestedGuest *guest,
1632                                        target_ulong vcpuid,
1633                                        target_ulong *r3)
1634 {
1635     SpaprMachineStateNestedGuestVcpu *vcpu = &guest->vcpus[vcpuid];
1636     struct guest_state_request gsr;
1637     struct guest_state_buffer *gsb;
1638     struct guest_state_element *element;
1639     struct guest_state_element_type *type;
1640     int exit_id_count = 0;
1641     uint16_t exit_cause_ids[16];
1642     hwaddr len;
1643 
1644     len = vcpu->runbufout.size;
1645     gsb = address_space_map(CPU(cpu)->as, vcpu->runbufout.addr, &len, true,
1646                             MEMTXATTRS_UNSPECIFIED);
1647     if (!gsb || len != vcpu->runbufout.size) {
1648         address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1649         *r3 = H_P2;
1650         return;
1651     }
1652 
1653     exit_id_count = get_exit_ids(*r3, exit_cause_ids);
1654 
1655     /* Create a buffer of elements to send back */
1656     gsb->num_elements = cpu_to_be32(exit_id_count);
1657     element = gsb->elements;
1658     for (int i = 0; i < exit_id_count; i++) {
1659         type = guest_state_element_type_find(exit_cause_ids[i]);
1660         assert(type);
1661         element->id = cpu_to_be16(exit_cause_ids[i]);
1662         element->size = cpu_to_be16(type->size);
1663         element = guest_state_element_next(element, NULL, NULL);
1664     }
1665     gsr.gsb = gsb;
1666     gsr.len = VCPU_OUT_BUF_MIN_SZ;
1667     gsr.flags = 0; /* get + never guest wide */
1668     getset_state(guest, vcpuid, &gsr);
1669 
1670     address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1671     return;
1672 }
1673 
1674 static
1675 void spapr_exit_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, int excp)
1676 {
1677     CPUPPCState *env = &cpu->env;
1678     CPUState *cs = CPU(cpu);
1679     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1680     target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */
1681     target_ulong lpid = 0, vcpuid = 0;
1682     struct SpaprMachineStateNestedGuestVcpu *vcpu = NULL;
1683     struct SpaprMachineStateNestedGuest *guest = NULL;
1684 
1685     lpid = spapr_cpu->nested_host_state->gpr[5];
1686     vcpuid = spapr_cpu->nested_host_state->gpr[6];
1687     guest = spapr_get_nested_guest(spapr, lpid);
1688     assert(guest);
1689     spapr_nested_vcpu_check(guest, vcpuid, false);
1690     vcpu = &guest->vcpus[vcpuid];
1691 
1692     exit_nested_store_l2(cpu, excp, vcpu);
1693     /* do the output buffer for run_vcpu*/
1694     exit_process_output_buffer(cpu, guest, vcpuid, &r3_return);
1695 
1696     assert(env->spr[SPR_LPIDR] != 0);
1697     nested_load_state(cpu, spapr_cpu->nested_host_state);
1698     cpu_ppc_decrease_tb_by_offset(env, vcpu->tb_offset);
1699     env->gpr[3] = H_SUCCESS;
1700     env->gpr[4] = r3_return;
1701     nested_post_load_state(env, cs);
1702     cpu_ppc_hdecr_exit(env);
1703 
1704     spapr_cpu->in_nested = false;
1705     g_free(spapr_cpu->nested_host_state);
1706     spapr_cpu->nested_host_state = NULL;
1707 }
1708 
1709 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1710 {
1711     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1712     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1713 
1714     assert(spapr_cpu->in_nested);
1715     if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
1716         spapr_exit_nested_hv(cpu, excp);
1717     } else if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
1718         spapr_exit_nested_papr(spapr, cpu, excp);
1719     } else {
1720         g_assert_not_reached();
1721     }
1722 }
1723 
1724 static void nested_papr_load_l2(PowerPCCPU *cpu,
1725                                 CPUPPCState *env,
1726                                 SpaprMachineStateNestedGuestVcpu *vcpu,
1727                                 target_ulong now)
1728 {
1729     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
1730     target_ulong lpcr, lpcr_mask, hdec;
1731     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
1732 
1733     assert(vcpu);
1734     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr));
1735     nested_load_state(cpu, &vcpu->state);
1736     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) |
1737            (vcpu->state.lpcr & lpcr_mask);
1738     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
1739     lpcr &= ~LPCR_LPES0;
1740     env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask;
1741 
1742     hdec = vcpu->hdecr_expiry_tb - now;
1743     cpu_ppc_store_decr(env, vcpu->state.dec_expiry_tb - now);
1744     cpu_ppc_hdecr_init(env);
1745     cpu_ppc_store_hdecr(env, hdec);
1746 
1747     cpu_ppc_increase_tb_by_offset(env, vcpu->tb_offset);
1748 }
1749 
1750 static void nested_papr_run_vcpu(PowerPCCPU *cpu,
1751                                  uint64_t lpid,
1752                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1753 {
1754     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1755     CPUPPCState *env = &cpu->env;
1756     CPUState *cs = CPU(cpu);
1757     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1758     target_ulong now = cpu_ppc_load_tbl(env);
1759 
1760     assert(env->spr[SPR_LPIDR] == 0);
1761     assert(spapr->nested.api); /* ensure API version is initialized */
1762     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
1763     assert(spapr_cpu->nested_host_state);
1764     nested_save_state(spapr_cpu->nested_host_state, cpu);
1765     spapr_cpu->nested_host_state->dec_expiry_tb = now - cpu_ppc_load_decr(env);
1766     nested_papr_load_l2(cpu, env, vcpu, now);
1767     env->spr[SPR_LPIDR] = lpid; /* post load l2 */
1768 
1769     spapr_cpu->in_nested = true;
1770     nested_post_load_state(env, cs);
1771 }
1772 
1773 static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu,
1774                                      SpaprMachineState *spapr,
1775                                      target_ulong opcode,
1776                                      target_ulong *args)
1777 {
1778     CPUPPCState *env = &cpu->env;
1779     target_ulong flags = args[0];
1780     target_ulong lpid = args[1];
1781     target_ulong vcpuid = args[2];
1782     struct SpaprMachineStateNestedGuestVcpu *vcpu;
1783     struct guest_state_request gsr;
1784     SpaprMachineStateNestedGuest *guest;
1785     target_ulong rc;
1786 
1787     if (flags) /* don't handle any flags for now */
1788         return H_PARAMETER;
1789 
1790     guest = spapr_get_nested_guest(spapr, lpid);
1791     if (!guest) {
1792         return H_P2;
1793     }
1794     if (!spapr_nested_vcpu_check(guest, vcpuid, true)) {
1795         return H_P3;
1796     }
1797 
1798     if (guest->parttbl[0] == 0) {
1799         /* At least need a partition scoped radix tree */
1800         return H_NOT_AVAILABLE;
1801     }
1802 
1803     vcpu = &guest->vcpus[vcpuid];
1804 
1805     /* Read run_vcpu input buffer to update state */
1806     gsr.buf = vcpu->runbufin.addr;
1807     gsr.len = vcpu->runbufin.size;
1808     gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */
1809     rc = map_and_getset_state(cpu, guest, vcpuid, &gsr);
1810     if (rc == H_SUCCESS) {
1811         nested_papr_run_vcpu(cpu, lpid, vcpu);
1812     } else {
1813         env->gpr[3] = rc;
1814     }
1815     return env->gpr[3];
1816 }
1817 
1818 void spapr_register_nested_hv(void)
1819 {
1820     spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
1821     spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
1822     spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
1823     spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
1824 }
1825 
1826 void spapr_unregister_nested_hv(void)
1827 {
1828     spapr_unregister_hypercall(KVMPPC_H_SET_PARTITION_TABLE);
1829     spapr_unregister_hypercall(KVMPPC_H_ENTER_NESTED);
1830     spapr_unregister_hypercall(KVMPPC_H_TLB_INVALIDATE);
1831     spapr_unregister_hypercall(KVMPPC_H_COPY_TOFROM_GUEST);
1832 }
1833 
1834 void spapr_register_nested_papr(void)
1835 {
1836     spapr_register_hypercall(H_GUEST_GET_CAPABILITIES,
1837                              h_guest_get_capabilities);
1838     spapr_register_hypercall(H_GUEST_SET_CAPABILITIES,
1839                              h_guest_set_capabilities);
1840     spapr_register_hypercall(H_GUEST_CREATE, h_guest_create);
1841     spapr_register_hypercall(H_GUEST_DELETE, h_guest_delete);
1842     spapr_register_hypercall(H_GUEST_CREATE_VCPU, h_guest_create_vcpu);
1843     spapr_register_hypercall(H_GUEST_SET_STATE, h_guest_set_state);
1844     spapr_register_hypercall(H_GUEST_GET_STATE, h_guest_get_state);
1845     spapr_register_hypercall(H_GUEST_RUN_VCPU, h_guest_run_vcpu);
1846 }
1847 
1848 void spapr_unregister_nested_papr(void)
1849 {
1850     spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
1851     spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
1852     spapr_unregister_hypercall(H_GUEST_CREATE);
1853     spapr_unregister_hypercall(H_GUEST_DELETE);
1854     spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
1855     spapr_unregister_hypercall(H_GUEST_SET_STATE);
1856     spapr_unregister_hypercall(H_GUEST_GET_STATE);
1857     spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
1858 }
1859 
1860 #else
1861 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1862 {
1863     g_assert_not_reached();
1864 }
1865 
/* Build without CONFIG_TCG: there are no KVM-HV nested hcalls to install. */
void spapr_register_nested_hv(void)
{
    /* intentionally empty */
}
1870 
/* Build without CONFIG_TCG: there are no KVM-HV nested hcalls to remove. */
void spapr_unregister_nested_hv(void)
{
    /* intentionally empty */
}
1875 
1876 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
1877                               target_ulong lpid, ppc_v3_pate_t *entry)
1878 {
1879     return false;
1880 }
1881 
1882 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
1883                                 target_ulong lpid, ppc_v3_pate_t *entry)
1884 {
1885     return false;
1886 }
1887 
/* Build without CONFIG_TCG: there are no PAPR nested hcalls to install. */
void spapr_register_nested_papr(void)
{
    /* intentionally empty */
}
1892 
/* Build without CONFIG_TCG: there are no PAPR nested hcalls to remove. */
void spapr_unregister_nested_papr(void)
{
    /* intentionally empty */
}
1897 
/* Build without CONFIG_TCG: nothing to initialise. */
void spapr_nested_gsb_init(void)
{
    /* intentionally empty */
}
1902 
1903 #endif
1904