xref: /openbmc/qemu/hw/ppc/spapr_nested.c (revision bb23bccebc7f99aa200fa27ff5c2056627951ae4)
1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "exec/exec-all.h"
4 #include "helper_regs.h"
5 #include "hw/ppc/ppc.h"
6 #include "hw/ppc/spapr.h"
7 #include "hw/ppc/spapr_cpu_core.h"
8 #include "hw/ppc/spapr_nested.h"
9 #include "mmu-book3s-v3.h"
10 #include "cpu-models.h"
11 #include "qemu/log.h"
12 
13 void spapr_nested_reset(SpaprMachineState *spapr)
14 {
15     if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
16         spapr->nested.api = NESTED_API_KVM_HV;
17         spapr_unregister_nested_hv();
18         spapr_register_nested_hv();
19     } else {
20         spapr->nested.api = 0;
21         spapr->nested.capabilities_set = false;
22     }
23 }
24 
25 uint8_t spapr_nested_api(SpaprMachineState *spapr)
26 {
27     return spapr->nested.api;
28 }
29 
30 #ifdef CONFIG_TCG
31 
32 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
33                               target_ulong lpid, ppc_v3_pate_t *entry)
34 {
35     uint64_t patb, pats;
36 
37     assert(lpid != 0);
38 
39     patb = spapr->nested.ptcr & PTCR_PATB;
40     pats = spapr->nested.ptcr & PTCR_PATS;
41 
42     /* Check if partition table is properly aligned */
43     if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
44         return false;
45     }
46 
47     /* Calculate number of entries */
48     pats = 1ull << (pats + 12 - 4);
49     if (pats <= lpid) {
50         return false;
51     }
52 
53     /* Grab entry */
54     patb += 16 * lpid;
55     entry->dw0 = ldq_phys(CPU(cpu)->as, patb);
56     entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8);
57     return true;
58 }
59 
60 #define PRTS_MASK      0x1f
61 
62 static target_ulong h_set_ptbl(PowerPCCPU *cpu,
63                                SpaprMachineState *spapr,
64                                target_ulong opcode,
65                                target_ulong *args)
66 {
67     target_ulong ptcr = args[0];
68 
69     if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
70         return H_FUNCTION;
71     }
72 
73     if ((ptcr & PRTS_MASK) + 12 - 4 > 12) {
74         return H_PARAMETER;
75     }
76 
77     spapr->nested.ptcr = ptcr; /* Save new partition table */
78 
79     return H_SUCCESS;
80 }
81 
82 static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
83                                      SpaprMachineState *spapr,
84                                      target_ulong opcode,
85                                      target_ulong *args)
86 {
87     /*
88      * The spapr virtual hypervisor nested HV implementation retains no L2
89      * translation state except for TLB. And the TLB is always invalidated
90      * across L1<->L2 transitions, so nothing is required here.
91      */
92 
93     return H_SUCCESS;
94 }
95 
96 static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
97                                         SpaprMachineState *spapr,
98                                         target_ulong opcode,
99                                         target_ulong *args)
100 {
101     /*
102      * This HCALL is not required, L1 KVM will take a slow path and walk the
103      * page tables manually to do the data copy.
104      */
105     return H_FUNCTION;
106 }
107 
/*
 * Capture the current vCPU register state into 'save'.
 *
 * Used on L1<->L2 transitions: copies the GPR file, branch/condition
 * registers, MSR/NIP and the hypervisor-relevant SPRs out of the CPU env.
 * Under the PAPR nested API a much larger register set (debug, PMU, EBB,
 * VSX, FP state) is transferred as well.
 */
static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());

    memcpy(save->gpr, env->gpr, sizeof(save->gpr));

    save->lr = env->lr;
    save->ctr = env->ctr;
    save->cfar = env->cfar;
    save->msr = env->msr;
    save->nip = env->nip;

    /* CR and XER are stored split in env; fold them into single values */
    save->cr = ppc_get_cr(env);
    save->xer = cpu_read_xer(env);

    save->lpcr = env->spr[SPR_LPCR];
    save->lpidr = env->spr[SPR_LPIDR];
    save->pcr = env->spr[SPR_PCR];
    save->dpdes = env->spr[SPR_DPDES];
    save->hfscr = env->spr[SPR_HFSCR];
    save->srr0 = env->spr[SPR_SRR0];
    save->srr1 = env->spr[SPR_SRR1];
    save->sprg0 = env->spr[SPR_SPRG0];
    save->sprg1 = env->spr[SPR_SPRG1];
    save->sprg2 = env->spr[SPR_SPRG2];
    save->sprg3 = env->spr[SPR_SPRG3];
    save->pidr = env->spr[SPR_BOOKS_PID];
    save->ppr = env->spr[SPR_PPR];

    /* The PAPR API transfers a much larger register set than KVM-HV */
    if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
        save->pvr = env->spr[SPR_PVR];
        save->amor = env->spr[SPR_AMOR];
        save->dawr0 = env->spr[SPR_DAWR0];
        save->dawrx0 = env->spr[SPR_DAWRX0];
        save->ciabr = env->spr[SPR_CIABR];
        save->purr = env->spr[SPR_PURR];
        save->spurr = env->spr[SPR_SPURR];
        save->ic = env->spr[SPR_IC];
        save->vtb = env->spr[SPR_VTB];
        save->hdar = env->spr[SPR_HDAR];
        save->hdsisr = env->spr[SPR_HDSISR];
        save->heir = env->spr[SPR_HEIR];
        save->asdr = env->spr[SPR_ASDR];
        save->dawr1 = env->spr[SPR_DAWR1];
        save->dawrx1 = env->spr[SPR_DAWRX1];
        save->dexcr = env->spr[SPR_DEXCR];
        save->hdexcr = env->spr[SPR_HDEXCR];
        save->hashkeyr = env->spr[SPR_HASHKEYR];
        save->hashpkeyr = env->spr[SPR_HASHPKEYR];
        /* Vector-scalar register file (VSX/VMX state) */
        memcpy(save->vsr, env->vsr, sizeof(save->vsr));
        save->ebbhr = env->spr[SPR_EBBHR];
        save->tar = env->spr[SPR_TAR];
        save->ebbrr = env->spr[SPR_EBBRR];
        save->bescr = env->spr[SPR_BESCR];
        save->iamr = env->spr[SPR_IAMR];
        save->amr = env->spr[SPR_AMR];
        save->uamor = env->spr[SPR_UAMOR];
        save->dscr = env->spr[SPR_DSCR];
        save->fscr = env->spr[SPR_FSCR];
        save->pspb = env->spr[SPR_PSPB];
        save->ctrl = env->spr[SPR_CTRL];
        save->vrsave = env->spr[SPR_VRSAVE];
        save->dar = env->spr[SPR_DAR];
        save->dsisr = env->spr[SPR_DSISR];
        /* Performance-monitor unit state */
        save->pmc1 = env->spr[SPR_POWER_PMC1];
        save->pmc2 = env->spr[SPR_POWER_PMC2];
        save->pmc3 = env->spr[SPR_POWER_PMC3];
        save->pmc4 = env->spr[SPR_POWER_PMC4];
        save->pmc5 = env->spr[SPR_POWER_PMC5];
        save->pmc6 = env->spr[SPR_POWER_PMC6];
        save->mmcr0 = env->spr[SPR_POWER_MMCR0];
        save->mmcr1 = env->spr[SPR_POWER_MMCR1];
        save->mmcr2 = env->spr[SPR_POWER_MMCR2];
        save->mmcra = env->spr[SPR_POWER_MMCRA];
        save->sdar = env->spr[SPR_POWER_SDAR];
        save->siar = env->spr[SPR_POWER_SIAR];
        save->sier = env->spr[SPR_POWER_SIER];
        save->vscr = ppc_get_vscr(env);
        save->fpscr = env->fpscr;
    }

    /* Record the timebase offset so it can be restored on the way back */
    save->tb_offset = env->tb_env->tb_offset;
}
192 
193 static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load)
194 {
195     CPUState *cs = CPU(cpu);
196     CPUPPCState *env = &cpu->env;
197     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
198 
199     memcpy(env->gpr, load->gpr, sizeof(env->gpr));
200 
201     env->lr = load->lr;
202     env->ctr = load->ctr;
203     env->cfar = load->cfar;
204     env->msr = load->msr;
205     env->nip = load->nip;
206 
207     ppc_set_cr(env, load->cr);
208     cpu_write_xer(env, load->xer);
209 
210     env->spr[SPR_LPCR] = load->lpcr;
211     env->spr[SPR_LPIDR] = load->lpidr;
212     env->spr[SPR_PCR] = load->pcr;
213     env->spr[SPR_DPDES] = load->dpdes;
214     env->spr[SPR_HFSCR] = load->hfscr;
215     env->spr[SPR_SRR0] = load->srr0;
216     env->spr[SPR_SRR1] = load->srr1;
217     env->spr[SPR_SPRG0] = load->sprg0;
218     env->spr[SPR_SPRG1] = load->sprg1;
219     env->spr[SPR_SPRG2] = load->sprg2;
220     env->spr[SPR_SPRG3] = load->sprg3;
221     env->spr[SPR_BOOKS_PID] = load->pidr;
222     env->spr[SPR_PPR] = load->ppr;
223 
224     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
225         env->spr[SPR_PVR] = load->pvr;
226         env->spr[SPR_AMOR] = load->amor;
227         env->spr[SPR_DAWR0] = load->dawr0;
228         env->spr[SPR_DAWRX0] = load->dawrx0;
229         env->spr[SPR_CIABR] = load->ciabr;
230         env->spr[SPR_PURR] = load->purr;
231         env->spr[SPR_SPURR] = load->purr;
232         env->spr[SPR_IC] = load->ic;
233         env->spr[SPR_VTB] = load->vtb;
234         env->spr[SPR_HDAR] = load->hdar;
235         env->spr[SPR_HDSISR] = load->hdsisr;
236         env->spr[SPR_HEIR] = load->heir;
237         env->spr[SPR_ASDR] = load->asdr;
238         env->spr[SPR_DAWR1] = load->dawr1;
239         env->spr[SPR_DAWRX1] = load->dawrx1;
240         env->spr[SPR_DEXCR] = load->dexcr;
241         env->spr[SPR_HDEXCR] = load->hdexcr;
242         env->spr[SPR_HASHKEYR] = load->hashkeyr;
243         env->spr[SPR_HASHPKEYR] = load->hashpkeyr;
244         memcpy(env->vsr, load->vsr, sizeof(env->vsr));
245         env->spr[SPR_EBBHR] = load->ebbhr;
246         env->spr[SPR_TAR] = load->tar;
247         env->spr[SPR_EBBRR] = load->ebbrr;
248         env->spr[SPR_BESCR] = load->bescr;
249         env->spr[SPR_IAMR] = load->iamr;
250         env->spr[SPR_AMR] = load->amr;
251         env->spr[SPR_UAMOR] = load->uamor;
252         env->spr[SPR_DSCR] = load->dscr;
253         env->spr[SPR_FSCR] = load->fscr;
254         env->spr[SPR_PSPB] = load->pspb;
255         env->spr[SPR_CTRL] = load->ctrl;
256         env->spr[SPR_VRSAVE] = load->vrsave;
257         env->spr[SPR_DAR] = load->dar;
258         env->spr[SPR_DSISR] = load->dsisr;
259         env->spr[SPR_POWER_PMC1] = load->pmc1;
260         env->spr[SPR_POWER_PMC2] = load->pmc2;
261         env->spr[SPR_POWER_PMC3] = load->pmc3;
262         env->spr[SPR_POWER_PMC4] = load->pmc4;
263         env->spr[SPR_POWER_PMC5] = load->pmc5;
264         env->spr[SPR_POWER_PMC6] = load->pmc6;
265         env->spr[SPR_POWER_MMCR0] = load->mmcr0;
266         env->spr[SPR_POWER_MMCR1] = load->mmcr1;
267         env->spr[SPR_POWER_MMCR2] = load->mmcr2;
268         env->spr[SPR_POWER_MMCRA] = load->mmcra;
269         env->spr[SPR_POWER_SDAR] = load->sdar;
270         env->spr[SPR_POWER_SIAR] = load->siar;
271         env->spr[SPR_POWER_SIER] = load->sier;
272         ppc_store_vscr(env, load->vscr);
273         ppc_store_fpscr(env, load->fpscr);
274     }
275 
276     env->tb_env->tb_offset = load->tb_offset;
277 
278     /*
279      * MSR updated, compute hflags and possible interrupts.
280      */
281     hreg_compute_hflags(env);
282     ppc_maybe_interrupt(env);
283 
284     /*
285      * Nested HV does not tag TLB entries between L1 and L2, so must
286      * flush on transition.
287      */
288     tlb_flush(cs);
289     env->reserve_addr = -1; /* Reset the reservation */
290 }
291 
292 /*
293  * When this handler returns, the environment is switched to the L2 guest
294  * and TCG begins running that. spapr_exit_nested() performs the switch from
295  * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
296  */
297 static target_ulong h_enter_nested(PowerPCCPU *cpu,
298                                    SpaprMachineState *spapr,
299                                    target_ulong opcode,
300                                    target_ulong *args)
301 {
302     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
303     CPUPPCState *env = &cpu->env;
304     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
305     struct nested_ppc_state l2_state;
306     target_ulong hv_ptr = args[0];
307     target_ulong regs_ptr = args[1];
308     target_ulong hdec, now = cpu_ppc_load_tbl(env);
309     target_ulong lpcr, lpcr_mask;
310     struct kvmppc_hv_guest_state *hvstate;
311     struct kvmppc_hv_guest_state hv_state;
312     struct kvmppc_pt_regs *regs;
313     hwaddr len;
314 
315     if (spapr->nested.ptcr == 0) {
316         return H_NOT_AVAILABLE;
317     }
318 
319     len = sizeof(*hvstate);
320     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
321                                 MEMTXATTRS_UNSPECIFIED);
322     if (len != sizeof(*hvstate)) {
323         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
324         return H_PARAMETER;
325     }
326 
327     memcpy(&hv_state, hvstate, len);
328 
329     address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
330 
331     /*
332      * We accept versions 1 and 2. Version 2 fields are unused because TCG
333      * does not implement DAWR*.
334      */
335     if (hv_state.version > HV_GUEST_STATE_VERSION) {
336         return H_PARAMETER;
337     }
338 
339     if (hv_state.lpid == 0) {
340         return H_PARAMETER;
341     }
342 
343     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
344     if (!spapr_cpu->nested_host_state) {
345         return H_NO_MEM;
346     }
347 
348     assert(env->spr[SPR_LPIDR] == 0);
349     assert(env->spr[SPR_DPDES] == 0);
350     nested_save_state(spapr_cpu->nested_host_state, cpu);
351 
352     len = sizeof(*regs);
353     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
354                                 MEMTXATTRS_UNSPECIFIED);
355     if (!regs || len != sizeof(*regs)) {
356         address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
357         g_free(spapr_cpu->nested_host_state);
358         return H_P2;
359     }
360 
361     len = sizeof(l2_state.gpr);
362     assert(len == sizeof(regs->gpr));
363     memcpy(l2_state.gpr, regs->gpr, len);
364 
365     l2_state.lr = regs->link;
366     l2_state.ctr = regs->ctr;
367     l2_state.xer = regs->xer;
368     l2_state.cr = regs->ccr;
369     l2_state.msr = regs->msr;
370     l2_state.nip = regs->nip;
371 
372     address_space_unmap(CPU(cpu)->as, regs, len, len, false);
373 
374     l2_state.cfar = hv_state.cfar;
375     l2_state.lpidr = hv_state.lpid;
376 
377     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
378     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
379     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
380     lpcr &= ~LPCR_LPES0;
381     l2_state.lpcr = lpcr & pcc->lpcr_mask;
382 
383     l2_state.pcr = hv_state.pcr;
384     /* hv_state.amor is not used */
385     l2_state.dpdes = hv_state.dpdes;
386     l2_state.hfscr = hv_state.hfscr;
387     /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
388     l2_state.srr0 = hv_state.srr0;
389     l2_state.srr1 = hv_state.srr1;
390     l2_state.sprg0 = hv_state.sprg[0];
391     l2_state.sprg1 = hv_state.sprg[1];
392     l2_state.sprg2 = hv_state.sprg[2];
393     l2_state.sprg3 = hv_state.sprg[3];
394     l2_state.pidr = hv_state.pidr;
395     l2_state.ppr = hv_state.ppr;
396     l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
397 
398     /*
399      * Switch to the nested guest environment and start the "hdec" timer.
400      */
401     nested_load_state(cpu, &l2_state);
402 
403     hdec = hv_state.hdec_expiry - now;
404     cpu_ppc_hdecr_init(env);
405     cpu_ppc_store_hdecr(env, hdec);
406 
407     /*
408      * The hv_state.vcpu_token is not needed. It is used by the KVM
409      * implementation to remember which L2 vCPU last ran on which physical
410      * CPU so as to invalidate process scope translations if it is moved
411      * between physical CPUs. For now TLBs are always flushed on L1<->L2
412      * transitions so this is not a problem.
413      *
414      * Could validate that the same vcpu_token does not attempt to run on
415      * different L1 vCPUs at the same time, but that would be a L1 KVM bug
416      * and it's not obviously worth a new data structure to do it.
417      */
418 
419     spapr_cpu->in_nested = true;
420 
421     /*
422      * The spapr hcall helper sets env->gpr[3] to the return value, but at
423      * this point the L1 is not returning from the hcall but rather we
424      * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
425      * to leave it unchanged.
426      */
427     return env->gpr[3];
428 }
429 
/*
 * Exit the nested (L2) guest back to L1 after interrupt 'excp'.
 *
 * The L2 exit state is written back into the hv_guest_state and pt_regs
 * buffers that L1 passed to H_ENTER_NESTED (their guest-physical addresses
 * were preserved in r4/r5 of the saved L1 state), the saved L1 register
 * state is restored, and r3 is set to the interrupt vector so L1 sees it
 * as the H_ENTER_NESTED return value.
 */
static void spapr_exit_nested_hv(PowerPCCPU *cpu, int excp)
{
    CPUPPCState *env = &cpu->env;
    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
    struct nested_ppc_state l2_state;
    /* r4/r5 of the saved L1 state are the original hcall arguments */
    target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
    target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
    target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
    struct kvmppc_hv_guest_state *hvstate;
    struct kvmppc_pt_regs *regs;
    hwaddr len;

    /* Snapshot the L2 state (and fault SPRs) before switching back to L1 */
    nested_save_state(&l2_state, cpu);
    hsrr0 = env->spr[SPR_HSRR0];
    hsrr1 = env->spr[SPR_HSRR1];
    hdar = env->spr[SPR_HDAR];
    hdsisr = env->spr[SPR_HDSISR];
    asdr = env->spr[SPR_ASDR];

    /*
     * Switch back to the host environment (including for any error).
     */
    assert(env->spr[SPR_LPIDR] != 0);
    nested_load_state(cpu, spapr_cpu->nested_host_state);
    env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */

    cpu_ppc_hdecr_exit(env);

    spapr_cpu->in_nested = false;

    g_free(spapr_cpu->nested_host_state);
    spapr_cpu->nested_host_state = NULL;

    /* Write the L2 exit state back into L1's hv_guest_state buffer */
    len = sizeof(*hvstate);
    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
                                MEMTXATTRS_UNSPECIFIED);
    if (len != sizeof(*hvstate)) {
        address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
        env->gpr[3] = H_PARAMETER;
        return;
    }

    hvstate->cfar = l2_state.cfar;
    hvstate->lpcr = l2_state.lpcr;
    hvstate->pcr = l2_state.pcr;
    hvstate->dpdes = l2_state.dpdes;
    hvstate->hfscr = l2_state.hfscr;

    /* Report storage-fault details to L1 for HV data/instruction faults */
    if (excp == POWERPC_EXCP_HDSI) {
        hvstate->hdar = hdar;
        hvstate->hdsisr = hdsisr;
        hvstate->asdr = asdr;
    } else if (excp == POWERPC_EXCP_HISI) {
        hvstate->asdr = asdr;
    }

    /* HEIR should be implemented for HV mode and saved here. */
    hvstate->srr0 = l2_state.srr0;
    hvstate->srr1 = l2_state.srr1;
    hvstate->sprg[0] = l2_state.sprg0;
    hvstate->sprg[1] = l2_state.sprg1;
    hvstate->sprg[2] = l2_state.sprg2;
    hvstate->sprg[3] = l2_state.sprg3;
    hvstate->pidr = l2_state.pidr;
    hvstate->ppr = l2_state.ppr;

    /* Is it okay to specify write length larger than actual data written? */
    address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);

    /* Write the L2 GPRs/branch state back into L1's pt_regs buffer */
    len = sizeof(*regs);
    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
                                MEMTXATTRS_UNSPECIFIED);
    if (!regs || len != sizeof(*regs)) {
        address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
        env->gpr[3] = H_P2;
        return;
    }

    len = sizeof(env->gpr);
    assert(len == sizeof(regs->gpr));
    memcpy(regs->gpr, l2_state.gpr, len);

    regs->link = l2_state.lr;
    regs->ctr = l2_state.ctr;
    regs->xer = l2_state.xer;
    regs->ccr = l2_state.cr;

    /*
     * Non-HV interrupts (machine check, system reset, syscall) were
     * delivered through SRR0/1; everything else through HSRR0/1.
     */
    if (excp == POWERPC_EXCP_MCHECK ||
        excp == POWERPC_EXCP_RESET ||
        excp == POWERPC_EXCP_SYSCALL) {
        regs->nip = l2_state.srr0;
        regs->msr = l2_state.srr1 & env->msr_mask;
    } else {
        regs->nip = hsrr0;
        regs->msr = hsrr1 & env->msr_mask;
    }

    /* Is it okay to specify write length larger than actual data written? */
    address_space_unmap(CPU(cpu)->as, regs, len, len, true);
}
530 
531 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
532 {
533     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
534     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
535 
536     assert(spapr_cpu->in_nested);
537     if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
538         spapr_exit_nested_hv(cpu, excp);
539     } else {
540         g_assert_not_reached();
541     }
542 }
543 
544 static
545 SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr,
546                                                      target_ulong guestid)
547 {
548     SpaprMachineStateNestedGuest *guest;
549 
550     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
551     return guest;
552 }
553 
554 static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu,
555                                              SpaprMachineState *spapr,
556                                              target_ulong opcode,
557                                              target_ulong *args)
558 {
559     CPUPPCState *env = &cpu->env;
560     target_ulong flags = args[0];
561 
562     if (flags) { /* don't handle any flags capabilities for now */
563         return H_PARAMETER;
564     }
565 
566     /* P10 capabilities */
567     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
568         spapr->max_compat_pvr)) {
569         env->gpr[4] |= H_GUEST_CAPABILITIES_P10_MODE;
570     }
571 
572     /* P9 capabilities */
573     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
574         spapr->max_compat_pvr)) {
575         env->gpr[4] |= H_GUEST_CAPABILITIES_P9_MODE;
576     }
577 
578     return H_SUCCESS;
579 }
580 
581 static target_ulong h_guest_set_capabilities(PowerPCCPU *cpu,
582                                              SpaprMachineState *spapr,
583                                              target_ulong opcode,
584                                               target_ulong *args)
585 {
586     CPUPPCState *env = &cpu->env;
587     target_ulong flags = args[0];
588     target_ulong capabilities = args[1];
589     env->gpr[4] = 0;
590 
591     if (flags) { /* don't handle any flags capabilities for now */
592         return H_PARAMETER;
593     }
594 
595     if (capabilities & H_GUEST_CAPABILITIES_COPY_MEM) {
596         env->gpr[4] = 1;
597         return H_P2; /* isn't supported */
598     }
599 
600     /*
601      * If there are no capabilities configured, set the R5 to the index of
602      * the first supported Power Processor Mode
603      */
604     if (!capabilities) {
605         env->gpr[4] = 1;
606 
607         /* set R5 to the first supported Power Processor Mode */
608         if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
609                              spapr->max_compat_pvr)) {
610             env->gpr[5] = H_GUEST_CAP_P10_MODE_BMAP;
611         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
612                                     spapr->max_compat_pvr)) {
613             env->gpr[5] = H_GUEST_CAP_P9_MODE_BMAP;
614         }
615 
616         return H_P2;
617     }
618 
619     /*
620      * If an invalid capability is set, R5 should contain the index of the
621      * invalid capability bit
622      */
623     if (capabilities & ~H_GUEST_CAP_VALID_MASK) {
624         env->gpr[4] = 1;
625 
626         /* Set R5 to the index of the invalid capability */
627         env->gpr[5] = 63 - ctz64(capabilities);
628 
629         return H_P2;
630     }
631 
632     if (!spapr->nested.capabilities_set) {
633         spapr->nested.capabilities_set = true;
634         spapr->nested.pvr_base = env->spr[SPR_PVR];
635         return H_SUCCESS;
636     } else {
637         return H_STATE;
638     }
639 }
640 
641 static void
642 destroy_guest_helper(gpointer value)
643 {
644     struct SpaprMachineStateNestedGuest *guest = value;
645     g_free(guest->vcpus);
646     g_free(guest);
647 }
648 
649 static target_ulong h_guest_create(PowerPCCPU *cpu,
650                                    SpaprMachineState *spapr,
651                                    target_ulong opcode,
652                                    target_ulong *args)
653 {
654     CPUPPCState *env = &cpu->env;
655     target_ulong flags = args[0];
656     target_ulong continue_token = args[1];
657     uint64_t guestid;
658     int nguests = 0;
659     struct SpaprMachineStateNestedGuest *guest;
660 
661     if (flags) { /* don't handle any flags for now */
662         return H_UNSUPPORTED_FLAG;
663     }
664 
665     if (continue_token != -1) {
666         return H_P2;
667     }
668 
669     if (!spapr->nested.capabilities_set) {
670         return H_STATE;
671     }
672 
673     if (!spapr->nested.guests) {
674         spapr->nested.guests = g_hash_table_new_full(NULL,
675                                                      NULL,
676                                                      NULL,
677                                                      destroy_guest_helper);
678     }
679 
680     nguests = g_hash_table_size(spapr->nested.guests);
681 
682     if (nguests == PAPR_NESTED_GUEST_MAX) {
683         return H_NO_MEM;
684     }
685 
686     /* Lookup for available guestid */
687     for (guestid = 1; guestid < PAPR_NESTED_GUEST_MAX; guestid++) {
688         if (!(g_hash_table_lookup(spapr->nested.guests,
689                                   GINT_TO_POINTER(guestid)))) {
690             break;
691         }
692     }
693 
694     if (guestid == PAPR_NESTED_GUEST_MAX) {
695         return H_NO_MEM;
696     }
697 
698     guest = g_try_new0(struct SpaprMachineStateNestedGuest, 1);
699     if (!guest) {
700         return H_NO_MEM;
701     }
702 
703     guest->pvr_logical = spapr->nested.pvr_base;
704     g_hash_table_insert(spapr->nested.guests, GINT_TO_POINTER(guestid), guest);
705     env->gpr[4] = guestid;
706 
707     return H_SUCCESS;
708 }
709 
710 static target_ulong h_guest_delete(PowerPCCPU *cpu,
711                                    SpaprMachineState *spapr,
712                                    target_ulong opcode,
713                                    target_ulong *args)
714 {
715     target_ulong flags = args[0];
716     target_ulong guestid = args[1];
717     struct SpaprMachineStateNestedGuest *guest;
718 
719     /*
720      * handle flag deleteAllGuests, if set:
721      * guestid is ignored and all guests are deleted
722      *
723      */
724     if (flags & ~H_GUEST_DELETE_ALL_FLAG) {
725         return H_UNSUPPORTED_FLAG; /* other flag bits reserved */
726     } else if (flags & H_GUEST_DELETE_ALL_FLAG) {
727         g_hash_table_destroy(spapr->nested.guests);
728         return H_SUCCESS;
729     }
730 
731     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
732     if (!guest) {
733         return H_P2;
734     }
735 
736     g_hash_table_remove(spapr->nested.guests, GINT_TO_POINTER(guestid));
737 
738     return H_SUCCESS;
739 }
740 
741 static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
742                                         SpaprMachineState *spapr,
743                                         target_ulong opcode,
744                                         target_ulong *args)
745 {
746     target_ulong flags = args[0];
747     target_ulong guestid = args[1];
748     target_ulong vcpuid = args[2];
749     SpaprMachineStateNestedGuest *guest;
750 
751     if (flags) { /* don't handle any flags for now */
752         return H_UNSUPPORTED_FLAG;
753     }
754 
755     guest = spapr_get_nested_guest(spapr, guestid);
756     if (!guest) {
757         return H_P2;
758     }
759 
760     if (vcpuid < guest->nr_vcpus) {
761         qemu_log_mask(LOG_UNIMP, "vcpuid " TARGET_FMT_ld " already in use.",
762                       vcpuid);
763         return H_IN_USE;
764     }
765     /* linear vcpuid allocation only */
766     assert(vcpuid == guest->nr_vcpus);
767 
768     if (guest->nr_vcpus >= PAPR_NESTED_GUEST_VCPU_MAX) {
769         return H_P3;
770     }
771 
772     SpaprMachineStateNestedGuestVcpu *vcpus, *curr_vcpu;
773     vcpus = g_try_renew(struct SpaprMachineStateNestedGuestVcpu,
774                         guest->vcpus,
775                         guest->nr_vcpus + 1);
776     if (!vcpus) {
777         return H_NO_MEM;
778     }
779     guest->vcpus = vcpus;
780     curr_vcpu = &vcpus[guest->nr_vcpus];
781     memset(curr_vcpu, 0, sizeof(SpaprMachineStateNestedGuestVcpu));
782 
783     curr_vcpu->enabled = true;
784     guest->nr_vcpus++;
785 
786     return H_SUCCESS;
787 }
788 
/* Register the hypercalls that make up the KVM-HV nested API. */
void spapr_register_nested_hv(void)
{
    spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
    spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
    spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
    spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
}
796 
/* Unregister the KVM-HV nested API hypercalls (mirror of the register fn). */
void spapr_unregister_nested_hv(void)
{
    spapr_unregister_hypercall(KVMPPC_H_SET_PARTITION_TABLE);
    spapr_unregister_hypercall(KVMPPC_H_ENTER_NESTED);
    spapr_unregister_hypercall(KVMPPC_H_TLB_INVALIDATE);
    spapr_unregister_hypercall(KVMPPC_H_COPY_TOFROM_GUEST);
}
804 
/* Register the hypercalls that make up the PAPR nested API. */
void spapr_register_nested_papr(void)
{
    spapr_register_hypercall(H_GUEST_GET_CAPABILITIES,
                             h_guest_get_capabilities);
    spapr_register_hypercall(H_GUEST_SET_CAPABILITIES,
                             h_guest_set_capabilities);
    spapr_register_hypercall(H_GUEST_CREATE, h_guest_create);
    spapr_register_hypercall(H_GUEST_DELETE, h_guest_delete);
    spapr_register_hypercall(H_GUEST_CREATE_VCPU, h_guest_create_vcpu);
}
815 
/* Unregister the PAPR nested API hypercalls (mirror of the register fn). */
void spapr_unregister_nested_papr(void)
{
    spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
    spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
    spapr_unregister_hypercall(H_GUEST_CREATE);
    spapr_unregister_hypercall(H_GUEST_DELETE);
    spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
}
824 
825 #else
/* !CONFIG_TCG stub: nested guest exits cannot occur without TCG. */
void spapr_exit_nested(PowerPCCPU *cpu, int excp)
{
    g_assert_not_reached();
}
830 
/* !CONFIG_TCG stub: the nested HV hypercalls are TCG-only. */
void spapr_register_nested_hv(void)
{
    /* DO NOTHING */
}
835 
/* !CONFIG_TCG stub: nothing was registered, so nothing to unregister. */
void spapr_unregister_nested_hv(void)
{
    /* DO NOTHING */
}
840 
/* !CONFIG_TCG stub: no nested partition table exists without TCG. */
bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
                              target_ulong lpid, ppc_v3_pate_t *entry)
{
    return false;
}
846 
/* !CONFIG_TCG stub: the PAPR nested hypercalls are TCG-only. */
void spapr_register_nested_papr(void)
{
    /* DO NOTHING */
}
851 
/* !CONFIG_TCG stub: nothing was registered, so nothing to unregister. */
void spapr_unregister_nested_papr(void)
{
    /* DO NOTHING */
}
856 
857 #endif
858