xref: /openbmc/qemu/hw/ppc/spapr_hcall.c (revision 89854803)
1 #include "qemu/osdep.h"
2 #include "qapi/error.h"
3 #include "sysemu/hw_accel.h"
4 #include "sysemu/sysemu.h"
5 #include "qemu/log.h"
6 #include "qemu/error-report.h"
7 #include "cpu.h"
8 #include "exec/exec-all.h"
9 #include "helper_regs.h"
10 #include "hw/ppc/spapr.h"
11 #include "mmu-hash64.h"
12 #include "cpu-models.h"
13 #include "trace.h"
14 #include "kvm_ppc.h"
15 #include "hw/ppc/spapr_ovec.h"
16 #include "mmu-book3s-v3.h"
17 
18 struct LPCRSyncState {
19     target_ulong value;
20     target_ulong mask;
21 };
22 
23 static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg)
24 {
25     struct LPCRSyncState *s = arg.host_ptr;
26     PowerPCCPU *cpu = POWERPC_CPU(cs);
27     CPUPPCState *env = &cpu->env;
28     target_ulong lpcr;
29 
30     cpu_synchronize_state(cs);
31     lpcr = env->spr[SPR_LPCR];
32     lpcr &= ~s->mask;
33     lpcr |= s->value;
34     ppc_store_lpcr(cpu, lpcr);
35 }
36 
37 static void set_all_lpcrs(target_ulong value, target_ulong mask)
38 {
39     CPUState *cs;
40     struct LPCRSyncState s = {
41         .value = value,
42         .mask = mask
43     };
44     CPU_FOREACH(cs) {
45         run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s));
46     }
47 }
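/*
 * Usage sketch (illustrative, not part of the original file): callers hand
 * set_all_lpcrs() the new bit values plus a mask of the bits to touch.
 * The H_SET_MODE endianness handler further down, for instance, does:
 *
 *     set_all_lpcrs(LPCR_ILE, LPCR_ILE);   (set ILE on every vCPU)
 *     set_all_lpcrs(0, LPCR_ILE);          (clear ILE on every vCPU)
 *
 * run_on_cpu() executes do_lpcr_sync() in each vCPU's own context, so the
 * LPCR update cannot race with that vCPU running.
 */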
48 
49 static bool has_spr(PowerPCCPU *cpu, int spr)
50 {
51     /* We can test whether the SPR is defined by checking for a valid name */
52     return cpu->env.spr_cb[spr].name != NULL;
53 }
54 
55 static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
56 {
57     /*
58      * hash value/pteg group index is normalized by HPT mask
59      */
60     if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
61         return false;
62     }
63     return true;
64 }
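/*
 * Worked example (illustrative): a 256KiB HPT holds 256KiB / 128B = 2048
 * PTEGs, so ppc_hash64_hpt_mask() is 2047 (0x7ff).  A ptex of 0x3210
 * indexes PTEG 0x3210 / 8 = 0x642, which is under the mask and therefore
 * valid; a ptex of 0x10000 would index PTEG 0x2000 and be rejected.
 */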
65 
66 static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr)
67 {
68     MachineState *machine = MACHINE(spapr);
69     MemoryHotplugState *hpms = &spapr->hotplug_memory;
70 
71     if (addr < machine->ram_size) {
72         return true;
73     }
74     if ((addr >= hpms->base)
75         && ((addr - hpms->base) < memory_region_size(&hpms->mr))) {
76         return true;
77     }
78 
79     return false;
80 }
81 
82 static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
83                             target_ulong opcode, target_ulong *args)
84 {
85     target_ulong flags = args[0];
86     target_ulong ptex = args[1];
87     target_ulong pteh = args[2];
88     target_ulong ptel = args[3];
89     unsigned apshift;
90     target_ulong raddr;
91     target_ulong slot;
92     const ppc_hash_pte64_t *hptes;
93 
94     apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
95     if (!apshift) {
96         /* Bad page size encoding */
97         return H_PARAMETER;
98     }
99 
100     raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
101 
102     if (is_ram_address(spapr, raddr)) {
103         /* Regular RAM - should have WIMG=0010 */
104         if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
105             return H_PARAMETER;
106         }
107     } else {
108         target_ulong wimg_flags;
109         /* Looks like an IO address */
110         /* FIXME: What WIMG combinations could be sensible for IO?
111          * For now we allow WIMG=010x, but are there others? */
112         /* FIXME: Should we check against registered IO addresses? */
113         wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));
114 
115         if (wimg_flags != HPTE64_R_I &&
116             wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
117             return H_PARAMETER;
118         }
119     }
120 
121     pteh &= ~0x60ULL;
122 
123     if (!valid_ptex(cpu, ptex)) {
124         return H_PARAMETER;
125     }
126 
127     slot = ptex & 7ULL;
128     ptex = ptex & ~7ULL;
129 
130     if (likely((flags & H_EXACT) == 0)) {
131         hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
132         for (slot = 0; slot < 8; slot++) {
133             if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
134                 break;
135             }
136         }
137         ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
138         if (slot == 8) {
139             return H_PTEG_FULL;
140         }
141     } else {
142         hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
143         if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
144             ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
145             return H_PTEG_FULL;
146         }
147         ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
148     }
149 
150     ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
151 
152     args[0] = ptex + slot;
153     return H_SUCCESS;
154 }
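/*
 * Guest-side sketch (illustrative; plpar_hcall() is a stand-in for the
 * guest's hypercall wrapper, not something defined in QEMU):
 *
 *     target_ulong args[4] = {
 *         0,                        (flags: no H_EXACT, any free slot)
 *         pteg * HPTES_PER_GROUP,   (PTE index of the target group)
 *         pteh,                     (HPTE dword 0: AVPN, valid bit, ...)
 *         ptel,                     (HPTE dword 1: RPN, WIMG, PP bits)
 *     };
 *     if (plpar_hcall(H_ENTER, args) == H_SUCCESS)
 *         installed_ptex = args[0];  (slot actually chosen, see above)
 */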
155 
156 typedef enum {
157     REMOVE_SUCCESS = 0,
158     REMOVE_NOT_FOUND = 1,
159     REMOVE_PARM = 2,
160     REMOVE_HW = 3,
161 } RemoveResult;
162 
163 static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
164                                 target_ulong avpn,
165                                 target_ulong flags,
166                                 target_ulong *vp, target_ulong *rp)
167 {
168     const ppc_hash_pte64_t *hptes;
169     target_ulong v, r;
170 
171     if (!valid_ptex(cpu, ptex)) {
172         return REMOVE_PARM;
173     }
174 
175     hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
176     v = ppc_hash64_hpte0(cpu, hptes, 0);
177     r = ppc_hash64_hpte1(cpu, hptes, 0);
178     ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
179 
180     if ((v & HPTE64_V_VALID) == 0 ||
181         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
182         ((flags & H_ANDCOND) && (v & avpn) != 0)) {
183         return REMOVE_NOT_FOUND;
184     }
185     *vp = v;
186     *rp = r;
187     ppc_hash64_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
188     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
189     return REMOVE_SUCCESS;
190 }
191 
192 static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
193                              target_ulong opcode, target_ulong *args)
194 {
195     CPUPPCState *env = &cpu->env;
196     target_ulong flags = args[0];
197     target_ulong ptex = args[1];
198     target_ulong avpn = args[2];
199     RemoveResult ret;
200 
201     ret = remove_hpte(cpu, ptex, avpn, flags,
202                       &args[0], &args[1]);
203 
204     switch (ret) {
205     case REMOVE_SUCCESS:
206         check_tlb_flush(env, true);
207         return H_SUCCESS;
208 
209     case REMOVE_NOT_FOUND:
210         return H_NOT_FOUND;
211 
212     case REMOVE_PARM:
213         return H_PARAMETER;
214 
215     case REMOVE_HW:
216         return H_HARDWARE;
217     }
218 
219     g_assert_not_reached();
220 }
221 
222 #define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
223 #define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
224 #define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
225 #define   H_BULK_REMOVE_END            0xc000000000000000ULL
226 #define H_BULK_REMOVE_CODE             0x3000000000000000ULL
227 #define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
228 #define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
229 #define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
230 #define   H_BULK_REMOVE_HW             0x3000000000000000ULL
231 #define H_BULK_REMOVE_RC               0x0c00000000000000ULL
232 #define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
233 #define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
234 #define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
235 #define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
236 #define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL
237 
238 #define H_BULK_REMOVE_MAX_BATCH        4
239 
240 static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
241                                   target_ulong opcode, target_ulong *args)
242 {
243     CPUPPCState *env = &cpu->env;
244     int i;
245     target_ulong rc = H_SUCCESS;
246 
247     for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
248         target_ulong *tsh = &args[i*2];
249         target_ulong tsl = args[i*2 + 1];
250         target_ulong v, r, ret;
251 
252         if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
253             break;
254         } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
255             return H_PARAMETER;
256         }
257 
258         *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
259         *tsh |= H_BULK_REMOVE_RESPONSE;
260 
261         if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
262             *tsh |= H_BULK_REMOVE_PARM;
263             return H_PARAMETER;
264         }
265 
266         ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
267                           (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
268                           &v, &r);
269 
270         *tsh |= ret << 60;
271 
272         switch (ret) {
273         case REMOVE_SUCCESS:
274             *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
275             break;
276 
277         case REMOVE_PARM:
278             rc = H_PARAMETER;
279             goto exit;
280 
281         case REMOVE_HW:
282             rc = H_HARDWARE;
283             goto exit;
284         }
285     }
286  exit:
287     check_tlb_flush(env, true);
288 
289     return rc;
290 }
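/*
 * Encoding sketch (illustrative): each of the four batch entries is a
 * (tsh, tsl) register pair.  A hypothetical guest removing one HPTE with
 * an AVPN check, then terminating the batch, would pass:
 *
 *     args[0] = H_BULK_REMOVE_REQUEST | H_BULK_REMOVE_AVPN
 *               | (ptex & H_BULK_REMOVE_PTEX);           (tsh)
 *     args[1] = avpn;                                    (tsl)
 *     args[2] = H_BULK_REMOVE_END;                       (terminator)
 *
 * On return, the TYPE field of each processed tsh is rewritten to
 * RESPONSE and its CODE field holds the RemoveResult, shifted into
 * H_BULK_REMOVE_CODE by the "*tsh |= ret << 60" above.
 */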
291 
292 static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
293                               target_ulong opcode, target_ulong *args)
294 {
295     CPUPPCState *env = &cpu->env;
296     target_ulong flags = args[0];
297     target_ulong ptex = args[1];
298     target_ulong avpn = args[2];
299     const ppc_hash_pte64_t *hptes;
300     target_ulong v, r;
301 
302     if (!valid_ptex(cpu, ptex)) {
303         return H_PARAMETER;
304     }
305 
306     hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
307     v = ppc_hash64_hpte0(cpu, hptes, 0);
308     r = ppc_hash64_hpte1(cpu, hptes, 0);
309     ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
310 
311     if ((v & HPTE64_V_VALID) == 0 ||
312         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
313         return H_NOT_FOUND;
314     }
315 
316     r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
317            HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
318     r |= (flags << 55) & HPTE64_R_PP0;
319     r |= (flags << 48) & HPTE64_R_KEY_HI;
320     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
321     ppc_hash64_store_hpte(cpu, ptex,
322                           (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
323     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
324     /* Flush the TLB */
325     check_tlb_flush(env, true);
326     /* No memory barrier needed, thanks to QEMU's global lock */
327     ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
328     return H_SUCCESS;
329 }
330 
331 static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
332                            target_ulong opcode, target_ulong *args)
333 {
334     target_ulong flags = args[0];
335     target_ulong ptex = args[1];
336     uint8_t *hpte;
337     int i, ridx, n_entries = 1;
338 
339     if (!valid_ptex(cpu, ptex)) {
340         return H_PARAMETER;
341     }
342 
343     if (flags & H_READ_4) {
344         /* Clear the two low order bits */
345         ptex &= ~(3ULL);
346         n_entries = 4;
347     }
348 
349     hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64);
350 
351     for (i = 0, ridx = 0; i < n_entries; i++) {
352         args[ridx++] = ldq_p(hpte);
353         args[ridx++] = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
354         hpte += HASH_PTE_SIZE_64;
355     }
356 
357     return H_SUCCESS;
358 }
359 
360 struct sPAPRPendingHPT {
361     /* These fields are read-only after initialization */
362     int shift;
363     QemuThread thread;
364 
365     /* These fields are protected by the BQL */
366     bool complete;
367 
368     /* These fields are private to the preparation thread if
369      * !complete, otherwise protected by the BQL */
370     int ret;
371     void *hpt;
372 };
373 
374 static void free_pending_hpt(sPAPRPendingHPT *pending)
375 {
376     if (pending->hpt) {
377         qemu_vfree(pending->hpt);
378     }
379 
380     g_free(pending);
381 }
382 
383 static void *hpt_prepare_thread(void *opaque)
384 {
385     sPAPRPendingHPT *pending = opaque;
386     size_t size = 1ULL << pending->shift;
387 
388     pending->hpt = qemu_memalign(size, size);
389     if (pending->hpt) {
390         memset(pending->hpt, 0, size);
391         pending->ret = H_SUCCESS;
392     } else {
393         pending->ret = H_NO_MEM;
394     }
395 
396     qemu_mutex_lock_iothread();
397 
398     if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
399         /* Ready to go */
400         pending->complete = true;
401     } else {
402         /* We've been cancelled, clean ourselves up */
403         free_pending_hpt(pending);
404     }
405 
406     qemu_mutex_unlock_iothread();
407     return NULL;
408 }
409 
410 /* Must be called with BQL held */
411 static void cancel_hpt_prepare(sPAPRMachineState *spapr)
412 {
413     sPAPRPendingHPT *pending = spapr->pending_hpt;
414 
415     /* Let the thread know it's cancelled */
416     spapr->pending_hpt = NULL;
417 
418     if (!pending) {
419         /* Nothing to do */
420         return;
421     }
422 
423     if (!pending->complete) {
424         /* thread will clean itself up */
425         return;
426     }
427 
428     free_pending_hpt(pending);
429 }
430 
431 /* Convert a return code from the KVM ioctl()s implementing HPT resizing
432  * into a PAPR hypercall return code */
433 static target_ulong resize_hpt_convert_rc(int ret)
434 {
435     if (ret >= 100000) {
436         return H_LONG_BUSY_ORDER_100_SEC;
437     } else if (ret >= 10000) {
438         return H_LONG_BUSY_ORDER_10_SEC;
439     } else if (ret >= 1000) {
440         return H_LONG_BUSY_ORDER_1_SEC;
441     } else if (ret >= 100) {
442         return H_LONG_BUSY_ORDER_100_MSEC;
443     } else if (ret >= 10) {
444         return H_LONG_BUSY_ORDER_10_MSEC;
445     } else if (ret > 0) {
446         return H_LONG_BUSY_ORDER_1_MSEC;
447     }
448 
449     switch (ret) {
450     case 0:
451         return H_SUCCESS;
452     case -EPERM:
453         return H_AUTHORITY;
454     case -EINVAL:
455         return H_PARAMETER;
456     case -ENXIO:
457         return H_CLOSED;
458     case -ENOSPC:
459         return H_PTEG_FULL;
460     case -EBUSY:
461         return H_BUSY;
462     case -ENOMEM:
463         return H_NO_MEM;
464     default:
465         return H_HARDWARE;
466     }
467 }
468 
469 static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
470                                          sPAPRMachineState *spapr,
471                                          target_ulong opcode,
472                                          target_ulong *args)
473 {
474     target_ulong flags = args[0];
475     int shift = args[1];
476     sPAPRPendingHPT *pending = spapr->pending_hpt;
477     uint64_t current_ram_size;
478     int rc;
479 
480     if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
481         return H_AUTHORITY;
482     }
483 
484     if (!spapr->htab_shift) {
485         /* Radix guest, no HPT */
486         return H_NOT_AVAILABLE;
487     }
488 
489     trace_spapr_h_resize_hpt_prepare(flags, shift);
490 
491     if (flags != 0) {
492         return H_PARAMETER;
493     }
494 
495     if (shift && ((shift < 18) || (shift > 46))) {
496         return H_PARAMETER;
497     }
498 
499     current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size();
500 
501     /* We only allow the guest to allocate an HPT one order above what
502      * we'd normally give them (to stop a small guest claiming a huge
503      * chunk of resources in the HPT) */
504     if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) {
505         return H_RESOURCE;
506     }
507 
508     rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);
509     if (rc != -ENOSYS) {
510         return resize_hpt_convert_rc(rc);
511     }
512 
513     if (pending) {
514         /* something already in progress */
515         if (pending->shift == shift) {
516             /* and it's suitable */
517             if (pending->complete) {
518                 return pending->ret;
519             } else {
520                 return H_LONG_BUSY_ORDER_100_MSEC;
521             }
522         }
523 
524         /* not suitable, cancel and replace */
525         cancel_hpt_prepare(spapr);
526     }
527 
528     if (!shift) {
529         /* nothing to do */
530         return H_SUCCESS;
531     }
532 
533     /* start new prepare */
534 
535     pending = g_new0(sPAPRPendingHPT, 1);
536     pending->shift = shift;
537     pending->ret = H_HARDWARE;
538 
539     qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
540                        hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
541 
542     spapr->pending_hpt = pending;
543 
544     /* In theory we could estimate the time more accurately based on
545      * the new size, but there's not much point */
546     return H_LONG_BUSY_ORDER_100_MSEC;
547 }
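/*
 * Protocol sketch (illustrative; plpar_hcall() is a stand-in for the
 * guest's hypercall wrapper): resizing is a PREPARE/COMMIT pair, with
 * the guest polling while the preparation thread allocates the new HPT:
 *
 *     rc = plpar_hcall(H_RESIZE_HPT_PREPARE, 0, new_shift);
 *     while (rc is one of the H_LONG_BUSY_* codes) {
 *         (sleep for roughly the hinted interval)
 *         rc = plpar_hcall(H_RESIZE_HPT_PREPARE, 0, new_shift);
 *     }
 *     if (rc == H_SUCCESS)
 *         rc = plpar_hcall(H_RESIZE_HPT_COMMIT, 0, new_shift);
 */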
548 
549 static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
550 {
551     uint8_t *addr = htab;
552 
553     addr += pteg * HASH_PTEG_SIZE_64;
554     addr += slot * HASH_PTE_SIZE_64;
555     return ldq_p(addr);
556 }
557 
558 static void new_hpte_store(void *htab, uint64_t pteg, int slot,
559                            uint64_t pte0, uint64_t pte1)
560 {
561     uint8_t *addr = htab;
562 
563     addr += pteg * HASH_PTEG_SIZE_64;
564     addr += slot * HASH_PTE_SIZE_64;
565 
566     stq_p(addr, pte0);
567     stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1);
568 }
569 
570 static int rehash_hpte(PowerPCCPU *cpu,
571                        const ppc_hash_pte64_t *hptes,
572                        void *old_hpt, uint64_t oldsize,
573                        void *new_hpt, uint64_t newsize,
574                        uint64_t pteg, int slot)
575 {
576     uint64_t old_hash_mask = (oldsize >> 7) - 1;
577     uint64_t new_hash_mask = (newsize >> 7) - 1;
578     target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
579     target_ulong pte1;
580     uint64_t avpn;
581     unsigned base_pg_shift;
582     uint64_t hash, new_pteg, replace_pte0;
583 
584     if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
585         return H_SUCCESS;
586     }
587 
588     pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
589 
590     base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
591     assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
592     avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
593 
594     if (pte0 & HPTE64_V_SECONDARY) {
595         pteg = ~pteg;
596     }
597 
598     if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
599         uint64_t offset, vsid;
600 
601         /* We only have 28 - 23 bits of offset in avpn */
602         offset = (avpn & 0x1f) << 23;
603         vsid = avpn >> 5;
604         /* We can find more bits from the pteg value */
605         if (base_pg_shift < 23) {
606             offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
607         }
608 
609         hash = vsid ^ (offset >> base_pg_shift);
610     } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
611         uint64_t offset, vsid;
612 
613         /* We only have 40 - 23 bits of seg_off in avpn */
614         offset = (avpn & 0x1ffff) << 23;
615         vsid = avpn >> 17;
616         if (base_pg_shift < 23) {
617             offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
618                 << base_pg_shift;
619         }
620 
621         hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
622     } else {
623         error_report("rehash_pte: Bad segment size in HPTE");
624         return H_HARDWARE;
625     }
626 
627     new_pteg = hash & new_hash_mask;
628     if (pte0 & HPTE64_V_SECONDARY) {
629         assert(~pteg == (hash & old_hash_mask));
630         new_pteg = ~new_pteg;
631     } else {
632         assert(pteg == (hash & old_hash_mask));
633     }
634     assert((oldsize != newsize) || (pteg == new_pteg));
635     replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
636     /*
637      * Strictly speaking, we don't need all these tests, since we only
638      * ever rehash bolted HPTEs.  We might in future handle non-bolted
639      * HPTEs, though so make the logic correct for those cases as
640      * well.
641      */
642     if (replace_pte0 & HPTE64_V_VALID) {
643         assert(newsize < oldsize);
644         if (replace_pte0 & HPTE64_V_BOLTED) {
645             if (pte0 & HPTE64_V_BOLTED) {
646                 /* Bolted collision, nothing we can do */
647                 return H_PTEG_FULL;
648             } else {
649                 /* Discard this hpte */
650                 return H_SUCCESS;
651             }
652         }
653     }
654 
655     new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
656     return H_SUCCESS;
657 }
658 
659 static int rehash_hpt(PowerPCCPU *cpu,
660                       void *old_hpt, uint64_t oldsize,
661                       void *new_hpt, uint64_t newsize)
662 {
663     uint64_t n_ptegs = oldsize >> 7;
664     uint64_t pteg;
665     int slot;
666     int rc;
667 
668     for (pteg = 0; pteg < n_ptegs; pteg++) {
669         hwaddr ptex = pteg * HPTES_PER_GROUP;
670         const ppc_hash_pte64_t *hptes
671             = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
672 
673         if (!hptes) {
674             return H_HARDWARE;
675         }
676 
677         for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
678             rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
679                              pteg, slot);
680             if (rc != H_SUCCESS) {
681                 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
682                 return rc;
683             }
684         }
685         ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
686     }
687 
688     return H_SUCCESS;
689 }
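/*
 * Note (added for clarity): a PTEG is HASH_PTEG_SIZE_64 == 128 bytes, so
 * "size >> 7" in rehash_hpt() and rehash_hpte() above is simply the
 * number of PTEGs in a table, and the hash masks are that count minus one.
 */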
690 
691 static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data)
692 {
693     int ret;
694 
695     cpu_synchronize_state(cs);
696 
697     ret = kvmppc_put_books_sregs(POWERPC_CPU(cs));
698     if (ret < 0) {
699         error_report("failed to push sregs to KVM: %s", strerror(-ret));
700         exit(1);
701     }
702 }
703 
704 static void push_sregs_to_kvm_pr(sPAPRMachineState *spapr)
705 {
706     CPUState *cs;
707 
708     /*
709      * This is a hack for the benefit of KVM PR - it abuses the SDR1
710      * slot in kvm_sregs to communicate the userspace address of the
711      * HPT
712      */
713     if (!kvm_enabled() || !spapr->htab) {
714         return;
715     }
716 
717     CPU_FOREACH(cs) {
718         run_on_cpu(cs, do_push_sregs_to_kvm_pr, RUN_ON_CPU_NULL);
719     }
720 }
721 
722 static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
723                                         sPAPRMachineState *spapr,
724                                         target_ulong opcode,
725                                         target_ulong *args)
726 {
727     target_ulong flags = args[0];
728     target_ulong shift = args[1];
729     sPAPRPendingHPT *pending = spapr->pending_hpt;
730     int rc;
731     size_t newsize;
732 
733     if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
734         return H_AUTHORITY;
735     }
736 
737     if (!spapr->htab_shift) {
738         /* Radix guest, no HPT */
739         return H_NOT_AVAILABLE;
740     }
741 
742     trace_spapr_h_resize_hpt_commit(flags, shift);
743 
744     rc = kvmppc_resize_hpt_commit(cpu, flags, shift);
745     if (rc != -ENOSYS) {
746         rc = resize_hpt_convert_rc(rc);
747         if (rc == H_SUCCESS) {
748             /* Need to set the new htab_shift in the machine state */
749             spapr->htab_shift = shift;
750         }
751         return rc;
752     }
753 
754     if (flags != 0) {
755         return H_PARAMETER;
756     }
757 
758     if (!pending || (pending->shift != shift)) {
759         /* no matching prepare */
760         return H_CLOSED;
761     }
762 
763     if (!pending->complete) {
764         /* prepare has not completed */
765         return H_BUSY;
766     }
767 
768     /* Shouldn't have got past PREPARE without an HPT */
769     g_assert(spapr->htab_shift);
770 
771     newsize = 1ULL << pending->shift;
772     rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
773                     pending->hpt, newsize);
774     if (rc == H_SUCCESS) {
775         qemu_vfree(spapr->htab);
776         spapr->htab = pending->hpt;
777         spapr->htab_shift = pending->shift;
778 
779         push_sregs_to_kvm_pr(spapr);
780 
781         pending->hpt = NULL; /* so it's not free()d */
782     }
783 
784     /* Clean up */
785     spapr->pending_hpt = NULL;
786     free_pending_hpt(pending);
787 
788     return rc;
789 }
790 
791 static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr,
792                                 target_ulong opcode, target_ulong *args)
793 {
794     cpu_synchronize_state(CPU(cpu));
795     cpu->env.spr[SPR_SPRG0] = args[0];
796 
797     return H_SUCCESS;
798 }
799 
800 static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
801                                target_ulong opcode, target_ulong *args)
802 {
803     if (!has_spr(cpu, SPR_DABR)) {
804         return H_HARDWARE;              /* DABR register not available */
805     }
806     cpu_synchronize_state(CPU(cpu));
807 
808     if (has_spr(cpu, SPR_DABRX)) {
809         cpu->env.spr[SPR_DABRX] = 0x3;  /* Use Problem and Privileged state */
810     } else if (!(args[0] & 0x4)) {      /* Breakpoint Translation set? */
811         return H_RESERVED_DABR;
812     }
813 
814     cpu->env.spr[SPR_DABR] = args[0];
815     return H_SUCCESS;
816 }
817 
818 static target_ulong h_set_xdabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
819                                 target_ulong opcode, target_ulong *args)
820 {
821     target_ulong dabrx = args[1];
822 
823     if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) {
824         return H_HARDWARE;
825     }
826 
827     if ((dabrx & ~0xfULL) != 0 || (dabrx & H_DABRX_HYPERVISOR) != 0
828         || (dabrx & (H_DABRX_KERNEL | H_DABRX_USER)) == 0) {
829         return H_PARAMETER;
830     }
831 
832     cpu_synchronize_state(CPU(cpu));
833     cpu->env.spr[SPR_DABRX] = dabrx;
834     cpu->env.spr[SPR_DABR] = args[0];
835 
836     return H_SUCCESS;
837 }
838 
839 static target_ulong h_page_init(PowerPCCPU *cpu, sPAPRMachineState *spapr,
840                                 target_ulong opcode, target_ulong *args)
841 {
842     target_ulong flags = args[0];
843     hwaddr dst = args[1];
844     hwaddr src = args[2];
845     hwaddr len = TARGET_PAGE_SIZE;
846     uint8_t *pdst, *psrc;
847     target_long ret = H_SUCCESS;
848 
849     if (flags & ~(H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE
850                   | H_COPY_PAGE | H_ZERO_PAGE)) {
851         qemu_log_mask(LOG_UNIMP, "h_page_init: Bad flags (" TARGET_FMT_lx ")\n",
852                       flags);
853         return H_PARAMETER;
854     }
855 
856     /* Map-in destination */
857     if (!is_ram_address(spapr, dst) || (dst & ~TARGET_PAGE_MASK) != 0) {
858         return H_PARAMETER;
859     }
860     pdst = cpu_physical_memory_map(dst, &len, 1);
861     if (!pdst || len != TARGET_PAGE_SIZE) {
862         return H_PARAMETER;
863     }
864 
865     if (flags & H_COPY_PAGE) {
866         /* Map-in source, copy to destination, and unmap source again */
867         if (!is_ram_address(spapr, src) || (src & ~TARGET_PAGE_MASK) != 0) {
868             ret = H_PARAMETER;
869             goto unmap_out;
870         }
871         psrc = cpu_physical_memory_map(src, &len, 0);
872         if (!psrc || len != TARGET_PAGE_SIZE) {
873             ret = H_PARAMETER;
874             goto unmap_out;
875         }
876         memcpy(pdst, psrc, len);
877         cpu_physical_memory_unmap(psrc, len, 0, len);
878     } else if (flags & H_ZERO_PAGE) {
879         memset(pdst, 0, len);          /* Just clear the destination page */
880     }
881 
882     if (kvm_enabled() && (flags & H_ICACHE_SYNCHRONIZE) != 0) {
883         kvmppc_dcbst_range(cpu, pdst, len);
884     }
885     if (flags & (H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE)) {
886         if (kvm_enabled()) {
887             kvmppc_icbi_range(cpu, pdst, len);
888         } else {
889             tb_flush(CPU(cpu));
890         }
891     }
892 
893 unmap_out:
894     cpu_physical_memory_unmap(pdst, TARGET_PAGE_SIZE, 1, len);
895     return ret;
896 }
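/*
 * Usage sketch (illustrative; plpar_hcall() is a stand-in): a guest
 * zeroing a page and making the result visible to instruction fetch
 * would combine the flag bits handled above:
 *
 *     plpar_hcall(H_PAGE_INIT,
 *                 H_ZERO_PAGE | H_ICACHE_SYNCHRONIZE,   (flags)
 *                 dst_real_addr,                        (page aligned)
 *                 0);                                   (src unused here)
 */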
897 
898 #define FLAGS_REGISTER_VPA         0x0000200000000000ULL
899 #define FLAGS_REGISTER_DTL         0x0000400000000000ULL
900 #define FLAGS_REGISTER_SLBSHADOW   0x0000600000000000ULL
901 #define FLAGS_DEREGISTER_VPA       0x0000a00000000000ULL
902 #define FLAGS_DEREGISTER_DTL       0x0000c00000000000ULL
903 #define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL
904 
905 #define VPA_MIN_SIZE           640
906 #define VPA_SIZE_OFFSET        0x4
907 #define VPA_SHARED_PROC_OFFSET 0x9
908 #define VPA_SHARED_PROC_VAL    0x2
909 
910 static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa)
911 {
912     CPUState *cs = CPU(ppc_env_get_cpu(env));
913     uint16_t size;
914     uint8_t tmp;
915 
916     if (vpa == 0) {
917         hcall_dprintf("Can't cope with registering a VPA at logical 0\n");
918         return H_HARDWARE;
919     }
920 
921     if (vpa % env->dcache_line_size) {
922         return H_PARAMETER;
923     }
924     /* FIXME: bounds check the address */
925 
926     size = lduw_be_phys(cs->as, vpa + VPA_SIZE_OFFSET);
927 
928     if (size < VPA_MIN_SIZE) {
929         return H_PARAMETER;
930     }
931 
932     /* VPA is not allowed to cross a page boundary */
933     if ((vpa / 4096) != ((vpa + size - 1) / 4096)) {
934         return H_PARAMETER;
935     }
936 
937     env->vpa_addr = vpa;
938 
939     tmp = ldub_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET);
940     tmp |= VPA_SHARED_PROC_VAL;
941     stb_phys(cs->as, env->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
942 
943     return H_SUCCESS;
944 }
945 
946 static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa)
947 {
948     if (env->slb_shadow_addr) {
949         return H_RESOURCE;
950     }
951 
952     if (env->dtl_addr) {
953         return H_RESOURCE;
954     }
955 
956     env->vpa_addr = 0;
957     return H_SUCCESS;
958 }
959 
960 static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr)
961 {
962     CPUState *cs = CPU(ppc_env_get_cpu(env));
963     uint32_t size;
964 
965     if (addr == 0) {
966         hcall_dprintf("Can't cope with SLB shadow at logical 0\n");
967         return H_HARDWARE;
968     }
969 
970     size = ldl_be_phys(cs->as, addr + 0x4);
971     if (size < 0x8) {
972         return H_PARAMETER;
973     }
974 
975     if ((addr / 4096) != ((addr + size - 1) / 4096)) {
976         return H_PARAMETER;
977     }
978 
979     if (!env->vpa_addr) {
980         return H_RESOURCE;
981     }
982 
983     env->slb_shadow_addr = addr;
984     env->slb_shadow_size = size;
985 
986     return H_SUCCESS;
987 }
988 
989 static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong addr)
990 {
991     env->slb_shadow_addr = 0;
992     env->slb_shadow_size = 0;
993     return H_SUCCESS;
994 }
995 
996 static target_ulong register_dtl(CPUPPCState *env, target_ulong addr)
997 {
998     CPUState *cs = CPU(ppc_env_get_cpu(env));
999     uint32_t size;
1000 
1001     if (addr == 0) {
1002         hcall_dprintf("Can't cope with DTL at logical 0\n");
1003         return H_HARDWARE;
1004     }
1005 
1006     size = ldl_be_phys(cs->as, addr + 0x4);
1007 
1008     if (size < 48) {
1009         return H_PARAMETER;
1010     }
1011 
1012     if (!env->vpa_addr) {
1013         return H_RESOURCE;
1014     }
1015 
1016     env->dtl_addr = addr;
1017     env->dtl_size = size;
1018 
1019     return H_SUCCESS;
1020 }
1021 
1022 static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr)
1023 {
1024     env->dtl_addr = 0;
1025     env->dtl_size = 0;
1026 
1027     return H_SUCCESS;
1028 }
1029 
1030 static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1031                                    target_ulong opcode, target_ulong *args)
1032 {
1033     target_ulong flags = args[0];
1034     target_ulong procno = args[1];
1035     target_ulong vpa = args[2];
1036     target_ulong ret = H_PARAMETER;
1037     CPUPPCState *tenv;
1038     PowerPCCPU *tcpu;
1039 
1040     tcpu = spapr_find_cpu(procno);
1041     if (!tcpu) {
1042         return H_PARAMETER;
1043     }
1044     tenv = &tcpu->env;
1045 
1046     switch (flags) {
1047     case FLAGS_REGISTER_VPA:
1048         ret = register_vpa(tenv, vpa);
1049         break;
1050 
1051     case FLAGS_DEREGISTER_VPA:
1052         ret = deregister_vpa(tenv, vpa);
1053         break;
1054 
1055     case FLAGS_REGISTER_SLBSHADOW:
1056         ret = register_slb_shadow(tenv, vpa);
1057         break;
1058 
1059     case FLAGS_DEREGISTER_SLBSHADOW:
1060         ret = deregister_slb_shadow(tenv, vpa);
1061         break;
1062 
1063     case FLAGS_REGISTER_DTL:
1064         ret = register_dtl(tenv, vpa);
1065         break;
1066 
1067     case FLAGS_DEREGISTER_DTL:
1068         ret = deregister_dtl(tenv, vpa);
1069         break;
1070     }
1071 
1072     return ret;
1073 }
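/*
 * Ordering sketch (illustrative): the helpers above require the VPA to
 * exist before the SLB shadow or DTL can be registered, and to be the
 * last thing deregistered.  A guest cycling a vCPU would thus issue,
 * in hypercall terms:
 *
 *     H_REGISTER_VPA(FLAGS_REGISTER_VPA,   procno, vpa)
 *     H_REGISTER_VPA(FLAGS_REGISTER_DTL,   procno, dtl)
 *     ... vCPU runs ...
 *     H_REGISTER_VPA(FLAGS_DEREGISTER_DTL, procno, 0)
 *     H_REGISTER_VPA(FLAGS_DEREGISTER_VPA, procno, 0)
 *
 * (function-call notation is shorthand for the hypercall, not real C).
 */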
1074 
1075 static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1076                            target_ulong opcode, target_ulong *args)
1077 {
1078     CPUPPCState *env = &cpu->env;
1079     CPUState *cs = CPU(cpu);
1080 
1081     env->msr |= (1ULL << MSR_EE);
1082     hreg_compute_hflags(env);
1083     if (!cpu_has_work(cs)) {
1084         cs->halted = 1;
1085         cs->exception_index = EXCP_HLT;
1086         cs->exit_request = 1;
1087     }
1088     return H_SUCCESS;
1089 }
1090 
1091 static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1092                            target_ulong opcode, target_ulong *args)
1093 {
1094     target_ulong rtas_r3 = args[0];
1095     uint32_t token = rtas_ld(rtas_r3, 0);
1096     uint32_t nargs = rtas_ld(rtas_r3, 1);
1097     uint32_t nret = rtas_ld(rtas_r3, 2);
1098 
1099     return spapr_rtas_call(cpu, spapr, token, nargs, rtas_r3 + 12,
1100                            nret, rtas_r3 + 12 + 4*nargs);
1101 }
1102 
1103 static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1104                                    target_ulong opcode, target_ulong *args)
1105 {
1106     CPUState *cs = CPU(cpu);
1107     target_ulong size = args[0];
1108     target_ulong addr = args[1];
1109 
1110     switch (size) {
1111     case 1:
1112         args[0] = ldub_phys(cs->as, addr);
1113         return H_SUCCESS;
1114     case 2:
1115         args[0] = lduw_phys(cs->as, addr);
1116         return H_SUCCESS;
1117     case 4:
1118         args[0] = ldl_phys(cs->as, addr);
1119         return H_SUCCESS;
1120     case 8:
1121         args[0] = ldq_phys(cs->as, addr);
1122         return H_SUCCESS;
1123     }
1124     return H_PARAMETER;
1125 }
1126 
1127 static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1128                                     target_ulong opcode, target_ulong *args)
1129 {
1130     CPUState *cs = CPU(cpu);
1131 
1132     target_ulong size = args[0];
1133     target_ulong addr = args[1];
1134     target_ulong val  = args[2];
1135 
1136     switch (size) {
1137     case 1:
1138         stb_phys(cs->as, addr, val);
1139         return H_SUCCESS;
1140     case 2:
1141         stw_phys(cs->as, addr, val);
1142         return H_SUCCESS;
1143     case 4:
1144         stl_phys(cs->as, addr, val);
1145         return H_SUCCESS;
1146     case 8:
1147         stq_phys(cs->as, addr, val);
1148         return H_SUCCESS;
1149     }
1150     return H_PARAMETER;
1151 }
1152 
1153 static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1154                                     target_ulong opcode, target_ulong *args)
1155 {
1156     CPUState *cs = CPU(cpu);
1157 
1158     target_ulong dst   = args[0]; /* Destination address */
1159     target_ulong src   = args[1]; /* Source address */
1160     target_ulong esize = args[2]; /* Element size (0=1,1=2,2=4,3=8) */
1161     target_ulong count = args[3]; /* Element count */
1162     target_ulong op    = args[4]; /* 0 = copy, 1 = invert */
1163     uint64_t tmp;
1164     unsigned int mask = (1 << esize) - 1;
1165     int step = 1 << esize;
1166 
1167     if (count > 0x80000000) {
1168         return H_PARAMETER;
1169     }
1170 
1171     if ((dst & mask) || (src & mask) || (op > 1)) {
1172         return H_PARAMETER;
1173     }
1174 
1175     if (dst >= src && dst < (src + (count << esize))) {
1176         dst = dst + ((count - 1) << esize);
1177         src = src + ((count - 1) << esize);
1178         step = -step;
1179     }
1180 
1181     while (count--) {
1182         switch (esize) {
1183         case 0:
1184             tmp = ldub_phys(cs->as, src);
1185             break;
1186         case 1:
1187             tmp = lduw_phys(cs->as, src);
1188             break;
1189         case 2:
1190             tmp = ldl_phys(cs->as, src);
1191             break;
1192         case 3:
1193             tmp = ldq_phys(cs->as, src);
1194             break;
1195         default:
1196             return H_PARAMETER;
1197         }
1198         if (op == 1) {
1199             tmp = ~tmp;
1200         }
1201         switch (esize) {
1202         case 0:
1203             stb_phys(cs->as, dst, tmp);
1204             break;
1205         case 1:
1206             stw_phys(cs->as, dst, tmp);
1207             break;
1208         case 2:
1209             stl_phys(cs->as, dst, tmp);
1210             break;
1211         case 3:
1212             stq_phys(cs->as, dst, tmp);
1213             break;
1214         }
1215         dst = dst + step;
1216         src = src + step;
1217     }
1218 
1219     return H_SUCCESS;
1220 }
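/*
 * Worked example (illustrative): with esize = 3 (8-byte elements),
 * src = 0x1000, dst = 0x1008 and count = 4, dst falls inside
 * [src, src + 0x20), so the overlap branch above rewinds to the last
 * element (src = 0x1018, dst = 0x1020) and copies backwards with
 * step = -8, giving memmove() semantics for overlapping ranges.
 */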
1221 
1222 static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1223                                    target_ulong opcode, target_ulong *args)
1224 {
1225     /* Nothing to do on emulation, KVM will trap this in the kernel */
1226     return H_SUCCESS;
1227 }
1228 
1229 static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1230                                    target_ulong opcode, target_ulong *args)
1231 {
1232     /* Nothing to do on emulation, KVM will trap this in the kernel */
1233     return H_SUCCESS;
1234 }
1235 
1236 static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
1237                                            target_ulong mflags,
1238                                            target_ulong value1,
1239                                            target_ulong value2)
1240 {
1241     if (value1) {
1242         return H_P3;
1243     }
1244     if (value2) {
1245         return H_P4;
1246     }
1247 
1248     switch (mflags) {
1249     case H_SET_MODE_ENDIAN_BIG:
1250         set_all_lpcrs(0, LPCR_ILE);
1251         spapr_pci_switch_vga(true);
1252         return H_SUCCESS;
1253 
1254     case H_SET_MODE_ENDIAN_LITTLE:
1255         set_all_lpcrs(LPCR_ILE, LPCR_ILE);
1256         spapr_pci_switch_vga(false);
1257         return H_SUCCESS;
1258     }
1259 
1260     return H_UNSUPPORTED_FLAG;
1261 }
1262 
1263 static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
1264                                                         target_ulong mflags,
1265                                                         target_ulong value1,
1266                                                         target_ulong value2)
1267 {
1268     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
1269 
1270     if (!(pcc->insns_flags2 & PPC2_ISA207S)) {
1271         return H_P2;
1272     }
1273     if (value1) {
1274         return H_P3;
1275     }
1276     if (value2) {
1277         return H_P4;
1278     }
1279 
1280     if (mflags == AIL_RESERVED) {
1281         return H_UNSUPPORTED_FLAG;
1282     }
1283 
1284     set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL);
1285 
1286     return H_SUCCESS;
1287 }
1288 
1289 static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1290                                target_ulong opcode, target_ulong *args)
1291 {
1292     target_ulong resource = args[1];
1293     target_ulong ret = H_P2;
1294 
1295     switch (resource) {
1296     case H_SET_MODE_RESOURCE_LE:
1297         ret = h_set_mode_resource_le(cpu, args[0], args[2], args[3]);
1298         break;
1299     case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
1300         ret = h_set_mode_resource_addr_trans_mode(cpu, args[0],
1301                                                   args[2], args[3]);
1302         break;
1303     }
1304 
1305     return ret;
1306 }
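/*
 * Usage sketch (illustrative; plpar_hcall() is a stand-in): a
 * little-endian guest kernel flips the interrupt endianness of all
 * vCPUs early in boot with:
 *
 *     plpar_hcall(H_SET_MODE,
 *                 H_SET_MODE_ENDIAN_LITTLE,    (args[0]: mflags)
 *                 H_SET_MODE_RESOURCE_LE,      (args[1]: resource)
 *                 0, 0);                       (args[2]/[3] must be 0)
 *
 * which lands in h_set_mode_resource_le() and sets LPCR_ILE everywhere.
 */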
1307 
1308 static target_ulong h_clean_slb(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1309                                 target_ulong opcode, target_ulong *args)
1310 {
1311     qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
1312                   opcode, " (H_CLEAN_SLB)");
1313     return H_FUNCTION;
1314 }
1315 
1316 static target_ulong h_invalidate_pid(PowerPCCPU *cpu, sPAPRMachineState *spapr,
1317                                      target_ulong opcode, target_ulong *args)
1318 {
1319     qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
1320                   opcode, " (H_INVALIDATE_PID)");
1321     return H_FUNCTION;
1322 }
1323 
1324 static void spapr_check_setup_free_hpt(sPAPRMachineState *spapr,
1325                                        uint64_t patbe_old, uint64_t patbe_new)
1326 {
1327     /*
1328      * We have 4 Options:
1329      * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing
1330      * HASH->RADIX                                  : Free HPT
1331      * RADIX->HASH                                  : Allocate HPT
1332      * NOTHING->HASH                                : Allocate HPT
1333      * Note: NOTHING covers the case where we told the guest it could choose
1334      *       later (so we assumed radix), and it has now called H_REG_PROC_TBL
1335      */
1336 
1337     if ((patbe_old & PATBE1_GR) == (patbe_new & PATBE1_GR)) {
1338         /* We assume RADIX, so this catches all the "Do Nothing" cases */
1339     } else if (!(patbe_old & PATBE1_GR)) {
1340         /* HASH->RADIX : Free HPT */
1341         spapr_free_hpt(spapr);
1342     } else if (!(patbe_new & PATBE1_GR)) {
1343         /* RADIX->HASH || NOTHING->HASH : Allocate HPT */
1344         spapr_setup_hpt_and_vrma(spapr);
1345     }
1346     return;
1347 }
1348 
1349 #define FLAGS_MASK              0x01FULL
1350 #define FLAG_MODIFY             0x10
1351 #define FLAG_REGISTER           0x08
1352 #define FLAG_RADIX              0x04
1353 #define FLAG_HASH_PROC_TBL      0x02
1354 #define FLAG_GTSE               0x01
1355 
1356 static target_ulong h_register_process_table(PowerPCCPU *cpu,
1357                                              sPAPRMachineState *spapr,
1358                                              target_ulong opcode,
1359                                              target_ulong *args)
1360 {
1361     target_ulong flags = args[0];
1362     target_ulong proc_tbl = args[1];
1363     target_ulong page_size = args[2];
1364     target_ulong table_size = args[3];
1365     uint64_t cproc;
1366 
1367     if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */
1368         return H_PARAMETER;
1369     }
1370     if (flags & FLAG_MODIFY) {
1371         if (flags & FLAG_REGISTER) {
1372             if (flags & FLAG_RADIX) { /* Register new RADIX process table */
1373                 if (proc_tbl & 0xfff || proc_tbl >> 60) {
1374                     return H_P2;
1375                 } else if (page_size) {
1376                     return H_P3;
1377                 } else if (table_size > 24) {
1378                     return H_P4;
1379                 }
1380                 cproc = PATBE1_GR | proc_tbl | table_size;
1381             } else { /* Register new HPT process table */
1382                 if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */
1383                     /* TODO - Not Supported */
1384                     /* Technically caused by flag bits => H_PARAMETER */
1385                     return H_PARAMETER;
1386                 } else { /* Hash with SLB */
1387                     if (proc_tbl >> 38) {
1388                         return H_P2;
1389                     } else if (page_size & ~0x7) {
1390                         return H_P3;
1391                     } else if (table_size > 24) {
1392                         return H_P4;
1393                     }
1394                 }
1395                 cproc = (proc_tbl << 25) | page_size << 5 | table_size;
1396             }
1397 
1398         } else { /* Deregister current process table */
1399             /* Set to benign value: (current GR) | 0. This allows
1400              * deregistration in KVM to succeed even if the radix bit in flags
1401              * doesn't match the radix bit in the old PATB. */
1402             cproc = spapr->patb_entry & PATBE1_GR;
1403         }
1404     } else { /* Maintain current registration */
1405         if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATBE1_GR)) {
1406             /* Technically caused by flag bits => H_PARAMETER */
1407             return H_PARAMETER; /* Existing Process Table Mismatch */
1408         }
1409         cproc = spapr->patb_entry;
1410     }
1411 
1412     /* Check if we need to setup OR free the hpt */
1413     spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc);
1414 
1415     spapr->patb_entry = cproc; /* Save new process table */
1416 
1417     /* Update the UPRT and GTSE bits in the LPCR for all cpus */
1418     set_all_lpcrs(((flags & (FLAG_RADIX | FLAG_HASH_PROC_TBL)) ? LPCR_UPRT : 0) |
1419                   ((flags & FLAG_GTSE) ? LPCR_GTSE : 0),
1420                   LPCR_UPRT | LPCR_GTSE);
1421 
1422     if (kvm_enabled()) {
1423         return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX,
1424                                        flags & FLAG_GTSE, cproc);
1425     }
1426     return H_SUCCESS;
1427 }
1428 
1429 #define H_SIGNAL_SYS_RESET_ALL         -1
1430 #define H_SIGNAL_SYS_RESET_ALLBUTSELF  -2
1431 
1432 static target_ulong h_signal_sys_reset(PowerPCCPU *cpu,
1433                                        sPAPRMachineState *spapr,
1434                                        target_ulong opcode, target_ulong *args)
1435 {
1436     target_long target = args[0];
1437     CPUState *cs;
1438 
1439     if (target < 0) {
1440         /* Broadcast */
1441         if (target < H_SIGNAL_SYS_RESET_ALLBUTSELF) {
1442             return H_PARAMETER;
1443         }
1444 
1445         CPU_FOREACH(cs) {
1446             PowerPCCPU *c = POWERPC_CPU(cs);
1447 
1448             if (target == H_SIGNAL_SYS_RESET_ALLBUTSELF) {
1449                 if (c == cpu) {
1450                     continue;
1451                 }
1452             }
1453             run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
1454         }
1455         return H_SUCCESS;
1456 
1457     } else {
1458         /* Unicast */
1459         cs = CPU(spapr_find_cpu(target));
1460         if (cs) {
1461             run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
1462             return H_SUCCESS;
1463         }
1464         return H_PARAMETER;
1465     }
1466 }
1467 
1468 static uint32_t cas_check_pvr(sPAPRMachineState *spapr, PowerPCCPU *cpu,
1469                               target_ulong *addr, bool *raw_mode_supported,
1470                               Error **errp)
1471 {
1472     bool explicit_match = false; /* Matched the CPU's real PVR */
1473     uint32_t max_compat = spapr->max_compat_pvr;
1474     uint32_t best_compat = 0;
1475     int i;
1476 
1477     /*
1478      * We scan the supplied table of PVRs looking for two things
1479      *   1. Is our real CPU PVR in the list?
1480      *   2. What's the "best" listed logical PVR
1481      */
1482     for (i = 0; i < 512; ++i) {
1483         uint32_t pvr, pvr_mask;
1484 
1485         pvr_mask = ldl_be_phys(&address_space_memory, *addr);
1486         pvr = ldl_be_phys(&address_space_memory, *addr + 4);
1487         *addr += 8;
1488 
1489         if (~pvr_mask & pvr) {
1490             break; /* Terminator record */
1491         }
1492 
1493         if ((cpu->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask)) {
1494             explicit_match = true;
1495         } else {
1496             if (ppc_check_compat(cpu, pvr, best_compat, max_compat)) {
1497                 best_compat = pvr;
1498             }
1499         }
1500     }
1501 
1502     if ((best_compat == 0) && (!explicit_match || max_compat)) {
1503         /* We couldn't find a suitable compatibility mode, and either
1504          * the guest doesn't support "raw" mode for this CPU, or raw
1505          * mode is disabled because a maximum compat mode is set */
1506         error_setg(errp, "Couldn't negotiate a suitable PVR during CAS");
1507         return 0;
1508     }
1509 
1510     *raw_mode_supported = explicit_match;
1511 
1512     /* Parsing finished */
1513     trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat);
1514 
1515     return best_compat;
1516 }
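/*
 * Table layout sketch (illustrative, hypothetical values): the guest
 * passes CAS an array of big-endian (pvr_mask, pvr) pairs, ended by an
 * entry whose pvr has bits set outside its mask:
 *
 *     0xffffffff, 0x004d0000    (an explicit real PVR)
 *     0xffffffff, 0x0f000004    (a logical "architected 2.07" PVR)
 *     0x00000000, 0xffffffff    (terminator: ~pvr_mask & pvr != 0)
 */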
1517 
1518 static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
1519                                                   sPAPRMachineState *spapr,
1520                                                   target_ulong opcode,
1521                                                   target_ulong *args)
1522 {
1523     /* Working address in data buffer */
1524     target_ulong addr = ppc64_phys_to_real(args[0]);
1525     target_ulong ov_table;
1526     uint32_t cas_pvr;
1527     sPAPROptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates;
1528     bool guest_radix;
1529     Error *local_err = NULL;
1530     bool raw_mode_supported = false;
1531 
1532     cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
1533     if (local_err) {
1534         error_report_err(local_err);
1535         return H_HARDWARE;
1536     }
1537 
1538     /* Update CPUs */
1539     if (cpu->compat_pvr != cas_pvr) {
1540         ppc_set_compat_all(cas_pvr, &local_err);
1541         if (local_err) {
1542             /* We fail to set compat mode (likely because running with KVM PR),
1543              * but maybe we can fallback to raw mode if the guest supports it.
1544              */
1545             if (!raw_mode_supported) {
1546                 error_report_err(local_err);
1547                 return H_HARDWARE;
1548             }
1549             local_err = NULL;
1550         }
1551     }
1552 
1553     /* For later use: @ov_table now points to the first option vector */
1554     ov_table = addr;
1555 
1556     ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
1557     ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
1558     if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
1559         error_report("guest requested hash and radix MMU, which is invalid.");
1560         exit(EXIT_FAILURE);
1561     }
1562     /* The radix/hash bit in byte 24 requires special handling: */
1563     guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
1564     spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300);
1565 
1566     /*
1567      * HPT resizing is a bit of a special case, because when enabled
1568      * we assume an HPT guest will support it until it says it
1569      * doesn't, instead of assuming it won't support it until it says
1570      * it does.  Strictly speaking that approach could break for
1571      * guests which don't make a CAS call, but those are so old we
1572      * don't care about them.  Without that assumption we'd have to
1573      * make at least a temporary allocation of an HPT sized for max
1574      * memory, which could be impossibly difficult under KVM HV if
1575      * maxram is large.
1576      */
1577     if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) {
1578         int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
1579 
1580         if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) {
1581             error_report(
1582                 "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
1583             exit(1);
1584         }
1585 
1586         if (spapr->htab_shift < maxshift) {
1587             /* Guest doesn't know about HPT resizing, so we
1588              * pre-emptively resize for the maximum permitted RAM.  At
1589              * the point this is called, nothing should have been
1590              * entered into the existing HPT */
1591             spapr_reallocate_hpt(spapr, maxshift, &error_fatal);
1592             push_sregs_to_kvm_pr(spapr);
1593         }
1594     }
1595 
1596     /* NOTE: there are actually a number of ov5 bits where input from the
1597      * guest is always zero, and the platform/QEMU enables them independently
1598      * of guest input. To model these properly we'd want some sort of mask,
1599      * but since they only currently apply to memory migration as defined
1600      * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
1601      * to worry about this for now.
1602      */
1603     ov5_cas_old = spapr_ovec_clone(spapr->ov5_cas);
1604 
1605     /* Also clear the radix/hash bit from the current ov5_cas bits, to
1606      * stay in sync with the new ov5 bits. Otherwise the radix bit would
1607      * appear to have been removed, which would trigger a reset loop
1608      */
1609     spapr_ovec_clear(ov5_cas_old, OV5_MMU_RADIX_300);
1610 
1611     /* full range of negotiated ov5 capabilities */
1612     spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest);
1613     spapr_ovec_cleanup(ov5_guest);
1614     /* capabilities that have been added since CAS-generated guest reset.
1615      * if capabilities have since been removed, generate another reset
1616      */
1617     ov5_updates = spapr_ovec_new();
1618     spapr->cas_reboot = spapr_ovec_diff(ov5_updates,
1619                                         ov5_cas_old, spapr->ov5_cas);
1620     /* Now that processing is finished, set the radix/hash bit for the
1621      * guest if it requested a valid mode; otherwise terminate the boot. */
1622     if (guest_radix) {
1623         if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) {
1624             error_report("Guest requested unavailable MMU mode (radix).");
1625             exit(EXIT_FAILURE);
1626         }
1627         spapr_ovec_set(spapr->ov5_cas, OV5_MMU_RADIX_300);
1628     } else {
1629         if (kvm_enabled() && kvmppc_has_cap_mmu_radix()
1630             && !kvmppc_has_cap_mmu_hash_v3()) {
1631             error_report("Guest requested unavailable MMU mode (hash).");
1632             exit(EXIT_FAILURE);
1633         }
1634     }
1635     spapr->cas_legacy_guest_workaround = !spapr_ovec_test(ov1_guest,
1636                                                           OV1_PPC_3_00);
1637     if (!spapr->cas_reboot) {
1638         /* If spapr_machine_reset() did not set up an HPT but one is necessary
1639          * (because the guest isn't going to use radix) then set it up here. */
1640         if ((spapr->patb_entry & PATBE1_GR) && !guest_radix) {
1641             /* legacy hash or new hash: */
1642             spapr_setup_hpt_and_vrma(spapr);
1643         }
1644         spapr->cas_reboot =
1645             (spapr_h_cas_compose_response(spapr, args[1], args[2],
1646                                           ov5_updates) != 0);
1647     }
1648     spapr_ovec_cleanup(ov5_updates);
1649 
1650     if (spapr->cas_reboot) {
1651         qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1652     }
1653 
1654     return H_SUCCESS;
1655 }
1656 
1657 static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
1658                                               sPAPRMachineState *spapr,
1659                                               target_ulong opcode,
1660                                               target_ulong *args)
1661 {
1662     uint64_t characteristics = H_CPU_CHAR_HON_BRANCH_HINTS &
1663                                ~H_CPU_CHAR_THR_RECONF_TRIG;
1664     uint64_t behaviour = H_CPU_BEHAV_FAVOUR_SECURITY;
1665     uint8_t safe_cache = spapr_get_cap(spapr, SPAPR_CAP_CFPC);
1666     uint8_t safe_bounds_check = spapr_get_cap(spapr, SPAPR_CAP_SBBC);
1667     uint8_t safe_indirect_branch = spapr_get_cap(spapr, SPAPR_CAP_IBS);
1668 
1669     switch (safe_cache) {
1670     case SPAPR_CAP_WORKAROUND:
1671         characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30;
1672         characteristics |= H_CPU_CHAR_L1D_FLUSH_TRIG2;
1673         characteristics |= H_CPU_CHAR_L1D_THREAD_PRIV;
1674         behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
1675         break;
1676     case SPAPR_CAP_FIXED:
1677         break;
1678     default: /* broken */
1679         assert(safe_cache == SPAPR_CAP_BROKEN);
1680         behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
1681         break;
1682     }
1683 
1684     switch (safe_bounds_check) {
1685     case SPAPR_CAP_WORKAROUND:
1686         characteristics |= H_CPU_CHAR_SPEC_BAR_ORI31;
1687         behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
1688         break;
1689     case SPAPR_CAP_FIXED:
1690         break;
1691     default: /* broken */
1692         assert(safe_bounds_check == SPAPR_CAP_BROKEN);
1693         behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
1694         break;
1695     }
1696 
1697     switch (safe_indirect_branch) {
1698     case SPAPR_CAP_FIXED_CCD:
1699         characteristics |= H_CPU_CHAR_CACHE_COUNT_DIS;
1700         break;
1701     case SPAPR_CAP_FIXED_IBS:
1702         characteristics |= H_CPU_CHAR_BCCTRL_SERIALISED;
1703         break;
1704     default: /* broken */
1705         assert(safe_indirect_branch == SPAPR_CAP_BROKEN);
1706         break;
1707     }
1708 
1709     args[0] = characteristics;
1710     args[1] = behaviour;
1711 
1712     return H_SUCCESS;
1713 }
1714 
1715 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
1716 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
1717 
1718 void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
1719 {
1720     spapr_hcall_fn *slot;
1721 
1722     if (opcode <= MAX_HCALL_OPCODE) {
1723         assert((opcode & 0x3) == 0);
1724 
1725         slot = &papr_hypercall_table[opcode / 4];
1726     } else {
1727         assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
1728 
1729         slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
1730     }
1731 
1732     assert(!(*slot));
1733     *slot = fn;
1734 }
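/*
 * Usage sketch (illustrative): registration happens once at startup via
 * hypercall_register_types() below, e.g.:
 *
 *     spapr_register_hypercall(H_CEDE, h_cede);
 *
 * The assert(!(*slot)) above ensures an opcode is never registered
 * twice; spapr_hypercall() then dispatches through the same tables.
 */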
1735 
1736 target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
1737                              target_ulong *args)
1738 {
1739     sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1740 
1741     if ((opcode <= MAX_HCALL_OPCODE)
1742         && ((opcode & 0x3) == 0)) {
1743         spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
1744 
1745         if (fn) {
1746             return fn(cpu, spapr, opcode, args);
1747         }
1748     } else if ((opcode >= KVMPPC_HCALL_BASE) &&
1749                (opcode <= KVMPPC_HCALL_MAX)) {
1750         spapr_hcall_fn fn = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
1751 
1752         if (fn) {
1753             return fn(cpu, spapr, opcode, args);
1754         }
1755     }
1756 
1757     qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx "\n",
1758                   opcode);
1759     return H_FUNCTION;
1760 }
1761 
1762 static void hypercall_register_types(void)
1763 {
1764     /* hcall-pft */
1765     spapr_register_hypercall(H_ENTER, h_enter);
1766     spapr_register_hypercall(H_REMOVE, h_remove);
1767     spapr_register_hypercall(H_PROTECT, h_protect);
1768     spapr_register_hypercall(H_READ, h_read);
1769 
1770     /* hcall-bulk */
1771     spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
1772 
1773     /* hcall-hpt-resize */
1774     spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
1775     spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
1776 
1777     /* hcall-splpar */
1778     spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
1779     spapr_register_hypercall(H_CEDE, h_cede);
1780     spapr_register_hypercall(H_SIGNAL_SYS_RESET, h_signal_sys_reset);
1781 
1782     /* processor register resource access h-calls */
1783     spapr_register_hypercall(H_SET_SPRG0, h_set_sprg0);
1784     spapr_register_hypercall(H_SET_DABR, h_set_dabr);
1785     spapr_register_hypercall(H_SET_XDABR, h_set_xdabr);
1786     spapr_register_hypercall(H_PAGE_INIT, h_page_init);
1787     spapr_register_hypercall(H_SET_MODE, h_set_mode);
1788 
1789     /* In Memory Table MMU h-calls */
1790     spapr_register_hypercall(H_CLEAN_SLB, h_clean_slb);
1791     spapr_register_hypercall(H_INVALIDATE_PID, h_invalidate_pid);
1792     spapr_register_hypercall(H_REGISTER_PROC_TBL, h_register_process_table);
1793 
1794     /* hcall-get-cpu-characteristics */
1795     spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS,
1796                              h_get_cpu_characteristics);
1797 
1798     /* "debugger" hcalls (also used by SLOF). Note: We do -not- differenciate
1799      * here between the "CI" and the "CACHE" variants, they will use whatever
1800      * mapping attributes qemu is using. When using KVM, the kernel will
1801      * enforce the attributes more strongly
1802      */
1803     spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load);
1804     spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store);
1805     spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load);
1806     spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store);
1807     spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi);
1808     spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf);
1809     spapr_register_hypercall(KVMPPC_H_LOGICAL_MEMOP, h_logical_memop);
1810 
1811     /* qemu/KVM-PPC specific hcalls */
1812     spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
1813 
1814     /* ibm,client-architecture-support support */
1815     spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support);
1816 }
1817 
1818 type_init(hypercall_register_types)
1819