xref: /openbmc/qemu/hw/ppc/spapr_vhyp_mmu.c (revision f28b958cbf08c4019f99091208e5c877b857b030)
1 /*
2  * MMU hypercalls for the sPAPR (pseries) vHyp hypervisor that is used by TCG
3  *
4  * Copyright (c) 2004-2007 Fabrice Bellard
5  * Copyright (c) 2007 Jocelyn Mayer
6  * Copyright (c) 2010 David Gibson, IBM Corporation.
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 #include "qemu/osdep.h"
11 #include "qemu/cutils.h"
12 #include "qemu/memalign.h"
13 #include "qemu/error-report.h"
14 #include "cpu.h"
15 #include "helper_regs.h"
16 #include "hw/ppc/spapr.h"
17 #include "mmu-hash64.h"
18 #include "mmu-book3s-v3.h"
19 
20 
21 static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
22 {
23     /*
24      * hash value/pteg group index is normalized by HPT mask
25      */
26     if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
27         return false;
28     }
29     return true;
30 }
31 
32 static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
33                             target_ulong opcode, target_ulong *args)
34 {
35     target_ulong flags = args[0];
36     target_ulong ptex = args[1];
37     target_ulong pteh = args[2];
38     target_ulong ptel = args[3];
39     unsigned apshift;
40     target_ulong raddr;
41     target_ulong slot;
42     const ppc_hash_pte64_t *hptes;
43 
44     apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
45     if (!apshift) {
46         /* Bad page size encoding */
47         return H_PARAMETER;
48     }
49 
50     raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
51 
52     if (is_ram_address(spapr, raddr)) {
53         /* Regular RAM - should have WIMG=0010 */
54         if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
55             return H_PARAMETER;
56         }
57     } else {
58         target_ulong wimg_flags;
59         /* Looks like an IO address */
60         /* FIXME: What WIMG combinations could be sensible for IO?
61          * For now we allow WIMG=010x, but are there others? */
62         /* FIXME: Should we check against registered IO addresses? */
63         wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));
64 
65         if (wimg_flags != HPTE64_R_I &&
66             wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
67             return H_PARAMETER;
68         }
69     }
70 
71     pteh &= ~0x60ULL;
72 
73     if (!valid_ptex(cpu, ptex)) {
74         return H_PARAMETER;
75     }
76 
77     slot = ptex & 7ULL;
78     ptex = ptex & ~7ULL;
79 
80     if (likely((flags & H_EXACT) == 0)) {
81         hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
82         for (slot = 0; slot < 8; slot++) {
83             if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
84                 break;
85             }
86         }
87         ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
88         if (slot == 8) {
89             return H_PTEG_FULL;
90         }
91     } else {
92         hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
93         if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
94             ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
95             return H_PTEG_FULL;
96         }
97         ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
98     }
99 
100     spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
101 
102     args[0] = ptex + slot;
103     return H_SUCCESS;
104 }
105 
/*
 * Outcome of remove_hpte().
 *
 * The numeric values are significant: h_bulk_remove() shifts the result
 * left by 60 to form the response code of a bulk-remove entry, so they
 * must stay in step with the H_BULK_REMOVE_* code values.
 */
typedef enum {
    REMOVE_SUCCESS = 0,     /* entry was valid, matched and was removed */
    REMOVE_NOT_FOUND = 1,   /* entry invalid or AVPN/ANDCOND test failed */
    REMOVE_PARM = 2,        /* HPTE index out of range */
    REMOVE_HW = 3,          /* hardware error */
} RemoveResult;
112 
113 static RemoveResult remove_hpte(PowerPCCPU *cpu
114                                 , target_ulong ptex,
115                                 target_ulong avpn,
116                                 target_ulong flags,
117                                 target_ulong *vp, target_ulong *rp)
118 {
119     const ppc_hash_pte64_t *hptes;
120     target_ulong v, r;
121 
122     if (!valid_ptex(cpu, ptex)) {
123         return REMOVE_PARM;
124     }
125 
126     hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
127     v = ppc_hash64_hpte0(cpu, hptes, 0);
128     r = ppc_hash64_hpte1(cpu, hptes, 0);
129     ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
130 
131     if ((v & HPTE64_V_VALID) == 0 ||
132         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
133         ((flags & H_ANDCOND) && (v & avpn) != 0)) {
134         return REMOVE_NOT_FOUND;
135     }
136     *vp = v;
137     *rp = r;
138     spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
139     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
140     return REMOVE_SUCCESS;
141 }
142 
143 static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
144                              target_ulong opcode, target_ulong *args)
145 {
146     CPUPPCState *env = &cpu->env;
147     target_ulong flags = args[0];
148     target_ulong ptex = args[1];
149     target_ulong avpn = args[2];
150     RemoveResult ret;
151 
152     ret = remove_hpte(cpu, ptex, avpn, flags,
153                       &args[0], &args[1]);
154 
155     switch (ret) {
156     case REMOVE_SUCCESS:
157         check_tlb_flush(env, true);
158         return H_SUCCESS;
159 
160     case REMOVE_NOT_FOUND:
161         return H_NOT_FOUND;
162 
163     case REMOVE_PARM:
164         return H_PARAMETER;
165 
166     case REMOVE_HW:
167         return H_HARDWARE;
168     }
169 
170     g_assert_not_reached();
171 }
172 
/*
 * Layout of the first doubleword of each H_BULK_REMOVE request/response
 * pair.  Each field mask is followed (indented) by its possible values.
 */

/* Bits 63:62 - kind of entry */
#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
#define   H_BULK_REMOVE_END            0xc000000000000000ULL
/* Bits 61:60 - per-entry completion code of a response */
#define H_BULK_REMOVE_CODE             0x3000000000000000ULL
#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
/* Bits 59:58 */
#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
/* Bits 57:56 - match condition requested for the entry */
#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
/* Bits 55:0 - HPTE index */
#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL

/* Number of request pairs examined per hypercall */
#define H_BULK_REMOVE_MAX_BATCH        4
190 
191 static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
192                                   target_ulong opcode, target_ulong *args)
193 {
194     CPUPPCState *env = &cpu->env;
195     int i;
196     target_ulong rc = H_SUCCESS;
197 
198     for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
199         target_ulong *tsh = &args[i*2];
200         target_ulong tsl = args[i*2 + 1];
201         target_ulong v, r, ret;
202 
203         if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
204             break;
205         } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
206             return H_PARAMETER;
207         }
208 
209         *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
210         *tsh |= H_BULK_REMOVE_RESPONSE;
211 
212         if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
213             *tsh |= H_BULK_REMOVE_PARM;
214             return H_PARAMETER;
215         }
216 
217         ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
218                           (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
219                           &v, &r);
220 
221         *tsh |= ret << 60;
222 
223         switch (ret) {
224         case REMOVE_SUCCESS:
225             *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
226             break;
227 
228         case REMOVE_PARM:
229             rc = H_PARAMETER;
230             goto exit;
231 
232         case REMOVE_HW:
233             rc = H_HARDWARE;
234             goto exit;
235         }
236     }
237  exit:
238     check_tlb_flush(env, true);
239 
240     return rc;
241 }
242 
243 static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
244                               target_ulong opcode, target_ulong *args)
245 {
246     CPUPPCState *env = &cpu->env;
247     target_ulong flags = args[0];
248     target_ulong ptex = args[1];
249     target_ulong avpn = args[2];
250     const ppc_hash_pte64_t *hptes;
251     target_ulong v, r;
252 
253     if (!valid_ptex(cpu, ptex)) {
254         return H_PARAMETER;
255     }
256 
257     hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
258     v = ppc_hash64_hpte0(cpu, hptes, 0);
259     r = ppc_hash64_hpte1(cpu, hptes, 0);
260     ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
261 
262     if ((v & HPTE64_V_VALID) == 0 ||
263         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
264         return H_NOT_FOUND;
265     }
266 
267     r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
268            HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
269     r |= (flags << 55) & HPTE64_R_PP0;
270     r |= (flags << 48) & HPTE64_R_KEY_HI;
271     r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
272     spapr_store_hpte(cpu, ptex,
273                      (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
274     ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
275     /* Flush the tlb */
276     check_tlb_flush(env, true);
277     /* Don't need a memory barrier, due to qemu's global lock */
278     spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
279     return H_SUCCESS;
280 }
281 
282 static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
283                            target_ulong opcode, target_ulong *args)
284 {
285     target_ulong flags = args[0];
286     target_ulong ptex = args[1];
287     int i, ridx, n_entries = 1;
288     const ppc_hash_pte64_t *hptes;
289 
290     if (!valid_ptex(cpu, ptex)) {
291         return H_PARAMETER;
292     }
293 
294     if (flags & H_READ_4) {
295         /* Clear the two low order bits */
296         ptex &= ~(3ULL);
297         n_entries = 4;
298     }
299 
300     hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
301     for (i = 0, ridx = 0; i < n_entries; i++) {
302         args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
303         args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
304     }
305     ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);
306 
307     return H_SUCCESS;
308 }
309 
310 struct SpaprPendingHpt {
311     /* These fields are read-only after initialization */
312     int shift;
313     QemuThread thread;
314 
315     /* These fields are protected by the BQL */
316     bool complete;
317 
318     /* These fields are private to the preparation thread if
319      * !complete, otherwise protected by the BQL */
320     int ret;
321     void *hpt;
322 };
323 
324 static void free_pending_hpt(SpaprPendingHpt *pending)
325 {
326     if (pending->hpt) {
327         qemu_vfree(pending->hpt);
328     }
329 
330     g_free(pending);
331 }
332 
333 static void *hpt_prepare_thread(void *opaque)
334 {
335     SpaprPendingHpt *pending = opaque;
336     size_t size = 1ULL << pending->shift;
337 
338     pending->hpt = qemu_try_memalign(size, size);
339     if (pending->hpt) {
340         memset(pending->hpt, 0, size);
341         pending->ret = H_SUCCESS;
342     } else {
343         pending->ret = H_NO_MEM;
344     }
345 
346     bql_lock();
347 
348     if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
349         /* Ready to go */
350         pending->complete = true;
351     } else {
352         /* We've been cancelled, clean ourselves up */
353         free_pending_hpt(pending);
354     }
355 
356     bql_unlock();
357     return NULL;
358 }
359 
360 /* Must be called with BQL held */
361 static void cancel_hpt_prepare(SpaprMachineState *spapr)
362 {
363     SpaprPendingHpt *pending = spapr->pending_hpt;
364 
365     /* Let the thread know it's cancelled */
366     spapr->pending_hpt = NULL;
367 
368     if (!pending) {
369         /* Nothing to do */
370         return;
371     }
372 
373     if (!pending->complete) {
374         /* thread will clean itself up */
375         return;
376     }
377 
378     free_pending_hpt(pending);
379 }
380 
381 target_ulong vhyp_mmu_resize_hpt_prepare(PowerPCCPU *cpu,
382                                          SpaprMachineState *spapr,
383                                          target_ulong shift)
384 {
385     SpaprPendingHpt *pending = spapr->pending_hpt;
386 
387     if (pending) {
388         /* something already in progress */
389         if (pending->shift == shift) {
390             /* and it's suitable */
391             if (pending->complete) {
392                 return pending->ret;
393             } else {
394                 return H_LONG_BUSY_ORDER_100_MSEC;
395             }
396         }
397 
398         /* not suitable, cancel and replace */
399         cancel_hpt_prepare(spapr);
400     }
401 
402     if (!shift) {
403         /* nothing to do */
404         return H_SUCCESS;
405     }
406 
407     /* start new prepare */
408 
409     pending = g_new0(SpaprPendingHpt, 1);
410     pending->shift = shift;
411     pending->ret = H_HARDWARE;
412 
413     qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
414                        hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
415 
416     spapr->pending_hpt = pending;
417 
418     /* In theory we could estimate the time more accurately based on
419      * the new size, but there's not much point */
420     return H_LONG_BUSY_ORDER_100_MSEC;
421 }
422 
423 static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
424 {
425     uint8_t *addr = htab;
426 
427     addr += pteg * HASH_PTEG_SIZE_64;
428     addr += slot * HASH_PTE_SIZE_64;
429     return  ldq_p(addr);
430 }
431 
432 static void new_hpte_store(void *htab, uint64_t pteg, int slot,
433                            uint64_t pte0, uint64_t pte1)
434 {
435     uint8_t *addr = htab;
436 
437     addr += pteg * HASH_PTEG_SIZE_64;
438     addr += slot * HASH_PTE_SIZE_64;
439 
440     stq_p(addr, pte0);
441     stq_p(addr + HPTE64_DW1, pte1);
442 }
443 
444 static int rehash_hpte(PowerPCCPU *cpu,
445                        const ppc_hash_pte64_t *hptes,
446                        void *old_hpt, uint64_t oldsize,
447                        void *new_hpt, uint64_t newsize,
448                        uint64_t pteg, int slot)
449 {
450     uint64_t old_hash_mask = (oldsize >> 7) - 1;
451     uint64_t new_hash_mask = (newsize >> 7) - 1;
452     target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
453     target_ulong pte1;
454     uint64_t avpn;
455     unsigned base_pg_shift;
456     uint64_t hash, new_pteg, replace_pte0;
457 
458     if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
459         return H_SUCCESS;
460     }
461 
462     pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
463 
464     base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
465     assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
466     avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
467 
468     if (pte0 & HPTE64_V_SECONDARY) {
469         pteg = ~pteg;
470     }
471 
472     if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
473         uint64_t offset, vsid;
474 
475         /* We only have 28 - 23 bits of offset in avpn */
476         offset = (avpn & 0x1f) << 23;
477         vsid = avpn >> 5;
478         /* We can find more bits from the pteg value */
479         if (base_pg_shift < 23) {
480             offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
481         }
482 
483         hash = vsid ^ (offset >> base_pg_shift);
484     } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
485         uint64_t offset, vsid;
486 
487         /* We only have 40 - 23 bits of seg_off in avpn */
488         offset = (avpn & 0x1ffff) << 23;
489         vsid = avpn >> 17;
490         if (base_pg_shift < 23) {
491             offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
492                 << base_pg_shift;
493         }
494 
495         hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
496     } else {
497         error_report("rehash_pte: Bad segment size in HPTE");
498         return H_HARDWARE;
499     }
500 
501     new_pteg = hash & new_hash_mask;
502     if (pte0 & HPTE64_V_SECONDARY) {
503         assert(~pteg == (hash & old_hash_mask));
504         new_pteg = ~new_pteg;
505     } else {
506         assert(pteg == (hash & old_hash_mask));
507     }
508     assert((oldsize != newsize) || (pteg == new_pteg));
509     replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
510     /*
511      * Strictly speaking, we don't need all these tests, since we only
512      * ever rehash bolted HPTEs.  We might in future handle non-bolted
513      * HPTEs, though so make the logic correct for those cases as
514      * well.
515      */
516     if (replace_pte0 & HPTE64_V_VALID) {
517         assert(newsize < oldsize);
518         if (replace_pte0 & HPTE64_V_BOLTED) {
519             if (pte0 & HPTE64_V_BOLTED) {
520                 /* Bolted collision, nothing we can do */
521                 return H_PTEG_FULL;
522             } else {
523                 /* Discard this hpte */
524                 return H_SUCCESS;
525             }
526         }
527     }
528 
529     new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
530     return H_SUCCESS;
531 }
532 
533 static int rehash_hpt(PowerPCCPU *cpu,
534                       void *old_hpt, uint64_t oldsize,
535                       void *new_hpt, uint64_t newsize)
536 {
537     uint64_t n_ptegs = oldsize >> 7;
538     uint64_t pteg;
539     int slot;
540     int rc;
541 
542     for (pteg = 0; pteg < n_ptegs; pteg++) {
543         hwaddr ptex = pteg * HPTES_PER_GROUP;
544         const ppc_hash_pte64_t *hptes
545             = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
546 
547         if (!hptes) {
548             return H_HARDWARE;
549         }
550 
551         for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
552             rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
553                              pteg, slot);
554             if (rc != H_SUCCESS) {
555                 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
556                 return rc;
557             }
558         }
559         ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
560     }
561 
562     return H_SUCCESS;
563 }
564 
565 target_ulong vhyp_mmu_resize_hpt_commit(PowerPCCPU *cpu,
566                                         SpaprMachineState *spapr,
567                                         target_ulong flags,
568                                         target_ulong shift)
569 {
570     SpaprPendingHpt *pending = spapr->pending_hpt;
571     int rc;
572     size_t newsize;
573 
574     if (flags != 0) {
575         return H_PARAMETER;
576     }
577 
578     if (!pending || (pending->shift != shift)) {
579         /* no matching prepare */
580         return H_CLOSED;
581     }
582 
583     if (!pending->complete) {
584         /* prepare has not completed */
585         return H_BUSY;
586     }
587 
588     /* Shouldn't have got past PREPARE without an HPT */
589     g_assert(spapr->htab_shift);
590 
591     newsize = 1ULL << pending->shift;
592     rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
593                     pending->hpt, newsize);
594     if (rc == H_SUCCESS) {
595         qemu_vfree(spapr->htab);
596         spapr->htab = pending->hpt;
597         spapr->htab_shift = pending->shift;
598 
599         push_sregs_to_kvm_pr(spapr);
600 
601         pending->hpt = NULL; /* so it's not free()d */
602     }
603 
604     /* Clean up */
605     spapr->pending_hpt = NULL;
606     free_pending_hpt(pending);
607 
608     return rc;
609 }
610 
611 static void hypercall_register_types(void)
612 {
613     /* hcall-pft */
614     spapr_register_hypercall(H_ENTER, h_enter);
615     spapr_register_hypercall(H_REMOVE, h_remove);
616     spapr_register_hypercall(H_PROTECT, h_protect);
617     spapr_register_hypercall(H_READ, h_read);
618 
619     /* hcall-bulk */
620     spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
621 
622 }
623 
624 type_init(hypercall_register_types)
625