/* xref: /openbmc/qemu/hw/ppc/spapr_softmmu.c (revision 812b31d3) */
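/*
 * Software MMU (TCG) implementations of the PAPR hash page table
 * hypercalls (H_ENTER, H_REMOVE, H_BULK_REMOVE, H_PROTECT, H_READ)
 * and of the softmmu side of HPT resizing (prepare/commit).
 */
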
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "sysemu/hw_accel.h"
#include "sysemu/runstate.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "helper_regs.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "mmu-hash64.h"
#include "cpu-models.h"
#include "trace.h"
#include "kvm_ppc.h"
#include "hw/ppc/fdt.h"
#include "hw/ppc/spapr_ovec.h"
#include "mmu-book3s-v3.h"
#include "hw/mem/memory-device.h"

static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
{
    /*
     * The PTEG index (ptex with the low slot bits stripped) must lie
     * within the HPT: any bits set outside ppc_hash64_hpt_mask() put
     * it past the end of the table.
     */
    if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
        return false;
    }
    return true;
}

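/*
 * H_ENTER: insert an entry into the hash page table.
 *
 * args[0] = flags, args[1] = ptex, args[2] = pteh, args[3] = ptel.
 * With H_EXACT the low three bits of ptex select the exact slot;
 * otherwise the first invalid slot in the PTEG is used.  On success
 * the ptex actually used is returned in args[0].
 */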
static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
{
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong pteh = args[2];
    target_ulong ptel = args[3];
    unsigned apshift;
    target_ulong raddr;
    target_ulong slot;
    const ppc_hash_pte64_t *hptes;

    apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
    if (!apshift) {
        /* Bad page size encoding */
        return H_PARAMETER;
    }

    raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);

    if (is_ram_address(spapr, raddr)) {
        /* Regular RAM - should have WIMG=0010 */
        if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
            return H_PARAMETER;
        }
    } else {
        target_ulong wimg_flags;
        /* Looks like an IO address */
        /*
         * FIXME: What WIMG combinations could be sensible for IO?
         * For now we allow WIMG=010x, but are there others?
         * FIXME: Should we check against registered IO addresses?
         */
        wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));

        if (wimg_flags != HPTE64_R_I &&
            wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
            return H_PARAMETER;
        }
    }

    /* Clear the software bits QEMU manages itself (HPTE64_V_HPTE_DIRTY) */
    pteh &= ~0x60ULL;

    if (!valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    slot = ptex & 7ULL;
    ptex = ptex & ~7ULL;

    if (likely((flags & H_EXACT) == 0)) {
        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
        for (slot = 0; slot < 8; slot++) {
            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
                break;
            }
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
        if (slot == 8) {
            return H_PTEG_FULL;
        }
    } else {
        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
            return H_PTEG_FULL;
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
    }

    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);

    args[0] = ptex + slot;
    return H_SUCCESS;
}

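/*
 * Outcomes of remove_hpte().  The numeric values are chosen to match
 * the H_BULK_REMOVE_CODE encoding, so h_bulk_remove() can shift them
 * straight into the response word (see "ret << 60" below).
 */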
typedef enum {
    REMOVE_SUCCESS = 0,
    REMOVE_NOT_FOUND = 1,
    REMOVE_PARM = 2,
    REMOVE_HW = 3,
} RemoveResult;

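/*
 * Invalidate a single HPTE, subject to the H_AVPN/H_ANDCOND match
 * conditions in flags.  On success the old dwords are returned
 * through *vp and *rp and the TLB entry is flushed.
 */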
static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                target_ulong avpn, target_ulong flags,
                                target_ulong *vp, target_ulong *rp)
{
    const ppc_hash_pte64_t *hptes;
    target_ulong v, r;

    if (!valid_ptex(cpu, ptex)) {
        return REMOVE_PARM;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
    v = ppc_hash64_hpte0(cpu, hptes, 0);
    r = ppc_hash64_hpte1(cpu, hptes, 0);
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);

    if ((v & HPTE64_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
        ((flags & H_ANDCOND) && (v & avpn) != 0)) {
        return REMOVE_NOT_FOUND;
    }
    *vp = v;
    *rp = r;
    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
    return REMOVE_SUCCESS;
}

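/*
 * H_REMOVE: invalidate one HPTE.  args[0] = flags (H_AVPN and/or
 * H_ANDCOND), args[1] = ptex, args[2] = avpn.  On success the old
 * pteh/ptel values are returned in args[0]/args[1].
 */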
static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong avpn = args[2];
    RemoveResult ret;

    ret = remove_hpte(cpu, ptex, avpn, flags, &args[0], &args[1]);

    switch (ret) {
    case REMOVE_SUCCESS:
        check_tlb_flush(env, true);
        return H_SUCCESS;

    case REMOVE_NOT_FOUND:
        return H_NOT_FOUND;

    case REMOVE_PARM:
        return H_PARAMETER;

    case REMOVE_HW:
        return H_HARDWARE;
    }

    g_assert_not_reached();
}

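/*
 * H_BULK_REMOVE encodes up to four requests as pairs of parameter
 * words: a "translation specifier high" word (tsh) and a low word
 * (tsl) holding the avpn.  The tsh layout, as implied by the masks
 * below:
 *
 *   bits 63:62  type (request / response / end-of-list marker)
 *   bits 61:60  per-entry return code
 *   bits 59:58  "RC" field (defined by PAPR)
 *   bits 57:56  match flags (absolute / andcond / avpn)
 *   bits 55:0   ptex
 */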
#define H_BULK_REMOVE_TYPE             0xc000000000000000ULL
#define   H_BULK_REMOVE_REQUEST        0x4000000000000000ULL
#define   H_BULK_REMOVE_RESPONSE       0x8000000000000000ULL
#define   H_BULK_REMOVE_END            0xc000000000000000ULL
#define H_BULK_REMOVE_CODE             0x3000000000000000ULL
#define   H_BULK_REMOVE_SUCCESS        0x0000000000000000ULL
#define   H_BULK_REMOVE_NOT_FOUND      0x1000000000000000ULL
#define   H_BULK_REMOVE_PARM           0x2000000000000000ULL
#define   H_BULK_REMOVE_HW             0x3000000000000000ULL
#define H_BULK_REMOVE_RC               0x0c00000000000000ULL
#define H_BULK_REMOVE_FLAGS            0x0300000000000000ULL
#define   H_BULK_REMOVE_ABSOLUTE       0x0000000000000000ULL
#define   H_BULK_REMOVE_ANDCOND        0x0100000000000000ULL
#define   H_BULK_REMOVE_AVPN           0x0200000000000000ULL
#define H_BULK_REMOVE_PTEX             0x00ffffffffffffffULL

#define H_BULK_REMOVE_MAX_BATCH        4

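/*
 * H_BULK_REMOVE: process up to H_BULK_REMOVE_MAX_BATCH remove
 * requests in a single hcall.  Each tsh word is rewritten in place
 * into a response carrying the per-entry return code; a parameter or
 * hardware error aborts the batch.
 */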
static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                  target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    int i;
    target_ulong rc = H_SUCCESS;

    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
        target_ulong *tsh = &args[i * 2];
        target_ulong tsl = args[i * 2 + 1];
        target_ulong v, r, ret;

        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
            break;
        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
            return H_PARAMETER;
        }

        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
        *tsh |= H_BULK_REMOVE_RESPONSE;

        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
            *tsh |= H_BULK_REMOVE_PARM;
            return H_PARAMETER;
        }

        /* Shift the tsh flags down into the H_AVPN/H_ANDCOND positions */
        ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
                          &v, &r);

        /* RemoveResult values match the H_BULK_REMOVE_CODE encoding */
        *tsh |= ret << 60;

        switch (ret) {
        case REMOVE_SUCCESS:
            *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
            break;

        case REMOVE_PARM:
            rc = H_PARAMETER;
            goto exit;

        case REMOVE_HW:
            rc = H_HARDWARE;
            goto exit;
        }
        /* REMOVE_NOT_FOUND is reported only via the per-entry code */
    }
 exit:
    check_tlb_flush(env, true);

    return rc;
}

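/*
 * H_PROTECT: update the pp0/pp/n/key protection bits of an existing
 * HPTE.  args[0] = flags (new protection bits, optionally H_AVPN),
 * args[1] = ptex, args[2] = avpn.  The entry is invalidated and the
 * TLB flushed before the updated dword 1 is stored, so no stale
 * translation can be used in between.
 */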
static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong avpn = args[2];
    const ppc_hash_pte64_t *hptes;
    target_ulong v, r;

    if (!valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
    v = ppc_hash64_hpte0(cpu, hptes, 0);
    r = ppc_hash64_hpte1(cpu, hptes, 0);
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);

    if ((v & HPTE64_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
        return H_NOT_FOUND;
    }

    r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
           HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
    r |= (flags << 55) & HPTE64_R_PP0;
    r |= (flags << 48) & HPTE64_R_KEY_HI;
    r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
    spapr_store_hpte(cpu, ptex,
                     (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
    /* Flush the tlb */
    check_tlb_flush(env, true);
    /* Don't need a memory barrier, due to qemu's global lock */
    spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
    return H_SUCCESS;
}

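/*
 * H_READ: read one HPTE back into args[0]/args[1], or, with
 * H_READ_4, the four entries starting at ptex & ~3.
 */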
static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
{
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    int i, ridx, n_entries = 1;
    const ppc_hash_pte64_t *hptes;

    if (!valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    if (flags & H_READ_4) {
        /* Clear the two low order bits */
        ptex &= ~(3ULL);
        n_entries = 4;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
    for (i = 0, ridx = 0; i < n_entries; i++) {
        args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
        args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
    }
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);

    return H_SUCCESS;
}

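/*
 * State for an in-flight HPT resize.  Allocating and zeroing the
 * candidate table can be slow, so it runs in a detached worker thread
 * while the prepare hcall returns H_LONG_BUSY and the guest polls.
 */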
struct SpaprPendingHpt {
    /* These fields are read-only after initialization */
    int shift;
    QemuThread thread;

    /* These fields are protected by the BQL */
    bool complete;

    /*
     * These fields are private to the preparation thread if
     * !complete, otherwise protected by the BQL
     */
    int ret;
    void *hpt;
};

static void free_pending_hpt(SpaprPendingHpt *pending)
{
    if (pending->hpt) {
        qemu_vfree(pending->hpt);
    }

    g_free(pending);
}

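/*
 * Worker thread for H_RESIZE_HPT_PREPARE: allocate and zero the
 * candidate HPT, then, under the BQL, either mark the request
 * complete or free it if it was cancelled while we were working.
 */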
static void *hpt_prepare_thread(void *opaque)
{
    SpaprPendingHpt *pending = opaque;
    size_t size = 1ULL << pending->shift;

    pending->hpt = qemu_try_memalign(size, size);
    if (pending->hpt) {
        memset(pending->hpt, 0, size);
        pending->ret = H_SUCCESS;
    } else {
        pending->ret = H_NO_MEM;
    }

    qemu_mutex_lock_iothread();

    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
        /* Ready to go */
        pending->complete = true;
    } else {
        /* We've been cancelled, clean ourselves up */
        free_pending_hpt(pending);
    }

    qemu_mutex_unlock_iothread();
    return NULL;
}

/* Must be called with BQL held */
static void cancel_hpt_prepare(SpaprMachineState *spapr)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;

    /* Let the thread know it's cancelled */
    spapr->pending_hpt = NULL;

    if (!pending) {
        /* Nothing to do */
        return;
    }

    if (!pending->complete) {
        /* thread will clean itself up */
        return;
    }

    free_pending_hpt(pending);
}

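/*
 * H_RESIZE_HPT_PREPARE, softmmu backend: start (or poll) the
 * background allocation of a new HPT of 2^shift bytes.  A prepare
 * with a different shift cancels and replaces the current one;
 * shift == 0 simply cancels.
 */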
target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu,
                                        SpaprMachineState *spapr,
                                        target_ulong shift)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;

    if (pending) {
        /* something already in progress */
        if (pending->shift == shift) {
            /* and it's suitable */
            if (pending->complete) {
                return pending->ret;
            } else {
                return H_LONG_BUSY_ORDER_100_MSEC;
            }
        }

        /* not suitable, cancel and replace */
        cancel_hpt_prepare(spapr);
    }

    if (!shift) {
        /* nothing to do */
        return H_SUCCESS;
    }

    /* start new prepare */

    pending = g_new0(SpaprPendingHpt, 1);
    pending->shift = shift;
    pending->ret = H_HARDWARE;

    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);

    spapr->pending_hpt = pending;

    /*
     * In theory we could estimate the time more accurately based on
     * the new size, but there's not much point
     */
    return H_LONG_BUSY_ORDER_100_MSEC;
}

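/*
 * Accessors for the candidate (not yet installed) hash table.
 * ldq_p()/stq_p() use the target byte order, matching the layout of
 * the live HPT.
 */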
static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
{
    uint8_t *addr = htab;

    addr += pteg * HASH_PTEG_SIZE_64;
    addr += slot * HASH_PTE_SIZE_64;
    return ldq_p(addr);
}

static void new_hpte_store(void *htab, uint64_t pteg, int slot,
                           uint64_t pte0, uint64_t pte1)
{
    uint8_t *addr = htab;

    addr += pteg * HASH_PTEG_SIZE_64;
    addr += slot * HASH_PTE_SIZE_64;

    stq_p(addr, pte0);
    stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1);
}

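/*
 * Move one HPTE from the old table to the candidate table,
 * recomputing its PTEG index from the AVPN and its position in the
 * old table.  Only valid, bolted entries are preserved; anything else
 * is dropped and must be re-entered by the guest after the resize.
 */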
static int rehash_hpte(PowerPCCPU *cpu,
                       const ppc_hash_pte64_t *hptes,
                       void *old_hpt, uint64_t oldsize,
                       void *new_hpt, uint64_t newsize,
                       uint64_t pteg, int slot)
{
    uint64_t old_hash_mask = (oldsize >> 7) - 1;
    uint64_t new_hash_mask = (newsize >> 7) - 1;
    target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
    target_ulong pte1;
    uint64_t avpn;
    unsigned base_pg_shift;
    uint64_t hash, new_pteg, replace_pte0;

    if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
        return H_SUCCESS;
    }

    pte1 = ppc_hash64_hpte1(cpu, hptes, slot);

    base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
    assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
    avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);

    if (pte0 & HPTE64_V_SECONDARY) {
        pteg = ~pteg;
    }

    if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
        uint64_t offset, vsid;

        /* We only have 28 - 23 bits of offset in avpn */
        offset = (avpn & 0x1f) << 23;
        vsid = avpn >> 5;
        /* We can find more bits from the pteg value */
        if (base_pg_shift < 23) {
            offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
        }

        hash = vsid ^ (offset >> base_pg_shift);
    } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
        uint64_t offset, vsid;

        /* We only have 40 - 23 bits of seg_off in avpn */
        offset = (avpn & 0x1ffff) << 23;
        vsid = avpn >> 17;
        if (base_pg_shift < 23) {
            offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
                << base_pg_shift;
        }

        hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
    } else {
        error_report("rehash_hpte: Bad segment size in HPTE");
        return H_HARDWARE;
    }

    new_pteg = hash & new_hash_mask;
    if (pte0 & HPTE64_V_SECONDARY) {
        assert(~pteg == (hash & old_hash_mask));
        new_pteg = ~new_pteg;
    } else {
        assert(pteg == (hash & old_hash_mask));
    }
    assert((oldsize != newsize) || (pteg == new_pteg));
    replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
    /*
     * Strictly speaking, we don't need all these tests, since we only
     * ever rehash bolted HPTEs.  We might in future handle non-bolted
     * HPTEs, though so make the logic correct for those cases as
     * well.
     */
    if (replace_pte0 & HPTE64_V_VALID) {
        assert(newsize < oldsize);
        if (replace_pte0 & HPTE64_V_BOLTED) {
            if (pte0 & HPTE64_V_BOLTED) {
                /* Bolted collision, nothing we can do */
                return H_PTEG_FULL;
            } else {
                /* Discard this hpte */
                return H_SUCCESS;
            }
        }
    }

    new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
    return H_SUCCESS;
}

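/*
 * Walk every PTEG of the old table, rehashing its entries into the
 * candidate table.  Returns H_PTEG_FULL if two bolted entries collide
 * in the (smaller) new table, which causes the commit to fail.
 */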
static int rehash_hpt(PowerPCCPU *cpu,
                      void *old_hpt, uint64_t oldsize,
                      void *new_hpt, uint64_t newsize)
{
    uint64_t n_ptegs = oldsize >> 7;
    uint64_t pteg;
    int slot;
    int rc;

    for (pteg = 0; pteg < n_ptegs; pteg++) {
        hwaddr ptex = pteg * HPTES_PER_GROUP;
        const ppc_hash_pte64_t *hptes
            = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);

        if (!hptes) {
            return H_HARDWARE;
        }

        for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
            rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
                             pteg, slot);
            if (rc != H_SUCCESS) {
                ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
                return rc;
            }
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
    }

    return H_SUCCESS;
}

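/*
 * H_RESIZE_HPT_COMMIT, softmmu backend: rehash the bolted entries
 * into the prepared table and switch the machine over to it.  The
 * pending prepare is consumed whether or not the commit succeeds.
 */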
target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu,
                                       SpaprMachineState *spapr,
                                       target_ulong flags,
                                       target_ulong shift)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;
    int rc;
    size_t newsize;

    if (flags != 0) {
        return H_PARAMETER;
    }

    if (!pending || (pending->shift != shift)) {
        /* no matching prepare */
        return H_CLOSED;
    }

    if (!pending->complete) {
        /* prepare has not completed */
        return H_BUSY;
    }

    /* Shouldn't have got past PREPARE without an HPT */
    g_assert(spapr->htab_shift);

    newsize = 1ULL << pending->shift;
    rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
                    pending->hpt, newsize);
    if (rc == H_SUCCESS) {
        qemu_vfree(spapr->htab);
        spapr->htab = pending->hpt;
        spapr->htab_shift = pending->shift;

        push_sregs_to_kvm_pr(spapr);

        pending->hpt = NULL; /* so it's not free()d */
    }

    /* Clean up */
    spapr->pending_hpt = NULL;
    free_pending_hpt(pending);

    return rc;
}

static void hypercall_register_types(void)
{
    /* hcall-pft */
    spapr_register_hypercall(H_ENTER, h_enter);
    spapr_register_hypercall(H_REMOVE, h_remove);
    spapr_register_hypercall(H_PROTECT, h_protect);
    spapr_register_hypercall(H_READ, h_read);

    /* hcall-bulk */
    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
}

type_init(hypercall_register_types)