/*
 * MMU hypercalls for the sPAPR (pseries) vHyp hypervisor that is used by TCG
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * SPDX-License-Identifier: MIT
 */
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "helper_regs.h"
#include "hw/ppc/spapr.h"
#include "mmu-hash64.h"

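/*
 * H_ENTER: install a new entry in the hash page table (HPT).
 * args[0] = flags, args[1] = PTE index, args[2..3] = new PTE dwords.
 * On success the index actually used is returned in args[0].
 */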
static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
{
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong pteh = args[2];
    target_ulong ptel = args[3];
    unsigned apshift;
    target_ulong raddr;
    target_ulong slot;
    const ppc_hash_pte64_t *hptes;

    apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
    if (!apshift) {
        /* Bad page size encoding */
        return H_PARAMETER;
    }

    raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);

    if (is_ram_address(spapr, raddr)) {
        /* Regular RAM - should have WIMG=0010 */
        if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
            return H_PARAMETER;
        }
    } else {
        target_ulong wimg_flags;
        /* Looks like an IO address */
        /* FIXME: What WIMG combinations could be sensible for IO?
         * For now we allow WIMG=010x, but are there others? */
        /* FIXME: Should we check against registered IO addresses? */
        wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));

        if (wimg_flags != HPTE64_R_I &&
            wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
            return H_PARAMETER;
        }
    }

    pteh &= ~0x60ULL;

    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    slot = ptex & 7ULL;
    ptex = ptex & ~7ULL;

    if (likely((flags & H_EXACT) == 0)) {
        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
        for (slot = 0; slot < 8; slot++) {
            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
                break;
            }
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
        if (slot == 8) {
            return H_PTEG_FULL;
        }
    } else {
        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
            return H_PTEG_FULL;
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
    }

    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);

    args[0] = ptex + slot;
    return H_SUCCESS;
}

typedef enum {
    REMOVE_SUCCESS = 0,
    REMOVE_NOT_FOUND = 1,
    REMOVE_PARM = 2,
    REMOVE_HW = 3,
} RemoveResult;

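/*
 * Invalidate a single HPT entry, returning its previous dwords through
 * *vp and *rp so that callers can report the reference/change state.
 * Shared by H_REMOVE and H_BULK_REMOVE.
 */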
static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex,
                                target_ulong avpn,
                                target_ulong flags,
                                target_ulong *vp, target_ulong *rp)
{
    const ppc_hash_pte64_t *hptes;
    target_ulong v, r;

    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
        return REMOVE_PARM;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
    v = ppc_hash64_hpte0(cpu, hptes, 0);
    r = ppc_hash64_hpte1(cpu, hptes, 0);
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);

    if ((v & HPTE64_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
        ((flags & H_ANDCOND) && (v & avpn) != 0)) {
        return REMOVE_NOT_FOUND;
    }
    *vp = v;
    *rp = r;
    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
    return REMOVE_SUCCESS;
}

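/*
 * H_REMOVE: invalidate one HPT entry.
 * args[0] = flags, args[1] = PTE index, args[2] = AVPN to match;
 * the removed PTE dwords are returned in args[0] and args[1].
 */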
static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong avpn = args[2];
    RemoveResult ret;

    ret = remove_hpte(cpu, ptex, avpn, flags,
                      &args[0], &args[1]);

    switch (ret) {
    case REMOVE_SUCCESS:
        check_tlb_flush(env, true);
        return H_SUCCESS;

    case REMOVE_NOT_FOUND:
        return H_NOT_FOUND;

    case REMOVE_PARM:
        return H_PARAMETER;

    case REMOVE_HW:
        return H_HARDWARE;
    }

    g_assert_not_reached();
}

#define H_BULK_REMOVE_TYPE       0xc000000000000000ULL
#define H_BULK_REMOVE_REQUEST    0x4000000000000000ULL
#define H_BULK_REMOVE_RESPONSE   0x8000000000000000ULL
#define H_BULK_REMOVE_END        0xc000000000000000ULL
#define H_BULK_REMOVE_CODE       0x3000000000000000ULL
#define H_BULK_REMOVE_SUCCESS    0x0000000000000000ULL
#define H_BULK_REMOVE_NOT_FOUND  0x1000000000000000ULL
#define H_BULK_REMOVE_PARM       0x2000000000000000ULL
#define H_BULK_REMOVE_HW         0x3000000000000000ULL
#define H_BULK_REMOVE_RC         0x0c00000000000000ULL
#define H_BULK_REMOVE_FLAGS      0x0300000000000000ULL
#define H_BULK_REMOVE_ABSOLUTE   0x0000000000000000ULL
#define H_BULK_REMOVE_ANDCOND    0x0100000000000000ULL
#define H_BULK_REMOVE_AVPN       0x0200000000000000ULL
#define H_BULK_REMOVE_PTEX       0x00ffffffffffffffULL

#define H_BULK_REMOVE_MAX_BATCH  4

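/*
 * H_BULK_REMOVE: process up to four (tsh, tsl) removal requests in one
 * call, folding each request's completion code and reference/change
 * bits back into its tsh word.
 */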
static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                  target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    int i;
    target_ulong rc = H_SUCCESS;

    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
        target_ulong *tsh = &args[i * 2];
        target_ulong tsl = args[i * 2 + 1];
        target_ulong v, r, ret;

        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
            break;
        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
            return H_PARAMETER;
        }

        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
        *tsh |= H_BULK_REMOVE_RESPONSE;

        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
            *tsh |= H_BULK_REMOVE_PARM;
            return H_PARAMETER;
        }

        ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
                          &v, &r);

        *tsh |= ret << 60;

        switch (ret) {
        case REMOVE_SUCCESS:
            *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
            break;

        case REMOVE_PARM:
            rc = H_PARAMETER;
            goto exit;

        case REMOVE_HW:
            rc = H_HARDWARE;
            goto exit;
        }
    }
 exit:
    check_tlb_flush(env, true);

    return rc;
}

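/*
 * H_PROTECT: update the protection, storage key and no-execute bits of
 * an existing HPT entry.  The entry is invalidated and the TLB flushed
 * before the modified PTE is written back.
 */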
static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
{
    CPUPPCState *env = &cpu->env;
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    target_ulong avpn = args[2];
    const ppc_hash_pte64_t *hptes;
    target_ulong v, r;

    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
    v = ppc_hash64_hpte0(cpu, hptes, 0);
    r = ppc_hash64_hpte1(cpu, hptes, 0);
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);

    if ((v & HPTE64_V_VALID) == 0 ||
        ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
        return H_NOT_FOUND;
    }

    r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
           HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
    r |= (flags << 55) & HPTE64_R_PP0;
    r |= (flags << 48) & HPTE64_R_KEY_HI;
    r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
    spapr_store_hpte(cpu, ptex,
                     (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
    ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
    /* Flush the tlb */
    check_tlb_flush(env, true);
    /* Don't need a memory barrier, due to qemu's global lock */
    spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
    return H_SUCCESS;
}

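/*
 * H_READ: return one HPT entry, or a group of four entries with
 * H_READ_4, in the argument buffer.
 */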
static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
{
    target_ulong flags = args[0];
    target_ulong ptex = args[1];
    int i, ridx, n_entries = 1;
    const ppc_hash_pte64_t *hptes;

    if (!ppc_hash64_valid_ptex(cpu, ptex)) {
        return H_PARAMETER;
    }

    if (flags & H_READ_4) {
        /* Clear the two low order bits */
        ptex &= ~(3ULL);
        n_entries = 4;
    }

    hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
    for (i = 0, ridx = 0; i < n_entries; i++) {
        args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
        args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
    }
    ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);

    return H_SUCCESS;
}

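/* Tracks an HPT allocation prepared asynchronously for a pending resize */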
struct SpaprPendingHpt {
    /* These fields are read-only after initialization */
    int shift;
    QemuThread thread;

    /* These fields are protected by the BQL */
    bool complete;

    /* These fields are private to the preparation thread if
     * !complete, otherwise protected by the BQL */
    int ret;
    void *hpt;
};

static void free_pending_hpt(SpaprPendingHpt *pending)
{
    if (pending->hpt) {
        qemu_vfree(pending->hpt);
    }

    g_free(pending);
}

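/*
 * Worker thread for the prepare phase of an HPT resize: allocates and
 * zeroes the new table, then either marks the request complete or, if
 * the request was cancelled in the meantime, frees it.
 */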
static void *hpt_prepare_thread(void *opaque)
{
    SpaprPendingHpt *pending = opaque;
    size_t size = 1ULL << pending->shift;

    pending->hpt = qemu_try_memalign(size, size);
    if (pending->hpt) {
        memset(pending->hpt, 0, size);
        pending->ret = H_SUCCESS;
    } else {
        pending->ret = H_NO_MEM;
    }

    bql_lock();

    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
        /* Ready to go */
        pending->complete = true;
    } else {
        /* We've been cancelled, clean ourselves up */
        free_pending_hpt(pending);
    }

    bql_unlock();
    return NULL;
}

/* Must be called with BQL held */
static void cancel_hpt_prepare(SpaprMachineState *spapr)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;

    /* Let the thread know it's cancelled */
    spapr->pending_hpt = NULL;

    if (!pending) {
        /* Nothing to do */
        return;
    }

    if (!pending->complete) {
        /* thread will clean itself up */
        return;
    }

    free_pending_hpt(pending);
}

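/*
 * H_RESIZE_HPT_PREPARE: start, poll or cancel the asynchronous
 * allocation of a new HPT of 2^shift bytes, returning
 * H_LONG_BUSY_ORDER_100_MSEC while preparation is still in progress.
 */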
target_ulong vhyp_mmu_resize_hpt_prepare(PowerPCCPU *cpu,
                                         SpaprMachineState *spapr,
                                         target_ulong shift)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;

    if (pending) {
        /* something already in progress */
        if (pending->shift == shift) {
            /* and it's suitable */
            if (pending->complete) {
                return pending->ret;
            } else {
                return H_LONG_BUSY_ORDER_100_MSEC;
            }
        }

        /* not suitable, cancel and replace */
        cancel_hpt_prepare(spapr);
    }

    if (!shift) {
        /* nothing to do */
        return H_SUCCESS;
    }

    /* start new prepare */

    pending = g_new0(SpaprPendingHpt, 1);
    pending->shift = shift;
    pending->ret = H_HARDWARE;

    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);

    spapr->pending_hpt = pending;

    /* In theory we could estimate the time more accurately based on
     * the new size, but there's not much point */
    return H_LONG_BUSY_ORDER_100_MSEC;
}

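/* Accessors for the candidate HPT being assembled by a resize */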
static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
{
    uint8_t *addr = htab;

    addr += pteg * HASH_PTEG_SIZE_64;
    addr += slot * HASH_PTE_SIZE_64;
    return ldq_p(addr);
}

static void new_hpte_store(void *htab, uint64_t pteg, int slot,
                           uint64_t pte0, uint64_t pte1)
{
    uint8_t *addr = htab;

    addr += pteg * HASH_PTEG_SIZE_64;
    addr += slot * HASH_PTE_SIZE_64;

    stq_p(addr, pte0);
    stq_p(addr + HPTE64_DW1, pte1);
}

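/*
 * Re-enter one bolted HPT entry into the new table, recomputing the hash
 * bucket it belongs to for the new table size.  Invalid and non-bolted
 * entries are skipped.
 */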
static int rehash_hpte(PowerPCCPU *cpu,
                       const ppc_hash_pte64_t *hptes,
                       void *old_hpt, uint64_t oldsize,
                       void *new_hpt, uint64_t newsize,
                       uint64_t pteg, int slot)
{
    uint64_t old_hash_mask = (oldsize >> 7) - 1;
    uint64_t new_hash_mask = (newsize >> 7) - 1;
    target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
    target_ulong pte1;
    uint64_t avpn;
    unsigned base_pg_shift;
    uint64_t hash, new_pteg, replace_pte0;

    if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
        return H_SUCCESS;
    }

    pte1 = ppc_hash64_hpte1(cpu, hptes, slot);

    base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
    assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
    avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);

    if (pte0 & HPTE64_V_SECONDARY) {
        pteg = ~pteg;
    }

    if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
        uint64_t offset, vsid;

        /* We only have 28 - 23 bits of offset in avpn */
        offset = (avpn & 0x1f) << 23;
        vsid = avpn >> 5;
        /* We can find more bits from the pteg value */
        if (base_pg_shift < 23) {
            offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
        }

        hash = vsid ^ (offset >> base_pg_shift);
    } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
        uint64_t offset, vsid;

        /* We only have 40 - 23 bits of seg_off in avpn */
        offset = (avpn & 0x1ffff) << 23;
        vsid = avpn >> 17;
        if (base_pg_shift < 23) {
            offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
                << base_pg_shift;
        }

        hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
    } else {
        error_report("rehash_pte: Bad segment size in HPTE");
        return H_HARDWARE;
    }

    new_pteg = hash & new_hash_mask;
    if (pte0 & HPTE64_V_SECONDARY) {
        assert(~pteg == (hash & old_hash_mask));
        new_pteg = ~new_pteg;
    } else {
        assert(pteg == (hash & old_hash_mask));
    }
    assert((oldsize != newsize) || (pteg == new_pteg));
    replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
    /*
     * Strictly speaking, we don't need all these tests, since we only
     * ever rehash bolted HPTEs.  We might handle non-bolted HPTEs in
     * future, though, so make the logic correct for those cases as
     * well.
     */
    if (replace_pte0 & HPTE64_V_VALID) {
        assert(newsize < oldsize);
        if (replace_pte0 & HPTE64_V_BOLTED) {
            if (pte0 & HPTE64_V_BOLTED) {
                /* Bolted collision, nothing we can do */
                return H_PTEG_FULL;
            } else {
                /* Discard this hpte */
                return H_SUCCESS;
            }
        }
    }

    new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
    return H_SUCCESS;
}

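/* Walk every PTEG of the current HPT and rehash its entries into new_hpt */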
static int rehash_hpt(PowerPCCPU *cpu,
                      void *old_hpt, uint64_t oldsize,
                      void *new_hpt, uint64_t newsize)
{
    uint64_t n_ptegs = oldsize >> 7;
    uint64_t pteg;
    int slot;
    int rc;

    for (pteg = 0; pteg < n_ptegs; pteg++) {
        hwaddr ptex = pteg * HPTES_PER_GROUP;
        const ppc_hash_pte64_t *hptes
            = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);

        if (!hptes) {
            return H_HARDWARE;
        }

        for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
            rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
                             pteg, slot);
            if (rc != H_SUCCESS) {
                ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
                return rc;
            }
        }
        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
    }

    return H_SUCCESS;
}

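/*
 * H_RESIZE_HPT_COMMIT: switch to the HPT built by a matching, completed
 * prepare request, after rehashing the bolted entries of the current
 * table into it.
 */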
target_ulong vhyp_mmu_resize_hpt_commit(PowerPCCPU *cpu,
                                        SpaprMachineState *spapr,
                                        target_ulong flags,
                                        target_ulong shift)
{
    SpaprPendingHpt *pending = spapr->pending_hpt;
    int rc;
    size_t newsize;

    if (flags != 0) {
        return H_PARAMETER;
    }

    if (!pending || (pending->shift != shift)) {
        /* no matching prepare */
        return H_CLOSED;
    }

    if (!pending->complete) {
        /* prepare has not completed */
        return H_BUSY;
    }

    /* Shouldn't have got past PREPARE without an HPT */
    g_assert(spapr->htab_shift);

    newsize = 1ULL << pending->shift;
    rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
                    pending->hpt, newsize);
    if (rc == H_SUCCESS) {
        qemu_vfree(spapr->htab);
        spapr->htab = pending->hpt;
        spapr->htab_shift = pending->shift;

        push_sregs_to_kvm_pr(spapr);

        pending->hpt = NULL; /* so it's not free()d */
    }

    /* Clean up */
    spapr->pending_hpt = NULL;
    free_pending_hpt(pending);

    return rc;
}

static void hypercall_register_types(void)
{
    /* hcall-pft */
    spapr_register_hypercall(H_ENTER, h_enter);
    spapr_register_hypercall(H_REMOVE, h_remove);
    spapr_register_hypercall(H_PROTECT, h_protect);
    spapr_register_hypercall(H_READ, h_read);

    /* hcall-bulk */
    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
}

type_init(hypercall_register_types)