1 /* 2 * MMU hypercalls for the sPAPR (pseries) vHyp hypervisor that is used by TCG 3 * 4 * Copyright (c) 2004-2007 Fabrice Bellard 5 * Copyright (c) 2007 Jocelyn Mayer 6 * Copyright (c) 2010 David Gibson, IBM Corporation. 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 #include "qemu/osdep.h" 11 #include "qemu/cutils.h" 12 #include "qemu/memalign.h" 13 #include "qemu/error-report.h" 14 #include "cpu.h" 15 #include "helper_regs.h" 16 #include "hw/ppc/spapr.h" 17 #include "mmu-hash64.h" 18 #include "mmu-book3s-v3.h" 19 20 21 static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) 22 { 23 /* 24 * hash value/pteg group index is normalized by HPT mask 25 */ 26 if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { 27 return false; 28 } 29 return true; 30 } 31 32 static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, 33 target_ulong opcode, target_ulong *args) 34 { 35 target_ulong flags = args[0]; 36 target_ulong ptex = args[1]; 37 target_ulong pteh = args[2]; 38 target_ulong ptel = args[3]; 39 unsigned apshift; 40 target_ulong raddr; 41 target_ulong slot; 42 const ppc_hash_pte64_t *hptes; 43 44 apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); 45 if (!apshift) { 46 /* Bad page size encoding */ 47 return H_PARAMETER; 48 } 49 50 raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); 51 52 if (is_ram_address(spapr, raddr)) { 53 /* Regular RAM - should have WIMG=0010 */ 54 if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { 55 return H_PARAMETER; 56 } 57 } else { 58 target_ulong wimg_flags; 59 /* Looks like an IO address */ 60 /* FIXME: What WIMG combinations could be sensible for IO? 61 * For now we allow WIMG=010x, but are there others? */ 62 /* FIXME: Should we check against registered IO addresses? */ 63 wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); 64 65 if (wimg_flags != HPTE64_R_I && 66 wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { 67 return H_PARAMETER; 68 } 69 } 70 71 pteh &= ~0x60ULL; 72 73 if (!valid_ptex(cpu, ptex)) { 74 return H_PARAMETER; 75 } 76 77 slot = ptex & 7ULL; 78 ptex = ptex & ~7ULL; 79 80 if (likely((flags & H_EXACT) == 0)) { 81 hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); 82 for (slot = 0; slot < 8; slot++) { 83 if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { 84 break; 85 } 86 } 87 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 88 if (slot == 8) { 89 return H_PTEG_FULL; 90 } 91 } else { 92 hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); 93 if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { 94 ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); 95 return H_PTEG_FULL; 96 } 97 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 98 } 99 100 spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); 101 102 args[0] = ptex + slot; 103 return H_SUCCESS; 104 } 105 106 typedef enum { 107 REMOVE_SUCCESS = 0, 108 REMOVE_NOT_FOUND = 1, 109 REMOVE_PARM = 2, 110 REMOVE_HW = 3, 111 } RemoveResult; 112 113 static RemoveResult remove_hpte(PowerPCCPU *cpu 114 , target_ulong ptex, 115 target_ulong avpn, 116 target_ulong flags, 117 target_ulong *vp, target_ulong *rp) 118 { 119 const ppc_hash_pte64_t *hptes; 120 target_ulong v, r; 121 122 if (!valid_ptex(cpu, ptex)) { 123 return REMOVE_PARM; 124 } 125 126 hptes = ppc_hash64_map_hptes(cpu, ptex, 1); 127 v = ppc_hash64_hpte0(cpu, hptes, 0); 128 r = ppc_hash64_hpte1(cpu, hptes, 0); 129 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 130 131 if ((v & HPTE64_V_VALID) == 0 || 132 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || 133 ((flags & H_ANDCOND) && (v & avpn) != 0)) { 134 return REMOVE_NOT_FOUND; 135 } 136 *vp = v; 137 *rp = r; 138 spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); 139 ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); 140 return REMOVE_SUCCESS; 141 } 142 143 static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, 144 target_ulong opcode, target_ulong *args) 145 { 146 CPUPPCState *env = &cpu->env; 147 target_ulong flags = args[0]; 148 target_ulong ptex = args[1]; 149 target_ulong avpn = args[2]; 150 RemoveResult ret; 151 152 ret = remove_hpte(cpu, ptex, avpn, flags, 153 &args[0], &args[1]); 154 155 switch (ret) { 156 case REMOVE_SUCCESS: 157 check_tlb_flush(env, true); 158 return H_SUCCESS; 159 160 case REMOVE_NOT_FOUND: 161 return H_NOT_FOUND; 162 163 case REMOVE_PARM: 164 return H_PARAMETER; 165 166 case REMOVE_HW: 167 return H_HARDWARE; 168 } 169 170 g_assert_not_reached(); 171 } 172 173 #define H_BULK_REMOVE_TYPE 0xc000000000000000ULL 174 #define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL 175 #define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL 176 #define H_BULK_REMOVE_END 0xc000000000000000ULL 177 #define H_BULK_REMOVE_CODE 0x3000000000000000ULL 178 #define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL 179 #define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL 180 #define H_BULK_REMOVE_PARM 0x2000000000000000ULL 181 #define H_BULK_REMOVE_HW 0x3000000000000000ULL 182 #define H_BULK_REMOVE_RC 0x0c00000000000000ULL 183 #define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL 184 #define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL 185 #define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL 186 #define H_BULK_REMOVE_AVPN 0x0200000000000000ULL 187 #define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL 188 189 #define H_BULK_REMOVE_MAX_BATCH 4 190 191 static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, 192 target_ulong opcode, target_ulong *args) 193 { 194 CPUPPCState *env = &cpu->env; 195 int i; 196 target_ulong rc = H_SUCCESS; 197 198 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { 199 target_ulong *tsh = &args[i*2]; 200 target_ulong tsl = args[i*2 + 1]; 201 target_ulong v, r, ret; 202 203 if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { 204 break; 205 } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { 206 return H_PARAMETER; 207 } 208 209 *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; 210 *tsh |= H_BULK_REMOVE_RESPONSE; 211 212 if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { 213 *tsh |= H_BULK_REMOVE_PARM; 214 return H_PARAMETER; 215 } 216 217 ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, 218 (*tsh & H_BULK_REMOVE_FLAGS) >> 26, 219 &v, &r); 220 221 *tsh |= ret << 60; 222 223 switch (ret) { 224 case REMOVE_SUCCESS: 225 *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; 226 break; 227 228 case REMOVE_PARM: 229 rc = H_PARAMETER; 230 goto exit; 231 232 case REMOVE_HW: 233 rc = H_HARDWARE; 234 goto exit; 235 } 236 } 237 exit: 238 check_tlb_flush(env, true); 239 240 return rc; 241 } 242 243 static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, 244 target_ulong opcode, target_ulong *args) 245 { 246 CPUPPCState *env = &cpu->env; 247 target_ulong flags = args[0]; 248 target_ulong ptex = args[1]; 249 target_ulong avpn = args[2]; 250 const ppc_hash_pte64_t *hptes; 251 target_ulong v, r; 252 253 if (!valid_ptex(cpu, ptex)) { 254 return H_PARAMETER; 255 } 256 257 hptes = ppc_hash64_map_hptes(cpu, ptex, 1); 258 v = ppc_hash64_hpte0(cpu, hptes, 0); 259 r = ppc_hash64_hpte1(cpu, hptes, 0); 260 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 261 262 if ((v & HPTE64_V_VALID) == 0 || 263 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { 264 return H_NOT_FOUND; 265 } 266 267 r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | 268 HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); 269 r |= (flags << 55) & HPTE64_R_PP0; 270 r |= (flags << 48) & HPTE64_R_KEY_HI; 271 r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); 272 spapr_store_hpte(cpu, ptex, 273 (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); 274 ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); 275 /* Flush the tlb */ 276 check_tlb_flush(env, true); 277 /* Don't need a memory barrier, due to qemu's global lock */ 278 spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); 279 return H_SUCCESS; 280 } 281 282 static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, 283 target_ulong opcode, target_ulong *args) 284 { 285 target_ulong flags = args[0]; 286 target_ulong ptex = args[1]; 287 int i, ridx, n_entries = 1; 288 const ppc_hash_pte64_t *hptes; 289 290 if (!valid_ptex(cpu, ptex)) { 291 return H_PARAMETER; 292 } 293 294 if (flags & H_READ_4) { 295 /* Clear the two low order bits */ 296 ptex &= ~(3ULL); 297 n_entries = 4; 298 } 299 300 hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); 301 for (i = 0, ridx = 0; i < n_entries; i++) { 302 args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); 303 args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); 304 } 305 ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); 306 307 return H_SUCCESS; 308 } 309 310 struct SpaprPendingHpt { 311 /* These fields are read-only after initialization */ 312 int shift; 313 QemuThread thread; 314 315 /* These fields are protected by the BQL */ 316 bool complete; 317 318 /* These fields are private to the preparation thread if 319 * !complete, otherwise protected by the BQL */ 320 int ret; 321 void *hpt; 322 }; 323 324 static void free_pending_hpt(SpaprPendingHpt *pending) 325 { 326 if (pending->hpt) { 327 qemu_vfree(pending->hpt); 328 } 329 330 g_free(pending); 331 } 332 333 static void *hpt_prepare_thread(void *opaque) 334 { 335 SpaprPendingHpt *pending = opaque; 336 size_t size = 1ULL << pending->shift; 337 338 pending->hpt = qemu_try_memalign(size, size); 339 if (pending->hpt) { 340 memset(pending->hpt, 0, size); 341 pending->ret = H_SUCCESS; 342 } else { 343 pending->ret = H_NO_MEM; 344 } 345 346 bql_lock(); 347 348 if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { 349 /* Ready to go */ 350 pending->complete = true; 351 } else { 352 /* We've been cancelled, clean ourselves up */ 353 free_pending_hpt(pending); 354 } 355 356 bql_unlock(); 357 return NULL; 358 } 359 360 /* Must be called with BQL held */ 361 static void cancel_hpt_prepare(SpaprMachineState *spapr) 362 { 363 SpaprPendingHpt *pending = spapr->pending_hpt; 364 365 /* Let the thread know it's cancelled */ 366 spapr->pending_hpt = NULL; 367 368 if (!pending) { 369 /* Nothing to do */ 370 return; 371 } 372 373 if (!pending->complete) { 374 /* thread will clean itself up */ 375 return; 376 } 377 378 free_pending_hpt(pending); 379 } 380 381 target_ulong vhyp_mmu_resize_hpt_prepare(PowerPCCPU *cpu, 382 SpaprMachineState *spapr, 383 target_ulong shift) 384 { 385 SpaprPendingHpt *pending = spapr->pending_hpt; 386 387 if (pending) { 388 /* something already in progress */ 389 if (pending->shift == shift) { 390 /* and it's suitable */ 391 if (pending->complete) { 392 return pending->ret; 393 } else { 394 return H_LONG_BUSY_ORDER_100_MSEC; 395 } 396 } 397 398 /* not suitable, cancel and replace */ 399 cancel_hpt_prepare(spapr); 400 } 401 402 if (!shift) { 403 /* nothing to do */ 404 return H_SUCCESS; 405 } 406 407 /* start new prepare */ 408 409 pending = g_new0(SpaprPendingHpt, 1); 410 pending->shift = shift; 411 pending->ret = H_HARDWARE; 412 413 qemu_thread_create(&pending->thread, "sPAPR HPT prepare", 414 hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); 415 416 spapr->pending_hpt = pending; 417 418 /* In theory we could estimate the time more accurately based on 419 * the new size, but there's not much point */ 420 return H_LONG_BUSY_ORDER_100_MSEC; 421 } 422 423 static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) 424 { 425 uint8_t *addr = htab; 426 427 addr += pteg * HASH_PTEG_SIZE_64; 428 addr += slot * HASH_PTE_SIZE_64; 429 return ldq_p(addr); 430 } 431 432 static void new_hpte_store(void *htab, uint64_t pteg, int slot, 433 uint64_t pte0, uint64_t pte1) 434 { 435 uint8_t *addr = htab; 436 437 addr += pteg * HASH_PTEG_SIZE_64; 438 addr += slot * HASH_PTE_SIZE_64; 439 440 stq_p(addr, pte0); 441 stq_p(addr + HPTE64_DW1, pte1); 442 } 443 444 static int rehash_hpte(PowerPCCPU *cpu, 445 const ppc_hash_pte64_t *hptes, 446 void *old_hpt, uint64_t oldsize, 447 void *new_hpt, uint64_t newsize, 448 uint64_t pteg, int slot) 449 { 450 uint64_t old_hash_mask = (oldsize >> 7) - 1; 451 uint64_t new_hash_mask = (newsize >> 7) - 1; 452 target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); 453 target_ulong pte1; 454 uint64_t avpn; 455 unsigned base_pg_shift; 456 uint64_t hash, new_pteg, replace_pte0; 457 458 if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { 459 return H_SUCCESS; 460 } 461 462 pte1 = ppc_hash64_hpte1(cpu, hptes, slot); 463 464 base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); 465 assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ 466 avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); 467 468 if (pte0 & HPTE64_V_SECONDARY) { 469 pteg = ~pteg; 470 } 471 472 if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { 473 uint64_t offset, vsid; 474 475 /* We only have 28 - 23 bits of offset in avpn */ 476 offset = (avpn & 0x1f) << 23; 477 vsid = avpn >> 5; 478 /* We can find more bits from the pteg value */ 479 if (base_pg_shift < 23) { 480 offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; 481 } 482 483 hash = vsid ^ (offset >> base_pg_shift); 484 } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { 485 uint64_t offset, vsid; 486 487 /* We only have 40 - 23 bits of seg_off in avpn */ 488 offset = (avpn & 0x1ffff) << 23; 489 vsid = avpn >> 17; 490 if (base_pg_shift < 23) { 491 offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) 492 << base_pg_shift; 493 } 494 495 hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); 496 } else { 497 error_report("rehash_pte: Bad segment size in HPTE"); 498 return H_HARDWARE; 499 } 500 501 new_pteg = hash & new_hash_mask; 502 if (pte0 & HPTE64_V_SECONDARY) { 503 assert(~pteg == (hash & old_hash_mask)); 504 new_pteg = ~new_pteg; 505 } else { 506 assert(pteg == (hash & old_hash_mask)); 507 } 508 assert((oldsize != newsize) || (pteg == new_pteg)); 509 replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); 510 /* 511 * Strictly speaking, we don't need all these tests, since we only 512 * ever rehash bolted HPTEs. We might in future handle non-bolted 513 * HPTEs, though so make the logic correct for those cases as 514 * well. 515 */ 516 if (replace_pte0 & HPTE64_V_VALID) { 517 assert(newsize < oldsize); 518 if (replace_pte0 & HPTE64_V_BOLTED) { 519 if (pte0 & HPTE64_V_BOLTED) { 520 /* Bolted collision, nothing we can do */ 521 return H_PTEG_FULL; 522 } else { 523 /* Discard this hpte */ 524 return H_SUCCESS; 525 } 526 } 527 } 528 529 new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); 530 return H_SUCCESS; 531 } 532 533 static int rehash_hpt(PowerPCCPU *cpu, 534 void *old_hpt, uint64_t oldsize, 535 void *new_hpt, uint64_t newsize) 536 { 537 uint64_t n_ptegs = oldsize >> 7; 538 uint64_t pteg; 539 int slot; 540 int rc; 541 542 for (pteg = 0; pteg < n_ptegs; pteg++) { 543 hwaddr ptex = pteg * HPTES_PER_GROUP; 544 const ppc_hash_pte64_t *hptes 545 = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); 546 547 if (!hptes) { 548 return H_HARDWARE; 549 } 550 551 for (slot = 0; slot < HPTES_PER_GROUP; slot++) { 552 rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, 553 pteg, slot); 554 if (rc != H_SUCCESS) { 555 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 556 return rc; 557 } 558 } 559 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 560 } 561 562 return H_SUCCESS; 563 } 564 565 target_ulong vhyp_mmu_resize_hpt_commit(PowerPCCPU *cpu, 566 SpaprMachineState *spapr, 567 target_ulong flags, 568 target_ulong shift) 569 { 570 SpaprPendingHpt *pending = spapr->pending_hpt; 571 int rc; 572 size_t newsize; 573 574 if (flags != 0) { 575 return H_PARAMETER; 576 } 577 578 if (!pending || (pending->shift != shift)) { 579 /* no matching prepare */ 580 return H_CLOSED; 581 } 582 583 if (!pending->complete) { 584 /* prepare has not completed */ 585 return H_BUSY; 586 } 587 588 /* Shouldn't have got past PREPARE without an HPT */ 589 g_assert(spapr->htab_shift); 590 591 newsize = 1ULL << pending->shift; 592 rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), 593 pending->hpt, newsize); 594 if (rc == H_SUCCESS) { 595 qemu_vfree(spapr->htab); 596 spapr->htab = pending->hpt; 597 spapr->htab_shift = pending->shift; 598 599 push_sregs_to_kvm_pr(spapr); 600 601 pending->hpt = NULL; /* so it's not free()d */ 602 } 603 604 /* Clean up */ 605 spapr->pending_hpt = NULL; 606 free_pending_hpt(pending); 607 608 return rc; 609 } 610 611 static void hypercall_register_types(void) 612 { 613 /* hcall-pft */ 614 spapr_register_hypercall(H_ENTER, h_enter); 615 spapr_register_hypercall(H_REMOVE, h_remove); 616 spapr_register_hypercall(H_PROTECT, h_protect); 617 spapr_register_hypercall(H_READ, h_read); 618 619 /* hcall-bulk */ 620 spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); 621 622 } 623 624 type_init(hypercall_register_types) 625