1 /* 2 * MMU hypercalls for the sPAPR (pseries) vHyp hypervisor that is used by TCG 3 * 4 * Copyright (c) 2004-2007 Fabrice Bellard 5 * Copyright (c) 2007 Jocelyn Mayer 6 * Copyright (c) 2010 David Gibson, IBM Corporation. 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 #include "qemu/osdep.h" 11 #include "qemu/cutils.h" 12 #include "qemu/memalign.h" 13 #include "qemu/error-report.h" 14 #include "cpu.h" 15 #include "helper_regs.h" 16 #include "hw/ppc/spapr.h" 17 #include "mmu-hash64.h" 18 19 static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, 20 target_ulong opcode, target_ulong *args) 21 { 22 target_ulong flags = args[0]; 23 target_ulong ptex = args[1]; 24 target_ulong pteh = args[2]; 25 target_ulong ptel = args[3]; 26 unsigned apshift; 27 target_ulong raddr; 28 target_ulong slot; 29 const ppc_hash_pte64_t *hptes; 30 31 apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); 32 if (!apshift) { 33 /* Bad page size encoding */ 34 return H_PARAMETER; 35 } 36 37 raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); 38 39 if (is_ram_address(spapr, raddr)) { 40 /* Regular RAM - should have WIMG=0010 */ 41 if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { 42 return H_PARAMETER; 43 } 44 } else { 45 target_ulong wimg_flags; 46 /* Looks like an IO address */ 47 /* FIXME: What WIMG combinations could be sensible for IO? 48 * For now we allow WIMG=010x, but are there others? */ 49 /* FIXME: Should we check against registered IO addresses? */ 50 wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); 51 52 if (wimg_flags != HPTE64_R_I && 53 wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { 54 return H_PARAMETER; 55 } 56 } 57 58 pteh &= ~0x60ULL; 59 60 if (!ppc_hash64_valid_ptex(cpu, ptex)) { 61 return H_PARAMETER; 62 } 63 64 slot = ptex & 7ULL; 65 ptex = ptex & ~7ULL; 66 67 if (likely((flags & H_EXACT) == 0)) { 68 hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); 69 for (slot = 0; slot < 8; slot++) { 70 if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { 71 break; 72 } 73 } 74 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 75 if (slot == 8) { 76 return H_PTEG_FULL; 77 } 78 } else { 79 hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); 80 if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { 81 ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); 82 return H_PTEG_FULL; 83 } 84 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 85 } 86 87 spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); 88 89 args[0] = ptex + slot; 90 return H_SUCCESS; 91 } 92 93 typedef enum { 94 REMOVE_SUCCESS = 0, 95 REMOVE_NOT_FOUND = 1, 96 REMOVE_PARM = 2, 97 REMOVE_HW = 3, 98 } RemoveResult; 99 100 static RemoveResult remove_hpte(PowerPCCPU *cpu 101 , target_ulong ptex, 102 target_ulong avpn, 103 target_ulong flags, 104 target_ulong *vp, target_ulong *rp) 105 { 106 const ppc_hash_pte64_t *hptes; 107 target_ulong v, r; 108 109 if (!ppc_hash64_valid_ptex(cpu, ptex)) { 110 return REMOVE_PARM; 111 } 112 113 hptes = ppc_hash64_map_hptes(cpu, ptex, 1); 114 v = ppc_hash64_hpte0(cpu, hptes, 0); 115 r = ppc_hash64_hpte1(cpu, hptes, 0); 116 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 117 118 if ((v & HPTE64_V_VALID) == 0 || 119 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || 120 ((flags & H_ANDCOND) && (v & avpn) != 0)) { 121 return REMOVE_NOT_FOUND; 122 } 123 *vp = v; 124 *rp = r; 125 spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); 126 ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); 127 return REMOVE_SUCCESS; 128 } 129 130 static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, 131 target_ulong opcode, target_ulong *args) 132 { 133 CPUPPCState *env = &cpu->env; 134 target_ulong flags = args[0]; 135 target_ulong ptex = args[1]; 136 target_ulong avpn = args[2]; 137 RemoveResult ret; 138 139 ret = remove_hpte(cpu, ptex, avpn, flags, 140 &args[0], &args[1]); 141 142 switch (ret) { 143 case REMOVE_SUCCESS: 144 check_tlb_flush(env, true); 145 return H_SUCCESS; 146 147 case REMOVE_NOT_FOUND: 148 return H_NOT_FOUND; 149 150 case REMOVE_PARM: 151 return H_PARAMETER; 152 153 case REMOVE_HW: 154 return H_HARDWARE; 155 } 156 157 g_assert_not_reached(); 158 } 159 160 #define H_BULK_REMOVE_TYPE 0xc000000000000000ULL 161 #define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL 162 #define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL 163 #define H_BULK_REMOVE_END 0xc000000000000000ULL 164 #define H_BULK_REMOVE_CODE 0x3000000000000000ULL 165 #define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL 166 #define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL 167 #define H_BULK_REMOVE_PARM 0x2000000000000000ULL 168 #define H_BULK_REMOVE_HW 0x3000000000000000ULL 169 #define H_BULK_REMOVE_RC 0x0c00000000000000ULL 170 #define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL 171 #define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL 172 #define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL 173 #define H_BULK_REMOVE_AVPN 0x0200000000000000ULL 174 #define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL 175 176 #define H_BULK_REMOVE_MAX_BATCH 4 177 178 static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, 179 target_ulong opcode, target_ulong *args) 180 { 181 CPUPPCState *env = &cpu->env; 182 int i; 183 target_ulong rc = H_SUCCESS; 184 185 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { 186 target_ulong *tsh = &args[i*2]; 187 target_ulong tsl = args[i*2 + 1]; 188 target_ulong v, r, ret; 189 190 if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { 191 break; 192 } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { 193 return H_PARAMETER; 194 } 195 196 *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; 197 *tsh |= H_BULK_REMOVE_RESPONSE; 198 199 if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { 200 *tsh |= H_BULK_REMOVE_PARM; 201 return H_PARAMETER; 202 } 203 204 ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, 205 (*tsh & H_BULK_REMOVE_FLAGS) >> 26, 206 &v, &r); 207 208 *tsh |= ret << 60; 209 210 switch (ret) { 211 case REMOVE_SUCCESS: 212 *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; 213 break; 214 215 case REMOVE_PARM: 216 rc = H_PARAMETER; 217 goto exit; 218 219 case REMOVE_HW: 220 rc = H_HARDWARE; 221 goto exit; 222 } 223 } 224 exit: 225 check_tlb_flush(env, true); 226 227 return rc; 228 } 229 230 static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, 231 target_ulong opcode, target_ulong *args) 232 { 233 CPUPPCState *env = &cpu->env; 234 target_ulong flags = args[0]; 235 target_ulong ptex = args[1]; 236 target_ulong avpn = args[2]; 237 const ppc_hash_pte64_t *hptes; 238 target_ulong v, r; 239 240 if (!ppc_hash64_valid_ptex(cpu, ptex)) { 241 return H_PARAMETER; 242 } 243 244 hptes = ppc_hash64_map_hptes(cpu, ptex, 1); 245 v = ppc_hash64_hpte0(cpu, hptes, 0); 246 r = ppc_hash64_hpte1(cpu, hptes, 0); 247 ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); 248 249 if ((v & HPTE64_V_VALID) == 0 || 250 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { 251 return H_NOT_FOUND; 252 } 253 254 r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | 255 HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); 256 r |= (flags << 55) & HPTE64_R_PP0; 257 r |= (flags << 48) & HPTE64_R_KEY_HI; 258 r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); 259 spapr_store_hpte(cpu, ptex, 260 (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); 261 ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); 262 /* Flush the tlb */ 263 check_tlb_flush(env, true); 264 /* Don't need a memory barrier, due to qemu's global lock */ 265 spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); 266 return H_SUCCESS; 267 } 268 269 static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, 270 target_ulong opcode, target_ulong *args) 271 { 272 target_ulong flags = args[0]; 273 target_ulong ptex = args[1]; 274 int i, ridx, n_entries = 1; 275 const ppc_hash_pte64_t *hptes; 276 277 if (!ppc_hash64_valid_ptex(cpu, ptex)) { 278 return H_PARAMETER; 279 } 280 281 if (flags & H_READ_4) { 282 /* Clear the two low order bits */ 283 ptex &= ~(3ULL); 284 n_entries = 4; 285 } 286 287 hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); 288 for (i = 0, ridx = 0; i < n_entries; i++) { 289 args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); 290 args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); 291 } 292 ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); 293 294 return H_SUCCESS; 295 } 296 297 struct SpaprPendingHpt { 298 /* These fields are read-only after initialization */ 299 int shift; 300 QemuThread thread; 301 302 /* These fields are protected by the BQL */ 303 bool complete; 304 305 /* These fields are private to the preparation thread if 306 * !complete, otherwise protected by the BQL */ 307 int ret; 308 void *hpt; 309 }; 310 311 static void free_pending_hpt(SpaprPendingHpt *pending) 312 { 313 if (pending->hpt) { 314 qemu_vfree(pending->hpt); 315 } 316 317 g_free(pending); 318 } 319 320 static void *hpt_prepare_thread(void *opaque) 321 { 322 SpaprPendingHpt *pending = opaque; 323 size_t size = 1ULL << pending->shift; 324 325 pending->hpt = qemu_try_memalign(size, size); 326 if (pending->hpt) { 327 memset(pending->hpt, 0, size); 328 pending->ret = H_SUCCESS; 329 } else { 330 pending->ret = H_NO_MEM; 331 } 332 333 bql_lock(); 334 335 if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { 336 /* Ready to go */ 337 pending->complete = true; 338 } else { 339 /* We've been cancelled, clean ourselves up */ 340 free_pending_hpt(pending); 341 } 342 343 bql_unlock(); 344 return NULL; 345 } 346 347 /* Must be called with BQL held */ 348 static void cancel_hpt_prepare(SpaprMachineState *spapr) 349 { 350 SpaprPendingHpt *pending = spapr->pending_hpt; 351 352 /* Let the thread know it's cancelled */ 353 spapr->pending_hpt = NULL; 354 355 if (!pending) { 356 /* Nothing to do */ 357 return; 358 } 359 360 if (!pending->complete) { 361 /* thread will clean itself up */ 362 return; 363 } 364 365 free_pending_hpt(pending); 366 } 367 368 target_ulong vhyp_mmu_resize_hpt_prepare(PowerPCCPU *cpu, 369 SpaprMachineState *spapr, 370 target_ulong shift) 371 { 372 SpaprPendingHpt *pending = spapr->pending_hpt; 373 374 if (pending) { 375 /* something already in progress */ 376 if (pending->shift == shift) { 377 /* and it's suitable */ 378 if (pending->complete) { 379 return pending->ret; 380 } else { 381 return H_LONG_BUSY_ORDER_100_MSEC; 382 } 383 } 384 385 /* not suitable, cancel and replace */ 386 cancel_hpt_prepare(spapr); 387 } 388 389 if (!shift) { 390 /* nothing to do */ 391 return H_SUCCESS; 392 } 393 394 /* start new prepare */ 395 396 pending = g_new0(SpaprPendingHpt, 1); 397 pending->shift = shift; 398 pending->ret = H_HARDWARE; 399 400 qemu_thread_create(&pending->thread, "sPAPR HPT prepare", 401 hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); 402 403 spapr->pending_hpt = pending; 404 405 /* In theory we could estimate the time more accurately based on 406 * the new size, but there's not much point */ 407 return H_LONG_BUSY_ORDER_100_MSEC; 408 } 409 410 static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) 411 { 412 uint8_t *addr = htab; 413 414 addr += pteg * HASH_PTEG_SIZE_64; 415 addr += slot * HASH_PTE_SIZE_64; 416 return ldq_p(addr); 417 } 418 419 static void new_hpte_store(void *htab, uint64_t pteg, int slot, 420 uint64_t pte0, uint64_t pte1) 421 { 422 uint8_t *addr = htab; 423 424 addr += pteg * HASH_PTEG_SIZE_64; 425 addr += slot * HASH_PTE_SIZE_64; 426 427 stq_p(addr, pte0); 428 stq_p(addr + HPTE64_DW1, pte1); 429 } 430 431 static int rehash_hpte(PowerPCCPU *cpu, 432 const ppc_hash_pte64_t *hptes, 433 void *old_hpt, uint64_t oldsize, 434 void *new_hpt, uint64_t newsize, 435 uint64_t pteg, int slot) 436 { 437 uint64_t old_hash_mask = (oldsize >> 7) - 1; 438 uint64_t new_hash_mask = (newsize >> 7) - 1; 439 target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); 440 target_ulong pte1; 441 uint64_t avpn; 442 unsigned base_pg_shift; 443 uint64_t hash, new_pteg, replace_pte0; 444 445 if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { 446 return H_SUCCESS; 447 } 448 449 pte1 = ppc_hash64_hpte1(cpu, hptes, slot); 450 451 base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); 452 assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ 453 avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); 454 455 if (pte0 & HPTE64_V_SECONDARY) { 456 pteg = ~pteg; 457 } 458 459 if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { 460 uint64_t offset, vsid; 461 462 /* We only have 28 - 23 bits of offset in avpn */ 463 offset = (avpn & 0x1f) << 23; 464 vsid = avpn >> 5; 465 /* We can find more bits from the pteg value */ 466 if (base_pg_shift < 23) { 467 offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; 468 } 469 470 hash = vsid ^ (offset >> base_pg_shift); 471 } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { 472 uint64_t offset, vsid; 473 474 /* We only have 40 - 23 bits of seg_off in avpn */ 475 offset = (avpn & 0x1ffff) << 23; 476 vsid = avpn >> 17; 477 if (base_pg_shift < 23) { 478 offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) 479 << base_pg_shift; 480 } 481 482 hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); 483 } else { 484 error_report("rehash_pte: Bad segment size in HPTE"); 485 return H_HARDWARE; 486 } 487 488 new_pteg = hash & new_hash_mask; 489 if (pte0 & HPTE64_V_SECONDARY) { 490 assert(~pteg == (hash & old_hash_mask)); 491 new_pteg = ~new_pteg; 492 } else { 493 assert(pteg == (hash & old_hash_mask)); 494 } 495 assert((oldsize != newsize) || (pteg == new_pteg)); 496 replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); 497 /* 498 * Strictly speaking, we don't need all these tests, since we only 499 * ever rehash bolted HPTEs. We might in future handle non-bolted 500 * HPTEs, though so make the logic correct for those cases as 501 * well. 502 */ 503 if (replace_pte0 & HPTE64_V_VALID) { 504 assert(newsize < oldsize); 505 if (replace_pte0 & HPTE64_V_BOLTED) { 506 if (pte0 & HPTE64_V_BOLTED) { 507 /* Bolted collision, nothing we can do */ 508 return H_PTEG_FULL; 509 } else { 510 /* Discard this hpte */ 511 return H_SUCCESS; 512 } 513 } 514 } 515 516 new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); 517 return H_SUCCESS; 518 } 519 520 static int rehash_hpt(PowerPCCPU *cpu, 521 void *old_hpt, uint64_t oldsize, 522 void *new_hpt, uint64_t newsize) 523 { 524 uint64_t n_ptegs = oldsize >> 7; 525 uint64_t pteg; 526 int slot; 527 int rc; 528 529 for (pteg = 0; pteg < n_ptegs; pteg++) { 530 hwaddr ptex = pteg * HPTES_PER_GROUP; 531 const ppc_hash_pte64_t *hptes 532 = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); 533 534 if (!hptes) { 535 return H_HARDWARE; 536 } 537 538 for (slot = 0; slot < HPTES_PER_GROUP; slot++) { 539 rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, 540 pteg, slot); 541 if (rc != H_SUCCESS) { 542 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 543 return rc; 544 } 545 } 546 ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); 547 } 548 549 return H_SUCCESS; 550 } 551 552 target_ulong vhyp_mmu_resize_hpt_commit(PowerPCCPU *cpu, 553 SpaprMachineState *spapr, 554 target_ulong flags, 555 target_ulong shift) 556 { 557 SpaprPendingHpt *pending = spapr->pending_hpt; 558 int rc; 559 size_t newsize; 560 561 if (flags != 0) { 562 return H_PARAMETER; 563 } 564 565 if (!pending || (pending->shift != shift)) { 566 /* no matching prepare */ 567 return H_CLOSED; 568 } 569 570 if (!pending->complete) { 571 /* prepare has not completed */ 572 return H_BUSY; 573 } 574 575 /* Shouldn't have got past PREPARE without an HPT */ 576 g_assert(spapr->htab_shift); 577 578 newsize = 1ULL << pending->shift; 579 rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), 580 pending->hpt, newsize); 581 if (rc == H_SUCCESS) { 582 qemu_vfree(spapr->htab); 583 spapr->htab = pending->hpt; 584 spapr->htab_shift = pending->shift; 585 586 push_sregs_to_kvm_pr(spapr); 587 588 pending->hpt = NULL; /* so it's not free()d */ 589 } 590 591 /* Clean up */ 592 spapr->pending_hpt = NULL; 593 free_pending_hpt(pending); 594 595 return rc; 596 } 597 598 static void hypercall_register_types(void) 599 { 600 /* hcall-pft */ 601 spapr_register_hypercall(H_ENTER, h_enter); 602 spapr_register_hypercall(H_REMOVE, h_remove); 603 spapr_register_hypercall(H_PROTECT, h_protect); 604 spapr_register_hypercall(H_READ, h_read); 605 606 /* hcall-bulk */ 607 spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); 608 609 } 610 611 type_init(hypercall_register_types) 612