// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * pSeries_lpar.c
 * Copyright (C) 2001 Todd Inglett, IBM Corporation
 *
 * pSeries LPAR support.
 */

/* Enables debugging of low-level hash table routines - careful! */
#undef DEBUG
#define pr_fmt(fmt) "lpar: " fmt

#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/iommu.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/firmware.h>
#include <asm/plpar_wrappers.h>
#include <asm/kexec.h>
#include <asm/fadump.h>
#include <asm/asm-prototypes.h>
#include <asm/debugfs.h>

#include "pseries.h"

/* Flag bits for H_BULK_REMOVE */
#define HBR_REQUEST	0x4000000000000000UL
#define HBR_RESPONSE	0x8000000000000000UL
#define HBR_END		0xc000000000000000UL
#define HBR_AVPN	0x0200000000000000UL
#define HBR_ANDCOND	0x0100000000000000UL


/* in hvCall.S */
EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);

void vpa_init(int cpu)
{
	int hwcpu = get_hard_smp_processor_id(cpu);
	unsigned long addr;
	long ret;
	struct paca_struct *pp;
	struct dtl_entry *dtl;

	/*
	 * The spec says it "may be problematic" if CPU x registers the VPA of
	 * CPU y. We should never do that, but wail if we ever do.
	 */
	WARN_ON(cpu != smp_processor_id());

	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		lppaca_of(cpu).vmxregs_in_use = 1;

	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		lppaca_of(cpu).ebb_regs_in_use = 1;

	addr = __pa(&lppaca_of(cpu));
	ret = register_vpa(hwcpu, addr);

	if (ret) {
		pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
		       "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
		return;
	}

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * PAPR says this feature is SLB-Buffer but firmware never
	 * reports that. All SPLPAR support SLB shadow buffer.
	 */
	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
		addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
		ret = register_slb_shadow(hwcpu, addr);
		if (ret)
			pr_err("WARNING: SLB shadow buffer registration for "
			       "cpu %d (hw %d) of area %lx failed with %ld\n",
			       cpu, hwcpu, addr, ret);
	}
#endif /* CONFIG_PPC_BOOK3S_64 */

	/*
	 * Register dispatch trace log, if one has been allocated.
	 */
	pp = paca_ptrs[cpu];
	dtl = pp->dispatch_log;
	if (dtl) {
		pp->dtl_ridx = 0;
		pp->dtl_curr = dtl;
		lppaca_of(cpu).dtl_idx = 0;

		/* hypervisor reads buffer length from this field */
		dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
		ret = register_dtl(hwcpu, __pa(dtl));
		if (ret)
			pr_err("WARNING: DTL registration of cpu %d (hw %d) "
			       "failed with %ld\n", smp_processor_id(),
			       hwcpu, ret);
		lppaca_of(cpu).dtl_enable_mask = 2;
	}
}
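
/*
 * For reference, the per-cpu registration sequence performed by vpa_init()
 * above (the three helpers are hypercall wrappers, see
 * <asm/plpar_wrappers.h>, and each is passed the *real* address of its area):
 *
 *	register_vpa(hwcpu, __pa(&lppaca_of(cpu)));
 *	register_slb_shadow(hwcpu, __pa(paca_ptrs[cpu]->slb_shadow_ptr));
 *	register_dtl(hwcpu, __pa(dtl));
 *
 * As noted in the code, the DTL buffer length is carried in the first
 * entry's enqueue_to_dispatch_time field.  This is a descriptive summary of
 * the function above, not additional PAPR semantics.
 */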

#ifdef CONFIG_PPC_BOOK3S_64

static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
				     unsigned long vpn, unsigned long pa,
				     unsigned long rflags, unsigned long vflags,
				     int psize, int apsize, int ssize)
{
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long slot;
	unsigned long hpte_v, hpte_r;

	if (!(vflags & HPTE_V_BOLTED))
		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
			 hpte_group, vpn, pa, rflags, vflags, psize);

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED))
		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);

	/* Now fill in the actual HPTE */
	/* Set CEC cookie to 0 */
	/* Zero page = 0 */
	/* I-cache Invalidate = 0 */
	/* I-cache synchronize = 0 */
	/* Exact = 0 */
	flags = 0;

	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
		flags |= H_COALESCE_CAND;

	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
	if (unlikely(lpar_rc == H_PTEG_FULL)) {
		pr_devel("Hash table group is full\n");
		return -1;
	}

	/*
	 * Since we try and ioremap PHBs we don't own, the pte insert
	 * will fail. However we must catch the failure in hash_page
	 * or we will loop forever, so return -2 in this case.
	 */
	if (unlikely(lpar_rc != H_SUCCESS)) {
		pr_err("Failed hash pte insert with error %ld\n", lpar_rc);
		return -2;
	}
	if (!(vflags & HPTE_V_BOLTED))
		pr_devel(" -> slot: %lu\n", slot & 7);

	/* Because of iSeries, we have to pass down the secondary
	 * bucket bit here as well
	 */
	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
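
/*
 * Note on the value returned by pSeries_lpar_hpte_insert() above: the low
 * three bits are the slot within the hash group reported by H_ENTER, and
 * bit 3 carries HPTE_V_SECONDARY so callers can tell which hash bucket
 * (primary or secondary) the entry landed in.  This restates the return
 * statement above; it does not describe any additional firmware behaviour.
 */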

static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);

static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
{
	unsigned long slot_offset;
	unsigned long lpar_rc;
	int i;
	unsigned long dummy1, dummy2;

	/* pick a random slot to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {

		/* don't remove a bolted entry */
		lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
					   (0x1UL << 4), &dummy1, &dummy2);
		if (lpar_rc == H_SUCCESS)
			return i;

		/*
		 * The test for adjunct partition is performed before the
		 * ANDCOND test. H_RESOURCE may be returned, so we need to
		 * check for that as well.
		 */
		BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE);

		slot_offset++;
		slot_offset &= 0x7;
	}

	return -1;
}

static void manual_hpte_clear_all(void)
{
	unsigned long size_bytes = 1UL << ppc64_pft_size;
	unsigned long hpte_count = size_bytes >> 4;
	struct {
		unsigned long pteh;
		unsigned long ptel;
	} ptes[4];
	long lpar_rc;
	unsigned long i, j;

	/* Read in batches of 4,
	 * invalidate only valid entries not in the VRMA
	 * hpte_count will be a multiple of 4
	 */
	for (i = 0; i < hpte_count; i += 4) {
		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
		if (lpar_rc != H_SUCCESS) {
			pr_info("Failed to read hash page table at %ld err %ld\n",
				i, lpar_rc);
			continue;
		}
		for (j = 0; j < 4; j++) {
			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
				HPTE_V_VRMA_MASK)
				continue;
			if (ptes[j].pteh & HPTE_V_VALID)
				plpar_pte_remove_raw(0, i + j, 0,
					&(ptes[j].pteh), &(ptes[j].ptel));
		}
	}
}

static int hcall_hpte_clear_all(void)
{
	int rc;

	do {
		rc = plpar_hcall_norets(H_CLEAR_HPT);
	} while (rc == H_CONTINUE);

	return rc;
}

static void pseries_hpte_clear_all(void)
{
	int rc;

	rc = hcall_hpte_clear_all();
	if (rc != H_SUCCESS)
		manual_hpte_clear_all();

#ifdef __LITTLE_ENDIAN__
	/*
	 * Reset exceptions to big endian.
	 *
	 * FIXME this is a hack for kexec, we need to reset the exception
	 * endian before starting the new kernel and this is a convenient place
	 * to do it.
	 *
	 * This is also called on boot when a fadump happens. In that case we
	 * must not change the exception endian mode.
	 */
	if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active())
		pseries_big_endian_exceptions();
#endif
}

/*
 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
 * the low 3 bits of flags happen to line up. So no transform is needed.
 * We can probably optimize here and assume the high bits of newpp are
 * already zero. For now I am paranoid.
 */
static long pSeries_lpar_hpte_updatepp(unsigned long slot,
				       unsigned long newpp,
				       unsigned long vpn,
				       int psize, int apsize,
				       int ssize, unsigned long inv_flags)
{
	unsigned long lpar_rc;
	unsigned long flags;
	unsigned long want_v;

	want_v = hpte_encode_avpn(vpn, psize, ssize);

	flags = (newpp & 7) | H_AVPN;
	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
		/* Move pp0 into bit 8 (IBM 55) */
		flags |= (newpp & HPTE_R_PP0) >> 55;

	pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
		 want_v, slot, flags, psize);

	lpar_rc = plpar_pte_protect(flags, slot, want_v);

	if (lpar_rc == H_NOT_FOUND) {
		pr_devel("not found !\n");
		return -1;
	}

	pr_devel("ok\n");

	BUG_ON(lpar_rc != H_SUCCESS);

	return 0;
}

static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group)
{
	long lpar_rc;
	unsigned long i, j;
	struct {
		unsigned long pteh;
		unsigned long ptel;
	} ptes[4];

	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {

		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
		if (lpar_rc != H_SUCCESS) {
			pr_info("Failed to read hash page table at %ld err %ld\n",
				hpte_group, lpar_rc);
			continue;
		}

		for (j = 0; j < 4; j++) {
			if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
			    (ptes[j].pteh & HPTE_V_VALID))
				return i + j;
		}
	}

	return -1;
}

static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
{
	long slot;
	unsigned long hash;
	unsigned long want_v;
	unsigned long hpte_group;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted entries are always in the primary group */
	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
	if (slot < 0)
		return -1;
	return hpte_group + slot;
}

static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
					     unsigned long ea,
					     int psize, int ssize)
{
	unsigned long vpn;
	unsigned long lpar_rc, slot, vsid, flags;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	BUG_ON(slot == -1);

	flags = newpp & 7;
	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
		/* Move pp0 into bit 8 (IBM 55) */
		flags |= (newpp & HPTE_R_PP0) >> 55;

	lpar_rc = plpar_pte_protect(flags, slot, 0);

	BUG_ON(lpar_rc != H_SUCCESS);
}

static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
					 int psize, int apsize,
					 int ssize, int local)
{
	unsigned long want_v;
	unsigned long lpar_rc;
	unsigned long dummy1, dummy2;

	pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
		 slot, vpn, psize, local);

	want_v = hpte_encode_avpn(vpn, psize, ssize);
	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
	if (lpar_rc == H_NOT_FOUND)
		return;

	BUG_ON(lpar_rc != H_SUCCESS);
}


/*
 * As defined in the PAPR's section 14.5.4.1.8
 * The control mask doesn't include the returned reference and change bit from
 * the processed PTE.
 */
#define HBLKR_AVPN		0x0100000000000000UL
#define HBLKR_CTRL_MASK		0xf800000000000000UL
#define HBLKR_CTRL_SUCCESS	0x8000000000000000UL
#define HBLKR_CTRL_ERRNOTFOUND	0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY	0xa000000000000000UL
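
/*
 * Parameter layout used with H_BLOCK_REMOVE by call_block_remove() below
 * (a summary of the code that follows, not a PAPR quotation):
 *
 *	param[0]	AVA of the naturally aligned 8-page virtual block,
 *			built with hpte_encode_avpn()
 *	param[1-8]	up to eight translation specifiers, each of the form
 *			HBR_REQUEST | HBLKR_AVPN | slot
 *	param[idx]	HBR_END terminator when fewer than eight are supplied
 *
 * On H_PARTIAL the per-entry status comes back in retbuf[] and is decoded
 * with the HBLKR_CTRL_* masks above.
 */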

/**
 * H_BLOCK_REMOVE caller.
 * @idx should point to the latest @param entry set with a PTEX.
 * If a PTE cannot be processed because another CPU has already locked that
 * group, those entries are put back in @param starting at index 1.
 * If entries have to be retried and @retry_busy is set to true, these entries
 * are retried until success. If @retry_busy is set to false, the returned
 * value is the number of entries yet to be processed.
 */
static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
				       bool retry_busy)
{
	unsigned long i, rc, new_idx;
	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];

	if (idx < 2) {
		pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
		return 0;
	}
again:
	new_idx = 0;
	if (idx > PLPAR_HCALL9_BUFSIZE) {
		pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
		idx = PLPAR_HCALL9_BUFSIZE;
	} else if (idx < PLPAR_HCALL9_BUFSIZE)
		param[idx] = HBR_END;

	rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
			  param[0], /* AVA */
			  param[1], param[2], param[3], param[4], /* TS0-7 */
			  param[5], param[6], param[7], param[8]);
	if (rc == H_SUCCESS)
		return 0;

	BUG_ON(rc != H_PARTIAL);

	/* Check that the unprocessed entries were 'not found' or 'busy' */
	for (i = 0; i < idx-1; i++) {
		unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;

		if (ctrl == HBLKR_CTRL_ERRBUSY) {
			param[++new_idx] = param[i+1];
			continue;
		}

		BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
		       && ctrl != HBLKR_CTRL_ERRNOTFOUND);
	}

	/*
	 * If there were entries found busy, retry these entries if requested,
	 * or if all the entries have to be retried.
	 */
	if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
		idx = new_idx + 1;
		goto again;
	}

	return new_idx;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
 * to make sure that we avoid bouncing the hypervisor tlbie lock.
 */
#define PPC64_HUGE_HPTE_BATCH 12
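
/*
 * Rough flow of hugepage_block_invalidate() below (a reading of the code,
 * not a statement of extra firmware requirements): slots are grouped by
 * their naturally aligned 8-page virtual block (vpgb); whenever the block
 * changes, the pending entries are flushed with call_block_remove() and a
 * new block is started with its AVA in param[0].
 */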

static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
				      int count, int psize, int ssize)
{
	unsigned long param[PLPAR_HCALL9_BUFSIZE];
	unsigned long shift, current_vpgb, vpgb;
	int i, pix = 0;

	shift = mmu_psize_defs[psize].shift;

	for (i = 0; i < count; i++) {
		/*
		 * Shifting 3 more bits to the right to get an
		 * 8-page aligned virtual address.
		 */
		vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
		if (!pix || vpgb != current_vpgb) {
			/*
			 * Need to start a new 8-page block, flush
			 * the current one if needed.
			 */
			if (pix)
				(void)call_block_remove(pix, param, true);
			current_vpgb = vpgb;
			param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
			pix = 1;
		}

		param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
		if (pix == PLPAR_HCALL9_BUFSIZE) {
			pix = call_block_remove(pix, param, false);
			/*
			 * pix = 0 means that all the entries were
			 * removed, we can start a new block.
			 * Otherwise, this means that there are entries
			 * to retry, and pix points to the latest one, so
			 * we should increment it and try to continue
			 * the same block.
			 */
			if (pix)
				pix++;
		}
	}
	if (pix)
		(void)call_block_remove(pix, param, true);
}

static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
				     int count, int psize, int ssize)
{
	unsigned long param[PLPAR_HCALL9_BUFSIZE];
	int i = 0, pix = 0, rc;

	for (i = 0; i < count; i++) {

		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
						     ssize, 0);
		} else {
			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
			pix += 2;
			if (pix == 8) {
				rc = plpar_hcall9(H_BULK_REMOVE, param,
						  param[0], param[1], param[2],
						  param[3], param[4], param[5],
						  param[6], param[7]);
				BUG_ON(rc != H_SUCCESS);
				pix = 0;
			}
		}
	}
	if (pix) {
		param[pix] = HBR_END;
		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
				  param[2], param[3], param[4], param[5],
				  param[6], param[7]);
		BUG_ON(rc != H_SUCCESS);
	}
}

static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
						      unsigned long *vpn,
						      int count, int psize,
						      int ssize)
{
	unsigned long flags = 0;
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);

	if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
		hugepage_block_invalidate(slot, vpn, count, psize, ssize);
	else
		hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);

	if (lock_tlbie)
		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}

static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
					     unsigned long addr,
					     unsigned char *hpte_slot_array,
					     int psize, int ssize, int local)
{
	int i, index = 0;
	unsigned long s_addr = addr;
	unsigned int max_hpte_count, valid;
	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
	unsigned long shift, hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		slot_array[index] = slot;
		vpn_array[index] = vpn;
		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
			/*
			 * Now do a bulk invalidate
			 */
			__pSeries_lpar_hugepage_invalidate(slot_array,
							   vpn_array,
							   PPC64_HUGE_HPTE_BATCH,
							   psize, ssize);
			index = 0;
		} else
			index++;
	}
	if (index)
		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
						   index, psize, ssize);
}
#else
static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
					     unsigned long addr,
					     unsigned char *hpte_slot_array,
					     int psize, int ssize, int local)
{
	WARN(1, "%s called without THP support\n", __func__);
}
#endif

static int pSeries_lpar_hpte_removebolted(unsigned long ea,
					  int psize, int ssize)
{
	unsigned long vpn;
	unsigned long slot, vsid;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		return -ENOENT;

	/*
	 * lpar doesn't use the passed actual page size
	 */
	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
	return 0;
}

static inline unsigned long compute_slot(real_pte_t pte,
					 unsigned long vpn,
					 unsigned long index,
					 unsigned long shift,
					 int ssize)
{
	unsigned long slot, hash, hidx;

	hash = hpt_hash(vpn, shift, ssize);
	hidx = __rpte_to_hidx(pte, index);
	if (hidx & _PTEIDX_SECONDARY)
		hash = ~hash;
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	slot += hidx & _PTEIDX_GROUP_IX;
	return slot;
}

/**
 * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
 * "all within the same naturally aligned 8 page virtual address block".
 */
static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
			    unsigned long *param)
{
	unsigned long vpn;
	unsigned long i, pix = 0;
	unsigned long index, shift, slot, current_vpgb, vpgb;
	real_pte_t pte;
	int psize, ssize;

	psize = batch->psize;
	ssize = batch->ssize;

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];
		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			/*
			 * Shifting 3 more bits to the right to get an
			 * 8-page aligned virtual address.
			 */
			vpgb = (vpn >> (shift - VPN_SHIFT + 3));
			if (!pix || vpgb != current_vpgb) {
				/*
				 * Need to start a new 8-page block, flush
				 * the current one if needed.
				 */
				if (pix)
					(void)call_block_remove(pix, param,
								true);
				current_vpgb = vpgb;
				param[0] = hpte_encode_avpn(vpn, psize,
							    ssize);
				pix = 1;
			}

			slot = compute_slot(pte, vpn, index, shift, ssize);
			param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;

			if (pix == PLPAR_HCALL9_BUFSIZE) {
				pix = call_block_remove(pix, param, false);
				/*
				 * pix = 0 means that all the entries were
				 * removed, we can start a new block.
				 * Otherwise, this means that there are entries
				 * to retry, and pix points to the latest one,
				 * so we should increment it and try to
				 * continue the same block.
				 */
				if (pix)
					pix++;
			}
		} pte_iterate_hashed_end();
	}

	if (pix)
		(void)call_block_remove(pix, param, true);
}
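
/*
 * H_BULK_REMOVE packing used by hugepage_bulk_invalidate() above and by
 * pSeries_lpar_flush_hash_range() below (a summary of the code, not extra
 * PAPR detail): entries are placed in param[] as pairs,
 *
 *	param[2n]	HBR_REQUEST | HBR_AVPN | slot
 *	param[2n + 1]	hpte_encode_avpn(vpn, psize, ssize)
 *
 * Up to four pairs (eight parameters) are sent per hcall; a short final
 * batch is terminated by writing HBR_END into the next free slot.
 */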

/*
 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
 * lock.
 */
static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long i, pix, rc;
	unsigned long flags = 0;
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	unsigned long param[PLPAR_HCALL9_BUFSIZE];
	unsigned long index, shift, slot;
	real_pte_t pte;
	int psize, ssize;

	if (lock_tlbie)
		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);

	if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
		do_block_remove(number, batch, param);
		goto out;
	}

	psize = batch->psize;
	ssize = batch->ssize;
	pix = 0;
	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];
		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			slot = compute_slot(pte, vpn, index, shift, ssize);
			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
				/*
				 * lpar doesn't use the passed actual page size
				 */
				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
							     0, ssize, local);
			} else {
				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
				param[pix+1] = hpte_encode_avpn(vpn, psize,
								ssize);
				pix += 2;
				if (pix == 8) {
					rc = plpar_hcall9(H_BULK_REMOVE, param,
						param[0], param[1], param[2],
						param[3], param[4], param[5],
						param[6], param[7]);
					BUG_ON(rc != H_SUCCESS);
					pix = 0;
				}
			}
		} pte_iterate_hashed_end();
	}
	if (pix) {
		param[pix] = HBR_END;
		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
				  param[2], param[3], param[4], param[5],
				  param[6], param[7]);
		BUG_ON(rc != H_SUCCESS);
	}

out:
	if (lock_tlbie)
		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}

static int __init disable_bulk_remove(char *str)
{
	if (strcmp(str, "off") == 0 &&
	    firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
		pr_info("Disabling BULK_REMOVE firmware feature");
		powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
	}
	return 1;
}

__setup("bulk_remove=", disable_bulk_remove);

#define HPT_RESIZE_TIMEOUT	10000 /* ms */

struct hpt_resize_state {
	unsigned long shift;
	int commit_rc;
};

static int pseries_lpar_resize_hpt_commit(void *data)
{
	struct hpt_resize_state *state = data;

	state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
	if (state->commit_rc != H_SUCCESS)
		return -EIO;

	/* Hypervisor has transitioned the HTAB, update our globals */
	ppc64_pft_size = state->shift;
	htab_size_bytes = 1UL << ppc64_pft_size;
	htab_hash_mask = (htab_size_bytes >> 7) - 1;

	return 0;
}
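
/*
 * Sketch of the resize sequence implemented below (derived from the code):
 * H_RESIZE_HPT_PREPARE is retried while the hypervisor reports long-busy,
 * then pseries_lpar_resize_hpt_commit() runs under stop_machine() so that
 * H_RESIZE_HPT_COMMIT and the update of ppc64_pft_size, htab_size_bytes and
 * htab_hash_mask happen with the other CPUs quiesced.  A prepare call with
 * shift == 0 cancels a timed-out resize.
 */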

/* Must be called in user context */
static int pseries_lpar_resize_hpt(unsigned long shift)
{
	struct hpt_resize_state state = {
		.shift = shift,
		.commit_rc = H_FUNCTION,
	};
	unsigned int delay, total_delay = 0;
	int rc;
	ktime_t t0, t1, t2;

	might_sleep();

	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
		return -ENODEV;

	pr_info("Attempting to resize HPT to shift %lu\n", shift);

	t0 = ktime_get();

	rc = plpar_resize_hpt_prepare(0, shift);
	while (H_IS_LONG_BUSY(rc)) {
		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > HPT_RESIZE_TIMEOUT) {
			/* prepare with shift==0 cancels an in-progress resize */
			rc = plpar_resize_hpt_prepare(0, 0);
			if (rc != H_SUCCESS)
				pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
					rc);
			return -ETIMEDOUT;
		}
		msleep(delay);
		rc = plpar_resize_hpt_prepare(0, shift);
	}

	switch (rc) {
	case H_SUCCESS:
		/* Continue on */
		break;

	case H_PARAMETER:
		pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
		return -EINVAL;
	case H_RESOURCE:
		pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
		return -EPERM;
	default:
		pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
		return -EIO;
	}

	t1 = ktime_get();

	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);

	t2 = ktime_get();

	if (rc != 0) {
		switch (state.commit_rc) {
		case H_PTEG_FULL:
			return -ENOSPC;

		default:
			pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
				state.commit_rc);
			return -EIO;
		}
	}

	pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
		shift, (long long) ktime_ms_delta(t1, t0),
		(long long) ktime_ms_delta(t2, t1));

	return 0;
}

static int pseries_lpar_register_process_table(unsigned long base,
			unsigned long page_size, unsigned long table_size)
{
	long rc;
	unsigned long flags = 0;

	if (table_size)
		flags |= PROC_TABLE_NEW;
	if (radix_enabled())
		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
	else
		flags |= PROC_TABLE_HPT_SLB;
	for (;;) {
		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
					page_size, table_size);
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc != H_SUCCESS) {
		pr_err("Failed to register process table (rc=%ld)\n", rc);
		BUG();
	}
	return rc;
}

void __init hpte_init_pseries(void)
{
	mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
	mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp;
	mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
	mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert;
	mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove;
	mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted;
	mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
	mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all;
	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
	register_process_table = pseries_lpar_register_process_table;

	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
}

void radix_init_pseries(void)
{
	pr_info("Using radix MMU under hypervisor\n");
	register_process_table = pseries_lpar_register_process_table;
}

#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;

static int __init cmo_free_hint(char *str)
{
	char *parm;
	parm = strstrip(str);

	if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
		pr_info("%s: CMO free page hinting is not active.\n", __func__);
		cmo_free_hint_flag = 0;
		return 1;
	}

	cmo_free_hint_flag = 1;
	pr_info("%s: CMO free page hinting is active.\n", __func__);

	if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
		return 1;

	return 0;
}

__setup("cmo_free_hint=", cmo_free_hint);
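
/*
 * Cooperative Memory Overcommit (CMO) free page hinting, as implemented
 * below: when enabled, arch_free_page() walks each freed page in
 * cmo_get_page_size() sized chunks and marks them H_PAGE_SET_UNUSED via
 * H_PAGE_INIT, letting the hypervisor reclaim the backing memory.  (This
 * summarizes pSeries_set_page_state()/arch_free_page(); the hypervisor-side
 * policy is not described here.)
 */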

static void pSeries_set_page_state(struct page *page, int order,
				   unsigned long state)
{
	int i, j;
	unsigned long cmo_page_sz, addr;

	cmo_page_sz = cmo_get_page_size();
	addr = __pa((unsigned long)page_address(page));

	for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) {
		for (j = 0; j < PAGE_SIZE; j += cmo_page_sz)
			plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0);
	}
}

void arch_free_page(struct page *page, int order)
{
	if (radix_enabled())
		return;
	if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
		return;

	pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
}
EXPORT_SYMBOL(arch_free_page);

#endif /* CONFIG_PPC_SMLPAR */
#endif /* CONFIG_PPC_BOOK3S_64 */

#ifdef CONFIG_TRACEPOINTS
#ifdef CONFIG_JUMP_LABEL
struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;

int hcall_tracepoint_regfunc(void)
{
	static_key_slow_inc(&hcall_tracepoint_key);
	return 0;
}

void hcall_tracepoint_unregfunc(void)
{
	static_key_slow_dec(&hcall_tracepoint_key);
}
#else
/*
 * We optimise our hcall path by placing hcall_tracepoint_refcount
 * directly in the TOC so we can check if the hcall tracepoints are
 * enabled via a single load.
 */

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;

int hcall_tracepoint_regfunc(void)
{
	hcall_tracepoint_refcount++;
	return 0;
}

void hcall_tracepoint_unregfunc(void)
{
	hcall_tracepoint_refcount--;
}
#endif

/*
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this is spinlocks calling H_YIELD on
 * shared processor partitions.
 */
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);

void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
	unsigned long flags;
	unsigned int *depth;

	/*
	 * We cannot call tracepoints inside RCU idle regions which
	 * means we must not trace H_CEDE.
	 */
	if (opcode == H_CEDE)
		return;

	local_irq_save(flags);

	depth = this_cpu_ptr(&hcall_trace_depth);

	if (*depth)
		goto out;

	(*depth)++;
	preempt_disable();
	trace_hcall_entry(opcode, args);
	(*depth)--;

out:
	local_irq_restore(flags);
}

void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
{
	unsigned long flags;
	unsigned int *depth;

	if (opcode == H_CEDE)
		return;

	local_irq_save(flags);

	depth = this_cpu_ptr(&hcall_trace_depth);

	if (*depth)
		goto out;

	(*depth)++;
	trace_hcall_exit(opcode, retval, retbuf);
	preempt_enable();
	(*depth)--;

out:
	local_irq_restore(flags);
}
#endif

/**
 * h_get_mpp
 * H_GET_MPP hcall returns info in 7 parms
 */
int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
	int rc;
	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];

	rc = plpar_hcall9(H_GET_MPP, retbuf);

	mpp_data->entitled_mem = retbuf[0];
	mpp_data->mapped_mem = retbuf[1];

	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
	mpp_data->pool_num = retbuf[2] & 0xffff;

	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL;

	mpp_data->pool_size = retbuf[4];
	mpp_data->loan_request = retbuf[5];
	mpp_data->backing_mem = retbuf[6];

	return rc;
}
EXPORT_SYMBOL(h_get_mpp);

int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
{
	int rc;
	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };

	rc = plpar_hcall9(H_GET_MPP_X, retbuf);

	mpp_x_data->coalesced_bytes = retbuf[0];
	mpp_x_data->pool_coalesced_bytes = retbuf[1];
	mpp_x_data->pool_purr_cycles = retbuf[2];
	mpp_x_data->pool_spurr_cycles = retbuf[3];

	return rc;
}

static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
{
	unsigned long protovsid;
	unsigned long va_bits = VA_BITS;
	unsigned long modinv, vsid_modulus;
	unsigned long max_mod_inv, tmp_modinv;

	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
		va_bits = 65;

	if (ssize == MMU_SEGSIZE_256M) {
		modinv = VSID_MULINV_256M;
		vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
	} else {
		modinv = VSID_MULINV_1T;
		vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
	}

	/*
	 * vsid outside our range.
	 */
	if (vsid >= vsid_modulus)
		return 0;

	/*
	 * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
	 * and vsid = (protovsid * x) % vsid_modulus, then we say:
	 *	protovsid = (vsid * modinv) % vsid_modulus
	 */

	/* Check if (vsid * modinv) overflow (63 bits) */
	max_mod_inv = 0x7fffffffffffffffull / vsid;
	if (modinv < max_mod_inv)
		return (vsid * modinv) % vsid_modulus;

	tmp_modinv = modinv/max_mod_inv;
	modinv %= max_mod_inv;

	protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
	protovsid = (protovsid + vsid * modinv) % vsid_modulus;

	return protovsid;
}
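
/*
 * Worked form of the overflow-safe computation above (restating the code,
 * with m = vsid_modulus): the goal is protovsid = (vsid * modinv) mod m, but
 * vsid * modinv may exceed 63 bits.  Writing
 *
 *	modinv = tmp_modinv * max_mod_inv + r,	max_mod_inv = 2^63 / vsid
 *
 * gives
 *
 *	protovsid = (((vsid * max_mod_inv) mod m) * tmp_modinv
 *		     + vsid * r) mod m
 *
 * which keeps the intermediate products within 63 bits, matching the two
 * final statements of vsid_unscramble().
 */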

static int __init reserve_vrma_context_id(void)
{
	unsigned long protovsid;

	/*
	 * Reserve context ids which map to reserved virtual addresses. For
	 * now we only reserve the context id which maps to the VRMA VSID. We
	 * ignore the addresses in "ibm,adjunct-virtual-addresses" because we
	 * don't enable adjunct support via the
	 * "ibm,client-architecture-support" interface.
	 */
	protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T);
	hash__reserve_context_id(protovsid >> ESID_BITS_1T);
	return 0;
}
machine_device_initcall(pseries, reserve_vrma_context_id);

#ifdef CONFIG_DEBUG_FS
/* debugfs file interface for vpa data */
static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
			     loff_t *pos)
{
	int cpu = (long)filp->private_data;
	struct lppaca *lppaca = &lppaca_of(cpu);

	return simple_read_from_buffer(buf, len, pos, lppaca,
				sizeof(struct lppaca));
}

static const struct file_operations vpa_fops = {
	.open		= simple_open,
	.read		= vpa_file_read,
	.llseek		= default_llseek,
};

static int __init vpa_debugfs_init(void)
{
	char name[16];
	long i;
	static struct dentry *vpa_dir;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
	if (!vpa_dir) {
		pr_warn("%s: can't create vpa root dir\n", __func__);
		return -ENOMEM;
	}

	/* set up the per-cpu vpa file */
	for_each_possible_cpu(i) {
		struct dentry *d;

		sprintf(name, "cpu-%ld", i);

		d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
					&vpa_fops);
		if (!d) {
			pr_warn("%s: can't create per-cpu vpa file\n",
				__func__);
			return -ENOMEM;
		}
	}

	return 0;
}
machine_arch_initcall(pseries, vpa_debugfs_init);
#endif /* CONFIG_DEBUG_FS */