1 /* 2 * pSeries_lpar.c 3 * Copyright (C) 2001 Todd Inglett, IBM Corporation 4 * 5 * pSeries LPAR support. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 /* Enables debugging of low-level hash table routines - careful! */ 23 #undef DEBUG 24 25 #include <linux/kernel.h> 26 #include <linux/dma-mapping.h> 27 #include <linux/console.h> 28 #include <linux/export.h> 29 #include <asm/processor.h> 30 #include <asm/mmu.h> 31 #include <asm/page.h> 32 #include <asm/pgtable.h> 33 #include <asm/machdep.h> 34 #include <asm/mmu_context.h> 35 #include <asm/iommu.h> 36 #include <asm/tlbflush.h> 37 #include <asm/tlb.h> 38 #include <asm/prom.h> 39 #include <asm/cputable.h> 40 #include <asm/udbg.h> 41 #include <asm/smp.h> 42 #include <asm/trace.h> 43 #include <asm/firmware.h> 44 45 #include "plpar_wrappers.h" 46 #include "pseries.h" 47 48 49 /* in hvCall.S */ 50 EXPORT_SYMBOL(plpar_hcall); 51 EXPORT_SYMBOL(plpar_hcall9); 52 EXPORT_SYMBOL(plpar_hcall_norets); 53 54 extern void pSeries_find_serial_port(void); 55 56 void vpa_init(int cpu) 57 { 58 int hwcpu = get_hard_smp_processor_id(cpu); 59 unsigned long addr; 60 long ret; 61 struct paca_struct *pp; 62 struct dtl_entry *dtl; 63 64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 65 lppaca_of(cpu).vmxregs_in_use = 1; 66 67 addr = __pa(&lppaca_of(cpu)); 68 ret = register_vpa(hwcpu, addr); 69 70 if (ret) { 71 pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " 72 "%lx failed with %ld\n", cpu, hwcpu, addr, ret); 73 return; 74 } 75 /* 76 * PAPR says this feature is SLB-Buffer but firmware never 77 * reports that. All SPLPAR support SLB shadow buffer. 78 */ 79 addr = __pa(&slb_shadow[cpu]); 80 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 81 ret = register_slb_shadow(hwcpu, addr); 82 if (ret) 83 pr_err("WARNING: SLB shadow buffer registration for " 84 "cpu %d (hw %d) of area %lx failed with %ld\n", 85 cpu, hwcpu, addr, ret); 86 } 87 88 /* 89 * Register dispatch trace log, if one has been allocated. 90 */ 91 pp = &paca[cpu]; 92 dtl = pp->dispatch_log; 93 if (dtl) { 94 pp->dtl_ridx = 0; 95 pp->dtl_curr = dtl; 96 lppaca_of(cpu).dtl_idx = 0; 97 98 /* hypervisor reads buffer length from this field */ 99 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; 100 ret = register_dtl(hwcpu, __pa(dtl)); 101 if (ret) 102 pr_err("WARNING: DTL registration of cpu %d (hw %d) " 103 "failed with %ld\n", smp_processor_id(), 104 hwcpu, ret); 105 lppaca_of(cpu).dtl_enable_mask = 2; 106 } 107 } 108 109 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 110 unsigned long vpn, unsigned long pa, 111 unsigned long rflags, unsigned long vflags, 112 int psize, int apsize, int ssize) 113 { 114 unsigned long lpar_rc; 115 unsigned long flags; 116 unsigned long slot; 117 unsigned long hpte_v, hpte_r; 118 119 if (!(vflags & HPTE_V_BOLTED)) 120 pr_devel("hpte_insert(group=%lx, vpn=%016lx, " 121 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", 122 hpte_group, vpn, pa, rflags, vflags, psize); 123 124 hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; 125 hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; 126 127 if (!(vflags & HPTE_V_BOLTED)) 128 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); 129 130 /* Now fill in the actual HPTE */ 131 /* Set CEC cookie to 0 */ 132 /* Zero page = 0 */ 133 /* I-cache Invalidate = 0 */ 134 /* I-cache synchronize = 0 */ 135 /* Exact = 0 */ 136 flags = 0; 137 138 /* Make pHyp happy */ 139 if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) 140 hpte_r &= ~_PAGE_COHERENT; 141 if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) 142 flags |= H_COALESCE_CAND; 143 144 lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); 145 if (unlikely(lpar_rc == H_PTEG_FULL)) { 146 if (!(vflags & HPTE_V_BOLTED)) 147 pr_devel(" full\n"); 148 return -1; 149 } 150 151 /* 152 * Since we try and ioremap PHBs we don't own, the pte insert 153 * will fail. However we must catch the failure in hash_page 154 * or we will loop forever, so return -2 in this case. 155 */ 156 if (unlikely(lpar_rc != H_SUCCESS)) { 157 if (!(vflags & HPTE_V_BOLTED)) 158 pr_devel(" lpar err %ld\n", lpar_rc); 159 return -2; 160 } 161 if (!(vflags & HPTE_V_BOLTED)) 162 pr_devel(" -> slot: %lu\n", slot & 7); 163 164 /* Because of iSeries, we have to pass down the secondary 165 * bucket bit here as well 166 */ 167 return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); 168 } 169 170 static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); 171 172 static long pSeries_lpar_hpte_remove(unsigned long hpte_group) 173 { 174 unsigned long slot_offset; 175 unsigned long lpar_rc; 176 int i; 177 unsigned long dummy1, dummy2; 178 179 /* pick a random slot to start at */ 180 slot_offset = mftb() & 0x7; 181 182 for (i = 0; i < HPTES_PER_GROUP; i++) { 183 184 /* don't remove a bolted entry */ 185 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, 186 (0x1UL << 4), &dummy1, &dummy2); 187 if (lpar_rc == H_SUCCESS) 188 return i; 189 190 /* 191 * The test for adjunct partition is performed before the 192 * ANDCOND test. H_RESOURCE may be returned, so we need to 193 * check for that as well. 194 */ 195 BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); 196 197 slot_offset++; 198 slot_offset &= 0x7; 199 } 200 201 return -1; 202 } 203 204 static void pSeries_lpar_hptab_clear(void) 205 { 206 unsigned long size_bytes = 1UL << ppc64_pft_size; 207 unsigned long hpte_count = size_bytes >> 4; 208 struct { 209 unsigned long pteh; 210 unsigned long ptel; 211 } ptes[4]; 212 long lpar_rc; 213 unsigned long i, j; 214 215 /* Read in batches of 4, 216 * invalidate only valid entries not in the VRMA 217 * hpte_count will be a multiple of 4 218 */ 219 for (i = 0; i < hpte_count; i += 4) { 220 lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); 221 if (lpar_rc != H_SUCCESS) 222 continue; 223 for (j = 0; j < 4; j++){ 224 if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == 225 HPTE_V_VRMA_MASK) 226 continue; 227 if (ptes[j].pteh & HPTE_V_VALID) 228 plpar_pte_remove_raw(0, i + j, 0, 229 &(ptes[j].pteh), &(ptes[j].ptel)); 230 } 231 } 232 } 233 234 /* 235 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and 236 * the low 3 bits of flags happen to line up. So no transform is needed. 237 * We can probably optimize here and assume the high bits of newpp are 238 * already zero. For now I am paranoid. 239 */ 240 static long pSeries_lpar_hpte_updatepp(unsigned long slot, 241 unsigned long newpp, 242 unsigned long vpn, 243 int psize, int ssize, int local) 244 { 245 unsigned long lpar_rc; 246 unsigned long flags = (newpp & 7) | H_AVPN; 247 unsigned long want_v; 248 249 want_v = hpte_encode_avpn(vpn, psize, ssize); 250 251 pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 252 want_v, slot, flags, psize); 253 254 lpar_rc = plpar_pte_protect(flags, slot, want_v); 255 256 if (lpar_rc == H_NOT_FOUND) { 257 pr_devel("not found !\n"); 258 return -1; 259 } 260 261 pr_devel("ok\n"); 262 263 BUG_ON(lpar_rc != H_SUCCESS); 264 265 return 0; 266 } 267 268 static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) 269 { 270 unsigned long dword0; 271 unsigned long lpar_rc; 272 unsigned long dummy_word1; 273 unsigned long flags; 274 275 /* Read 1 pte at a time */ 276 /* Do not need RPN to logical page translation */ 277 /* No cross CEC PFT access */ 278 flags = 0; 279 280 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); 281 282 BUG_ON(lpar_rc != H_SUCCESS); 283 284 return dword0; 285 } 286 287 static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) 288 { 289 unsigned long hash; 290 unsigned long i; 291 long slot; 292 unsigned long want_v, hpte_v; 293 294 hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); 295 want_v = hpte_encode_avpn(vpn, psize, ssize); 296 297 /* Bolted entries are always in the primary group */ 298 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 299 for (i = 0; i < HPTES_PER_GROUP; i++) { 300 hpte_v = pSeries_lpar_hpte_getword0(slot); 301 302 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) 303 /* HPTE matches */ 304 return slot; 305 ++slot; 306 } 307 308 return -1; 309 } 310 311 static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, 312 unsigned long ea, 313 int psize, int ssize) 314 { 315 unsigned long vpn; 316 unsigned long lpar_rc, slot, vsid, flags; 317 318 vsid = get_kernel_vsid(ea, ssize); 319 vpn = hpt_vpn(ea, vsid, ssize); 320 321 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 322 BUG_ON(slot == -1); 323 324 flags = newpp & 7; 325 lpar_rc = plpar_pte_protect(flags, slot, 0); 326 327 BUG_ON(lpar_rc != H_SUCCESS); 328 } 329 330 static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 331 int psize, int ssize, int local) 332 { 333 unsigned long want_v; 334 unsigned long lpar_rc; 335 unsigned long dummy1, dummy2; 336 337 pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", 338 slot, vpn, psize, local); 339 340 want_v = hpte_encode_avpn(vpn, psize, ssize); 341 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); 342 if (lpar_rc == H_NOT_FOUND) 343 return; 344 345 BUG_ON(lpar_rc != H_SUCCESS); 346 } 347 348 static void pSeries_lpar_hpte_removebolted(unsigned long ea, 349 int psize, int ssize) 350 { 351 unsigned long vpn; 352 unsigned long slot, vsid; 353 354 vsid = get_kernel_vsid(ea, ssize); 355 vpn = hpt_vpn(ea, vsid, ssize); 356 357 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 358 BUG_ON(slot == -1); 359 360 pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); 361 } 362 363 /* Flag bits for H_BULK_REMOVE */ 364 #define HBR_REQUEST 0x4000000000000000UL 365 #define HBR_RESPONSE 0x8000000000000000UL 366 #define HBR_END 0xc000000000000000UL 367 #define HBR_AVPN 0x0200000000000000UL 368 #define HBR_ANDCOND 0x0100000000000000UL 369 370 /* 371 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 372 * lock. 373 */ 374 static void pSeries_lpar_flush_hash_range(unsigned long number, int local) 375 { 376 unsigned long vpn; 377 unsigned long i, pix, rc; 378 unsigned long flags = 0; 379 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 380 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 381 unsigned long param[9]; 382 unsigned long hash, index, shift, hidx, slot; 383 real_pte_t pte; 384 int psize, ssize; 385 386 if (lock_tlbie) 387 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 388 389 psize = batch->psize; 390 ssize = batch->ssize; 391 pix = 0; 392 for (i = 0; i < number; i++) { 393 vpn = batch->vpn[i]; 394 pte = batch->pte[i]; 395 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { 396 hash = hpt_hash(vpn, shift, ssize); 397 hidx = __rpte_to_hidx(pte, index); 398 if (hidx & _PTEIDX_SECONDARY) 399 hash = ~hash; 400 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 401 slot += hidx & _PTEIDX_GROUP_IX; 402 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 403 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 404 ssize, local); 405 } else { 406 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 407 param[pix+1] = hpte_encode_avpn(vpn, psize, 408 ssize); 409 pix += 2; 410 if (pix == 8) { 411 rc = plpar_hcall9(H_BULK_REMOVE, param, 412 param[0], param[1], param[2], 413 param[3], param[4], param[5], 414 param[6], param[7]); 415 BUG_ON(rc != H_SUCCESS); 416 pix = 0; 417 } 418 } 419 } pte_iterate_hashed_end(); 420 } 421 if (pix) { 422 param[pix] = HBR_END; 423 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], 424 param[2], param[3], param[4], param[5], 425 param[6], param[7]); 426 BUG_ON(rc != H_SUCCESS); 427 } 428 429 if (lock_tlbie) 430 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 431 } 432 433 static int __init disable_bulk_remove(char *str) 434 { 435 if (strcmp(str, "off") == 0 && 436 firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 437 printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); 438 powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; 439 } 440 return 1; 441 } 442 443 __setup("bulk_remove=", disable_bulk_remove); 444 445 void __init hpte_init_lpar(void) 446 { 447 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; 448 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; 449 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; 450 ppc_md.hpte_insert = pSeries_lpar_hpte_insert; 451 ppc_md.hpte_remove = pSeries_lpar_hpte_remove; 452 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 453 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 454 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 455 } 456 457 #ifdef CONFIG_PPC_SMLPAR 458 #define CMO_FREE_HINT_DEFAULT 1 459 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; 460 461 static int __init cmo_free_hint(char *str) 462 { 463 char *parm; 464 parm = strstrip(str); 465 466 if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { 467 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); 468 cmo_free_hint_flag = 0; 469 return 1; 470 } 471 472 cmo_free_hint_flag = 1; 473 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); 474 475 if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) 476 return 1; 477 478 return 0; 479 } 480 481 __setup("cmo_free_hint=", cmo_free_hint); 482 483 static void pSeries_set_page_state(struct page *page, int order, 484 unsigned long state) 485 { 486 int i, j; 487 unsigned long cmo_page_sz, addr; 488 489 cmo_page_sz = cmo_get_page_size(); 490 addr = __pa((unsigned long)page_address(page)); 491 492 for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { 493 for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) 494 plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); 495 } 496 } 497 498 void arch_free_page(struct page *page, int order) 499 { 500 if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) 501 return; 502 503 pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); 504 } 505 EXPORT_SYMBOL(arch_free_page); 506 507 #endif 508 509 #ifdef CONFIG_TRACEPOINTS 510 /* 511 * We optimise our hcall path by placing hcall_tracepoint_refcount 512 * directly in the TOC so we can check if the hcall tracepoints are 513 * enabled via a single load. 514 */ 515 516 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 517 extern long hcall_tracepoint_refcount; 518 519 /* 520 * Since the tracing code might execute hcalls we need to guard against 521 * recursion. One example of this are spinlocks calling H_YIELD on 522 * shared processor partitions. 523 */ 524 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); 525 526 void hcall_tracepoint_regfunc(void) 527 { 528 hcall_tracepoint_refcount++; 529 } 530 531 void hcall_tracepoint_unregfunc(void) 532 { 533 hcall_tracepoint_refcount--; 534 } 535 536 void __trace_hcall_entry(unsigned long opcode, unsigned long *args) 537 { 538 unsigned long flags; 539 unsigned int *depth; 540 541 /* 542 * We cannot call tracepoints inside RCU idle regions which 543 * means we must not trace H_CEDE. 544 */ 545 if (opcode == H_CEDE) 546 return; 547 548 local_irq_save(flags); 549 550 depth = &__get_cpu_var(hcall_trace_depth); 551 552 if (*depth) 553 goto out; 554 555 (*depth)++; 556 preempt_disable(); 557 trace_hcall_entry(opcode, args); 558 (*depth)--; 559 560 out: 561 local_irq_restore(flags); 562 } 563 564 void __trace_hcall_exit(long opcode, unsigned long retval, 565 unsigned long *retbuf) 566 { 567 unsigned long flags; 568 unsigned int *depth; 569 570 if (opcode == H_CEDE) 571 return; 572 573 local_irq_save(flags); 574 575 depth = &__get_cpu_var(hcall_trace_depth); 576 577 if (*depth) 578 goto out; 579 580 (*depth)++; 581 trace_hcall_exit(opcode, retval, retbuf); 582 preempt_enable(); 583 (*depth)--; 584 585 out: 586 local_irq_restore(flags); 587 } 588 #endif 589 590 /** 591 * h_get_mpp 592 * H_GET_MPP hcall returns info in 7 parms 593 */ 594 int h_get_mpp(struct hvcall_mpp_data *mpp_data) 595 { 596 int rc; 597 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; 598 599 rc = plpar_hcall9(H_GET_MPP, retbuf); 600 601 mpp_data->entitled_mem = retbuf[0]; 602 mpp_data->mapped_mem = retbuf[1]; 603 604 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; 605 mpp_data->pool_num = retbuf[2] & 0xffff; 606 607 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; 608 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; 609 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; 610 611 mpp_data->pool_size = retbuf[4]; 612 mpp_data->loan_request = retbuf[5]; 613 mpp_data->backing_mem = retbuf[6]; 614 615 return rc; 616 } 617 EXPORT_SYMBOL(h_get_mpp); 618 619 int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) 620 { 621 int rc; 622 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; 623 624 rc = plpar_hcall9(H_GET_MPP_X, retbuf); 625 626 mpp_x_data->coalesced_bytes = retbuf[0]; 627 mpp_x_data->pool_coalesced_bytes = retbuf[1]; 628 mpp_x_data->pool_purr_cycles = retbuf[2]; 629 mpp_x_data->pool_spurr_cycles = retbuf[3]; 630 631 return rc; 632 } 633