1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * PowerNV cpuidle code 4 * 5 * Copyright 2015 IBM Corp. 6 */ 7 8 #include <linux/types.h> 9 #include <linux/mm.h> 10 #include <linux/slab.h> 11 #include <linux/of.h> 12 #include <linux/device.h> 13 #include <linux/cpu.h> 14 15 #include <asm/asm-prototypes.h> 16 #include <asm/firmware.h> 17 #include <asm/interrupt.h> 18 #include <asm/machdep.h> 19 #include <asm/opal.h> 20 #include <asm/cputhreads.h> 21 #include <asm/cpuidle.h> 22 #include <asm/code-patching.h> 23 #include <asm/smp.h> 24 #include <asm/runlatch.h> 25 #include <asm/dbell.h> 26 27 #include "powernv.h" 28 #include "subcore.h" 29 30 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 31 #define MAX_STOP_STATE 0xF 32 33 #define P9_STOP_SPR_MSR 2000 34 #define P9_STOP_SPR_PSSCR 855 35 36 static u32 supported_cpuidle_states; 37 struct pnv_idle_states_t *pnv_idle_states; 38 int nr_pnv_idle_states; 39 40 /* 41 * The default stop state that will be used by ppc_md.power_save 42 * function on platforms that support stop instruction. 43 */ 44 static u64 pnv_default_stop_val; 45 static u64 pnv_default_stop_mask; 46 static bool default_stop_found; 47 48 /* 49 * First stop state levels when SPR and TB loss can occur. 50 */ 51 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 52 static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; 53 54 /* 55 * psscr value and mask of the deepest stop idle state. 56 * Used when a cpu is offlined. 57 */ 58 static u64 pnv_deepest_stop_psscr_val; 59 static u64 pnv_deepest_stop_psscr_mask; 60 static u64 pnv_deepest_stop_flag; 61 static bool deepest_stop_found; 62 63 static unsigned long power7_offline_type; 64 65 static int pnv_save_sprs_for_deep_states(void) 66 { 67 int cpu; 68 int rc; 69 70 /* 71 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 72 * all cpus at boot. Get these reg values of current cpu and use the 73 * same across all cpus. 
74 */ 75 uint64_t lpcr_val = mfspr(SPRN_LPCR); 76 uint64_t hid0_val = mfspr(SPRN_HID0); 77 uint64_t hmeer_val = mfspr(SPRN_HMEER); 78 uint64_t msr_val = MSR_IDLE; 79 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 80 81 for_each_present_cpu(cpu) { 82 uint64_t pir = get_hard_smp_processor_id(cpu); 83 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 84 85 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 86 if (rc != 0) 87 return rc; 88 89 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 90 if (rc != 0) 91 return rc; 92 93 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 94 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 95 if (rc) 96 return rc; 97 98 rc = opal_slw_set_reg(pir, 99 P9_STOP_SPR_PSSCR, psscr_val); 100 101 if (rc) 102 return rc; 103 } 104 105 /* HIDs are per core registers */ 106 if (cpu_thread_in_core(cpu) == 0) { 107 108 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 109 if (rc != 0) 110 return rc; 111 112 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 113 if (rc != 0) 114 return rc; 115 116 /* Only p8 needs to set extra HID regiters */ 117 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 118 uint64_t hid1_val = mfspr(SPRN_HID1); 119 uint64_t hid4_val = mfspr(SPRN_HID4); 120 uint64_t hid5_val = mfspr(SPRN_HID5); 121 122 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 123 if (rc != 0) 124 return rc; 125 126 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 127 if (rc != 0) 128 return rc; 129 130 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 131 if (rc != 0) 132 return rc; 133 } 134 } 135 } 136 137 return 0; 138 } 139 140 u32 pnv_get_supported_cpuidle_states(void) 141 { 142 return supported_cpuidle_states; 143 } 144 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 145 146 static void pnv_fastsleep_workaround_apply(void *info) 147 148 { 149 int rc; 150 int *err = info; 151 152 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 153 OPAL_CONFIG_IDLE_APPLY); 154 if (rc) 155 *err = 1; 156 } 157 158 static bool power7_fastsleep_workaround_entry 
= true; 159 static bool power7_fastsleep_workaround_exit = true; 160 161 /* 162 * Used to store fastsleep workaround state 163 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 164 * 1 - Workaround applied once, never undone. 165 */ 166 static u8 fastsleep_workaround_applyonce; 167 168 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 169 struct device_attribute *attr, char *buf) 170 { 171 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 172 } 173 174 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 175 struct device_attribute *attr, const char *buf, 176 size_t count) 177 { 178 cpumask_t primary_thread_mask; 179 int err; 180 u8 val; 181 182 if (kstrtou8(buf, 0, &val) || val != 1) 183 return -EINVAL; 184 185 if (fastsleep_workaround_applyonce == 1) 186 return count; 187 188 /* 189 * fastsleep_workaround_applyonce = 1 implies 190 * fastsleep workaround needs to be left in 'applied' state on all 191 * the cores. Do this by- 192 * 1. Disable the 'undo' workaround in fastsleep exit path 193 * 2. Sendi IPIs to all the cores which have at least one online thread 194 * 3. Disable the 'apply' workaround in fastsleep entry path 195 * 196 * There is no need to send ipi to cores which have all threads 197 * offlined, as last thread of the core entering fastsleep or deeper 198 * state would have applied workaround. 
199 */ 200 power7_fastsleep_workaround_exit = false; 201 202 get_online_cpus(); 203 primary_thread_mask = cpu_online_cores_map(); 204 on_each_cpu_mask(&primary_thread_mask, 205 pnv_fastsleep_workaround_apply, 206 &err, 1); 207 put_online_cpus(); 208 if (err) { 209 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 210 goto fail; 211 } 212 213 power7_fastsleep_workaround_entry = false; 214 215 fastsleep_workaround_applyonce = 1; 216 217 return count; 218 fail: 219 return -EIO; 220 } 221 222 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 223 show_fastsleep_workaround_applyonce, 224 store_fastsleep_workaround_applyonce); 225 226 static inline void atomic_start_thread_idle(void) 227 { 228 int cpu = raw_smp_processor_id(); 229 int first = cpu_first_thread_sibling(cpu); 230 int thread_nr = cpu_thread_in_core(cpu); 231 unsigned long *state = &paca_ptrs[first]->idle_state; 232 233 clear_bit(thread_nr, state); 234 } 235 236 static inline void atomic_stop_thread_idle(void) 237 { 238 int cpu = raw_smp_processor_id(); 239 int first = cpu_first_thread_sibling(cpu); 240 int thread_nr = cpu_thread_in_core(cpu); 241 unsigned long *state = &paca_ptrs[first]->idle_state; 242 243 set_bit(thread_nr, state); 244 } 245 246 static inline void atomic_lock_thread_idle(void) 247 { 248 int cpu = raw_smp_processor_id(); 249 int first = cpu_first_thread_sibling(cpu); 250 unsigned long *state = &paca_ptrs[first]->idle_state; 251 252 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) 253 barrier(); 254 } 255 256 static inline void atomic_unlock_and_stop_thread_idle(void) 257 { 258 int cpu = raw_smp_processor_id(); 259 int first = cpu_first_thread_sibling(cpu); 260 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 261 unsigned long *state = &paca_ptrs[first]->idle_state; 262 u64 s = READ_ONCE(*state); 263 u64 new, tmp; 264 265 BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); 266 BUG_ON(s & thread); 267 268 again: 269 
new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
	/*
	 * Publish "thread bit set, lock bit clear" with one cmpxchg.  If the
	 * word changed under us, retry from the value cmpxchg returned.
	 */
	tmp = cmpxchg(state, s, new);
	if (unlikely(tmp != s)) {
		s = tmp;
		goto again;
	}
}

/*
 * Release the core idle_state lock bit without touching the thread bits.
 * BUGs if the lock bit is not currently set.
 */
static inline void atomic_unlock_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
}

/* P7 and P8 */
/* Scratch copy of the SPRs that power7_idle_insn() saves before idling. */
struct p7_sprs {
	/* per core */
	u64 tscr;
	u64 worc;

	/* per subcore */
	u64 sdr1;
	u64 rpr;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

/*
 * Enter the idle state @type (compared below against PNV_THREAD_NAP and
 * PNV_THREAD_WINKLE) via isa206_idle_insn_mayloss() and, on wakeup, restore
 * whatever state SRR1 reports as lost.
 *
 * For anything other than nap, the thread's bit in the core idle_state word
 * is cleared under the core lock before idling and set again on wakeup; the
 * thread that finds no other thread bit set on wakeup restores the per-core
 * state and resyncs the timebase.
 *
 * Returns the SRR1 value obtained from isa206_idle_insn_mayloss().
 */
static unsigned long power7_idle_insn(unsigned long type)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	bool full_winkle;
	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
	bool sprs_saved = false;
	int rc;

	if (unlikely(type != PNV_THREAD_NAP)) {
		atomic_lock_thread_idle();

		/* Our bit must still be set; clear it now that we hold the lock. */
		BUG_ON(!(*state & thread));
		*state &= ~thread;

		if (power7_fastsleep_workaround_entry) {
			/* No thread bit left set in the core: apply the workaround. */
			if ((*state & core_thread_mask) == 0) {
				rc = opal_config_cpu_idle_state(
						OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_APPLY);
				BUG_ON(rc);
			}
		}

		if (type == PNV_THREAD_WINKLE) {
			sprs.tscr	= mfspr(SPRN_TSCR);
			sprs.worc	= mfspr(SPRN_WORC);

			sprs.sdr1	= mfspr(SPRN_SDR1);
			sprs.rpr	= mfspr(SPRN_RPR);

			sprs.lpcr	= mfspr(SPRN_LPCR);
			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
				sprs.hfscr	= mfspr(SPRN_HFSCR);
				sprs.fscr	= mfspr(SPRN_FSCR);
			}
			sprs.purr	= mfspr(SPRN_PURR);
			sprs.spurr	= mfspr(SPRN_SPURR);
			sprs.dscr	= mfspr(SPRN_DSCR);
			sprs.wort	= mfspr(SPRN_WORT);

			sprs_saved = true;

			/*
			 * Increment winkle counter and set all winkle bits if
			 * all threads are winkling. This allows wakeup side to
			 * distinguish between fast sleep and winkle state
			 * loss. Fast sleep still has to resync the timebase so
			 * this may not be a really big win.
			 */
			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
					== threads_per_core)
				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		}

		atomic_unlock_thread_idle();
	}

	/* Saved for every idle type on ARCH_207S, nap included. */
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		sprs.amr	= mfspr(SPRN_AMR);
		sprs.iamr	= mfspr(SPRN_IAMR);
		sprs.amor	= mfspr(SPRN_AMOR);
		sprs.uamor	= mfspr(SPRN_UAMOR);
	}

	local_paca->thread_idle_state = type;
	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
	local_paca->thread_idle_state = PNV_THREAD_RUNNING;

	/* A zero SRR1, or translation enabled in the MSR, is unexpected here. */
	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
			/*
			 * We don't need an isync after the mtsprs here because
			 * the upcoming mtmsrd is execution synchronizing.
			 */
			mtspr(SPRN_AMR,		sprs.amr);
			mtspr(SPRN_IAMR,	sprs.iamr);
			mtspr(SPRN_AMOR,	sprs.amor);
			mtspr(SPRN_UAMOR,	sprs.uamor);
		}
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/* Common case: no HV state loss, so only the bookkeeping is undone. */
	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
		if (unlikely(type != PNV_THREAD_NAP)) {
			atomic_lock_thread_idle();
			if (type == PNV_THREAD_WINKLE) {
				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			}
			atomic_unlock_and_stop_thread_idle();
		}
		return srr1;
	}

	/* HV state loss */
	BUG_ON(type == PNV_THREAD_NAP);

	atomic_lock_thread_idle();

	full_winkle = false;
	if (type == PNV_THREAD_WINKLE) {
		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
		/* Our winkle bit is still set: treat this as a full winkle. */
		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			full_winkle = true;
			BUG_ON(!sprs_saved);
		}
	}

	WARN_ON(*state & thread);

	/* Another thread of the core already has its bit set: skip per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	if (full_winkle) {
		mtspr(SPRN_TSCR,	sprs.tscr);
		mtspr(SPRN_WORC,	sprs.worc);
	}

	if (power7_fastsleep_workaround_exit) {
		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_UNDO);
		BUG_ON(rc);
	}

	/* TB */
	if (opal_resync_timebase() != OPAL_SUCCESS)
		BUG();

core_woken:
	if (!full_winkle)
		goto subcore_woken;

	/* Same test as above, restricted to this thread's subcore siblings. */
	if ((*state & local_paca->subcore_sibling_mask) != 0)
		goto subcore_woken;

	/* Per-subcore SPRs */
	mtspr(SPRN_SDR1,	sprs.sdr1);
	mtspr(SPRN_RPR,		sprs.rpr);

subcore_woken:
	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();
	atomic_unlock_and_stop_thread_idle();

	/* Fast sleep does not lose SPRs */
	if (!full_winkle)
		return srr1;

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		mtspr(SPRN_HFSCR,	sprs.hfscr);
		mtspr(SPRN_FSCR,	sprs.fscr);
	}
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_WORT,	sprs.wort);

	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

	/*
	 * The SLB has to be restored here, but it sometimes still
	 * contains entries, so the __ variant must be used to prevent
	 * multi hits.
	 */
	__slb_restore_bolted_realmode();

	return srr1;
}

extern unsigned long idle_kvm_start_guest(unsigned long srr1);

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Offline path for POWER7/POWER8: enters the idle state selected by
 * power7_offline_type and returns the SRR1 wakeup value (possibly replaced
 * by the result of idle_kvm_start_guest()).
 */
static unsigned long power7_offline(void)
{
	unsigned long srr1;

	mtmsr(MSR_IDLE);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/* Tell KVM we're entering idle. */
	/******************************************************/
	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
	/* MUST occur in real mode, i.e. with the MMU off,    */
	/* and the MMU must stay off until we clear this flag */
	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
	/* pnv_powersave_wakeup in this file.                 */
	/* The reason is that another thread can switch the   */
	/* MMU to a guest context whenever this flag is set   */
	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
	/* that would potentially cause this thread to start  */
	/* executing instructions from guest memory in        */
	/* hypervisor mode, leading to a host crash or data   */
	/* corruption, or worse.
*/
	/******************************************************/
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
#endif

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(power7_offline_type);
	__ppc64_runlatch_on();

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
#endif

	/* Only now switch the MSR back to MSR_KERNEL. */
	mtmsr(MSR_KERNEL);

	return srr1;
}
#endif

/*
 * Enter the POWER7/POWER8 idle state @type from the idle loop.
 *
 * Does nothing if prep_irq_for_idle_irqsoff() returns false.  Otherwise
 * the idle instruction runs between mtmsr(MSR_IDLE) and mtmsr(MSR_KERNEL),
 * and the returned SRR1 is passed to irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	mtmsr(MSR_IDLE);
	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();
	mtmsr(MSR_KERNEL);

	fini_irq_for_idle_irqsoff();
	irq_set_pending_from_srr1(srr1);
}

/* Parameterless nap entry; a no-op while powersave_nap is zero. */
static void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

/*
 * Scratch copy of the SPRs that power9_idle_stop() saves before stopping.
 *
 * NOTE(review): mmcr0 and mmcr1 are u32 while the other PMU fields are u64;
 * whatever mfspr() returns is narrowed on assignment -- confirm that no
 * upper bits of those registers need to survive.
 */
struct p9_sprs {
	/* per core */
	u64 ptcr;
	u64 rpr;
	u64 tscr;
	u64 ldbar;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 pid;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;
	u64 ciabr;

	u64 mmcra;
	u32 mmcr0;
	u32 mmcr1;
	u64 mmcr2;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

/*
 * Execute the stop instruction with the given @psscr on POWER9 and restore
 * lost state on wakeup.
 *
 * @psscr:  PSSCR value to stop with.  If neither PSSCR_EC nor PSSCR_ESL is
 *          set, the no-loss entry is used and @mmu_on must be true.
 * @mmu_on: when true, the MSR is set to MSR_KERNEL before returning.
 *
 * Returns the SRR1 wakeup value, or 0 when the no-loss stop returned 0 or
 * when the stop was skipped because dont_stop was set.
 */
static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
	unsigned long mmcr0 = 0;
	unsigned long mmcra = 0;
	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		BUG_ON(!mmu_on);

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
		/*
		 * Advertise the pending stop, then back out without
		 * stopping if dont_stop is non-zero.
		 */
		local_paca->requested_psscr = psscr;
		/* order setting requested_psscr vs testing dont_stop */
		smp_mb();
		if (atomic_read(&local_paca->dont_stop)) {
			local_paca->requested_psscr = 0;
			return 0;
		}
	}
#endif

	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
		 /*
		  * POWER9 DD2 can incorrectly set PMAO when waking up
		  * after a state-loss idle. Saving and restoring MMCR0
		  * over idle is a workaround.
		  */
		mmcr0		= mfspr(SPRN_MMCR0);
	}

	/* Requested level is at or beyond the first SPR-losing level: save all. */
	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		sprs.lpcr	= mfspr(SPRN_LPCR);
		sprs.hfscr	= mfspr(SPRN_HFSCR);
		sprs.fscr	= mfspr(SPRN_FSCR);
		sprs.pid	= mfspr(SPRN_PID);
		sprs.purr	= mfspr(SPRN_PURR);
		sprs.spurr	= mfspr(SPRN_SPURR);
		sprs.dscr	= mfspr(SPRN_DSCR);
		sprs.wort	= mfspr(SPRN_WORT);
		sprs.ciabr	= mfspr(SPRN_CIABR);

		sprs.mmcra	= mfspr(SPRN_MMCRA);
		sprs.mmcr0	= mfspr(SPRN_MMCR0);
		sprs.mmcr1	= mfspr(SPRN_MMCR1);
		sprs.mmcr2	= mfspr(SPRN_MMCR2);

		sprs.ptcr	= mfspr(SPRN_PTCR);
		sprs.rpr	= mfspr(SPRN_RPR);
		sprs.tscr	= mfspr(SPRN_TSCR);
		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
			sprs.ldbar = mfspr(SPRN_LDBAR);

		sprs_saved = true;

		atomic_start_thread_idle();
	}

	sprs.amr	= mfspr(SPRN_AMR);
	sprs.iamr	= mfspr(SPRN_IAMR);
	sprs.amor	= mfspr(SPRN_AMOR);
	sprs.uamor	= mfspr(SPRN_UAMOR);

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->requested_psscr = 0;
#endif

	/* Re-read PSSCR: its PLS field is examined below. */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
		/*
		 * We don't need an isync after the mtsprs here because the
		 * upcoming mtmsrd is execution synchronizing.
		 */
		mtspr(SPRN_AMR,		sprs.amr);
		mtspr(SPRN_IAMR,	sprs.iamr);
		mtspr(SPRN_AMOR,	sprs.amor);
		mtspr(SPRN_UAMOR,	sprs.uamor);

		/*
		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
		 * might have been corrupted and needs flushing. We also need
		 * to reload MMCR0 (see mmcr0 comment above).
		 */
		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
			mtspr(SPRN_MMCR0, mmcr0);
		}

		/*
		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
		 * to ensure the PMU starts running.
		 */
		mmcra = mfspr(SPRN_MMCRA);
		mmcra |= PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
		mmcra &= ~PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER9, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		/* Undo atomic_start_thread_idle() if it was done above. */
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread of the core already has its bit set: skip per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	mtspr(SPRN_PTCR,	sprs.ptcr);
	mtspr(SPRN_RPR,		sprs.rpr);
	mtspr(SPRN_TSCR,	sprs.tscr);

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	mtspr(SPRN_HFSCR,	sprs.hfscr);
	mtspr(SPRN_FSCR,	sprs.fscr);
	mtspr(SPRN_PID,		sprs.pid);
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_WORT,	sprs.wort);
	mtspr(SPRN_CIABR,	sprs.ciabr);

	mtspr(SPRN_MMCRA,	sprs.mmcra);
	mtspr(SPRN_MMCR0,	sprs.mmcr0);
	mtspr(SPRN_MMCR1,	sprs.mmcr1);
	mtspr(SPRN_MMCR2,	sprs.mmcr2);
	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
		mtspr(SPRN_LDBAR, sprs.ldbar);

	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	if (mmu_on)
		mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
*/
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;		/* this thread is awake */
	int poke_threads = 0;		/* bitmask, by thread index, of threads to wake */
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	/* First cpu number of this core (relies on power-of-two threads_per_core). */
	cpu0 = cpu & ~(threads_per_core - 1);
	/* Ask every sibling (not ourselves) to refrain from stopping. */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	smp_mb();
	/*
	 * Classify each thread: requested_psscr == 0 counts as awake,
	 * anything else is recorded for a wakeup poke.
	 *
	 * NOTE(review): this scan also visits the calling thread's own paca,
	 * so if its requested_psscr is 0 it is counted here in addition to
	 * the initial awake_threads = 1 -- confirm that is intended.
	 */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				/* A poked thread is awake once it clears requested_psscr. */
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

/*
 * Counterpart of pnv_power9_force_smt4_catch(): drops the dont_stop
 * reference taken on every other thread of the calling thread's core.
 */
void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

/* Intentionally empty: no SPRs are saved for POWER10 yet (see below). */
struct p10_sprs {
	/*
	 * SPRs that get lost in shallow states:
	 *
	 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
	 * isa300 idle routines restore CR, LR.
	 * CTR is volatile
	 * idle thread doesn't use FP or VEC
	 * kernel doesn't use TAR
	 * HSPRG1 is only live in HV interrupt entry
	 * SPRG2 is only live in KVM guests, KVM handles it.
	 */
};

/*
 * POWER10 variant of the stop entry.  The control flow mirrors the POWER9
 * path but no SPRs are saved or restored; the "XXX" markers show where that
 * code would go.
 *
 * @psscr:  PSSCR value to stop with.  If neither PSSCR_EC nor PSSCR_ESL is
 *          set, the no-loss entry is used and @mmu_on must be true.
 * @mmu_on: when true, the MSR is set to MSR_KERNEL before returning.
 *
 * Returns the SRR1 wakeup value, or 0 when the no-loss stop returned 0.
 */
static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
//	struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		BUG_ON(!mmu_on);

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		/* XXX: save SPRs for deep state loss here. */

		sprs_saved = true;

		atomic_start_thread_idle();
	}

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

	/* Re-read PSSCR: its PLS field is examined below. */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER10, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		/* Undo atomic_start_thread_idle() if it was done above. */
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread of the core already has its bit set: skip per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* XXX: restore per-core SPRs here */

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* XXX: restore per-thread SPRs here */

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	if (mmu_on)
		mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Offline path for ISA 3.0 CPUs: stops with @psscr using the POWER10 or
 * POWER9 routine depending on CPU_FTR_ARCH_31, and returns the SRR1 wakeup
 * value (possibly replaced by the result of idle_kvm_start_guest()).
 */
static unsigned long arch300_offline_stop(unsigned long psscr)
{
	unsigned long srr1;

#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, true);
	else
		srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();
#else
	/*
	 * Tell KVM we're entering idle.
	 * This does not have to be done in real mode because the P9 MMU
	 * is independent per-thread. Some steppings share radix/hash mode
	 * between threads, but in that case KVM has a barrier sync in real
	 * mode before and after switching between radix and hash.
	 *
	 * kvm_start_guest must still be called in real mode though, hence
	 * the false argument.
	 */
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;

	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, false);
	else
		srr1 = power9_idle_stop(psscr, false);
	__ppc64_runlatch_on();

	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
	/* mmu_on was false above, so the MSR is switched back here instead. */
	mtmsr(MSR_KERNEL);
#endif

	return srr1;
}
#endif

/*
 * Enter a stop state from the idle loop.
 *
 * The current PSSCR is read, the bits selected by @stop_psscr_mask are
 * replaced with @stop_psscr_val, and the result is passed to the POWER10 or
 * POWER9 stop routine.  Does nothing if prep_irq_for_idle_irqsoff() returns
 * false.
 */
void arch300_idle_type(unsigned long stop_psscr_val,
				      unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, true);
	else
		srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
static void arch300_idle(void)
{
	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Write @lpcr_val to LPCR on the calling CPU and, when the supported idle
 * states include OPAL_PM_LOSE_FULL_CONTEXT, also pass it to
 * opal_slw_set_reg() for @cpu's hardware id.  The result of that call is
 * not checked.
 */
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}

/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
* interrupts hard disabled and no lazy irq pending.
 *
 * Selection order:
 *  1. ISA 3.0 CPU with a deepest stop state recorded: stop with that state.
 *  2. ISA 2.06 CPU with a non-zero power7_offline_type: power7_offline().
 *  3. Otherwise spin at low thread priority until
 *     generic_check_cpu_restart(cpu) returns true.
 *
 * Returns the SRR1 wakeup value from the idle routine, or 0 for the
 * spinning fallback.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		/* Replace only the masked PSSCR fields with the deepest state's. */
		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = arch300_offline_stop(psscr);
	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
		srr1 = power7_offline();
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	return srr1;
}
#endif

/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since stop instruction was
 *	last executed.
 *
 *	Bit 41 - Status Disable(SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 *	Bit 42 - Enable State Loss
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 *	Bit 43 - Exit Criterion
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 *	Bits 44:47 - Power-Saving Level Limit
 *	This limits the power-saving level that can be entered into.
1159 * 1160 * Bits 60:63 - Requested Level 1161 * Used to specify which power-saving level must be entered on executing 1162 * stop instruction 1163 */ 1164 1165 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1166 { 1167 int err = 0; 1168 1169 /* 1170 * psscr_mask == 0xf indicates an older firmware. 1171 * Set remaining fields of psscr to the default values. 1172 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 1173 */ 1174 if (*psscr_mask == 0xf) { 1175 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 1176 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 1177 return err; 1178 } 1179 1180 /* 1181 * New firmware is expected to set the psscr_val bits correctly. 1182 * Validate that the following invariants are correctly maintained by 1183 * the new firmware. 1184 * - ESL bit value matches the EC bit value. 1185 * - ESL bit is set for all the deep stop states. 1186 */ 1187 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 1188 err = ERR_EC_ESL_MISMATCH; 1189 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1190 GET_PSSCR_ESL(*psscr_val) == 0) { 1191 err = ERR_DEEP_STATE_ESL_MISMATCH; 1192 } 1193 1194 return err; 1195 } 1196 1197 /* 1198 * pnv_arch300_idle_init: Initializes the default idle state, first 1199 * deep idle state and deepest idle state on 1200 * ISA 3.0 CPUs. 1201 * 1202 * @np: /ibm,opal/power-mgt device node 1203 * @flags: cpu-idle-state-flags array 1204 * @dt_idle_states: Number of idle state entries 1205 * Returns 0 on success 1206 */ 1207 static void __init pnv_arch300_idle_init(void) 1208 { 1209 u64 max_residency_ns = 0; 1210 int i; 1211 1212 /* stop is not really architected, we only have p9,p10 drivers */ 1213 if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) 1214 return; 1215 1216 /* 1217 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1218 * the deepest stop state. 
1219 * 1220 * pnv_default_stop_{val,mask} should be set to values corresponding to 1221 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1222 */ 1223 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1224 deep_spr_loss_state = MAX_STOP_STATE + 1; 1225 for (i = 0; i < nr_pnv_idle_states; i++) { 1226 int err; 1227 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1228 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1229 1230 /* No deep loss driver implemented for POWER10 yet */ 1231 if (pvr_version_is(PVR_POWER10) && 1232 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1233 continue; 1234 1235 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1236 (pnv_first_tb_loss_level > psscr_rl)) 1237 pnv_first_tb_loss_level = psscr_rl; 1238 1239 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1240 (deep_spr_loss_state > psscr_rl)) 1241 deep_spr_loss_state = psscr_rl; 1242 1243 /* 1244 * The idle code does not deal with TB loss occurring 1245 * in a shallower state than SPR loss, so force it to 1246 * behave like SPRs are lost if TB is lost. POWER9 would 1247 * never encouter this, but a POWER8 core would if it 1248 * implemented the stop instruction. So this is for forward 1249 * compatibility. 
1250 */ 1251 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1252 (deep_spr_loss_state > psscr_rl)) 1253 deep_spr_loss_state = psscr_rl; 1254 1255 err = validate_psscr_val_mask(&state->psscr_val, 1256 &state->psscr_mask, 1257 state->flags); 1258 if (err) { 1259 report_invalid_psscr_val(state->psscr_val, err); 1260 continue; 1261 } 1262 1263 state->valid = true; 1264 1265 if (max_residency_ns < state->residency_ns) { 1266 max_residency_ns = state->residency_ns; 1267 pnv_deepest_stop_psscr_val = state->psscr_val; 1268 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1269 pnv_deepest_stop_flag = state->flags; 1270 deepest_stop_found = true; 1271 } 1272 1273 if (!default_stop_found && 1274 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1275 pnv_default_stop_val = state->psscr_val; 1276 pnv_default_stop_mask = state->psscr_mask; 1277 default_stop_found = true; 1278 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1279 } 1280 } 1281 1282 if (unlikely(!default_stop_found)) { 1283 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1284 } else { 1285 ppc_md.power_save = arch300_idle; 1286 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1287 pnv_default_stop_val, pnv_default_stop_mask); 1288 } 1289 1290 if (unlikely(!deepest_stop_found)) { 1291 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. 
Offlined CPUs will busy wait"); 1292 } else { 1293 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1294 pnv_deepest_stop_psscr_val, 1295 pnv_deepest_stop_psscr_mask); 1296 } 1297 1298 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", 1299 deep_spr_loss_state); 1300 1301 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", 1302 pnv_first_tb_loss_level); 1303 } 1304 1305 static void __init pnv_disable_deep_states(void) 1306 { 1307 /* 1308 * The stop-api is unable to restore hypervisor 1309 * resources on wakeup from platform idle states which 1310 * lose full context. So disable such states. 1311 */ 1312 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1313 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1314 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1315 1316 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1317 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1318 /* 1319 * Use the default stop state for CPU-Hotplug 1320 * if available. 
1321 */ 1322 if (default_stop_found) { 1323 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1324 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1325 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1326 pnv_deepest_stop_psscr_val); 1327 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1328 deepest_stop_found = false; 1329 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1330 } 1331 } 1332 } 1333 1334 /* 1335 * Probe device tree for supported idle states 1336 */ 1337 static void __init pnv_probe_idle_states(void) 1338 { 1339 int i; 1340 1341 if (nr_pnv_idle_states < 0) { 1342 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1343 return; 1344 } 1345 1346 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1347 pnv_arch300_idle_init(); 1348 1349 for (i = 0; i < nr_pnv_idle_states; i++) 1350 supported_cpuidle_states |= pnv_idle_states[i].flags; 1351 } 1352 1353 /* 1354 * This function parses device-tree and populates all the information 1355 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1356 * which is the number of cpuidle states discovered through device-tree. 
1357 */ 1358 1359 static int pnv_parse_cpuidle_dt(void) 1360 { 1361 struct device_node *np; 1362 int nr_idle_states, i; 1363 int rc = 0; 1364 u32 *temp_u32; 1365 u64 *temp_u64; 1366 const char **temp_string; 1367 1368 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1369 if (!np) { 1370 pr_warn("opal: PowerMgmt Node not found\n"); 1371 return -ENODEV; 1372 } 1373 nr_idle_states = of_property_count_u32_elems(np, 1374 "ibm,cpu-idle-state-flags"); 1375 1376 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 1377 GFP_KERNEL); 1378 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1379 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1380 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1381 1382 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1383 pr_err("Could not allocate memory for dt parsing\n"); 1384 rc = -ENOMEM; 1385 goto out; 1386 } 1387 1388 /* Read flags */ 1389 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1390 temp_u32, nr_idle_states)) { 1391 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1392 rc = -EINVAL; 1393 goto out; 1394 } 1395 for (i = 0; i < nr_idle_states; i++) 1396 pnv_idle_states[i].flags = temp_u32[i]; 1397 1398 /* Read latencies */ 1399 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1400 temp_u32, nr_idle_states)) { 1401 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1402 rc = -EINVAL; 1403 goto out; 1404 } 1405 for (i = 0; i < nr_idle_states; i++) 1406 pnv_idle_states[i].latency_ns = temp_u32[i]; 1407 1408 /* Read residencies */ 1409 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1410 temp_u32, nr_idle_states)) { 1411 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 1412 rc = -EINVAL; 1413 goto out; 1414 } 1415 for (i = 0; i < nr_idle_states; i++) 1416 pnv_idle_states[i].residency_ns = temp_u32[i]; 1417 1418 /* For power9 and 
later */ 1419 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1420 /* Read pm_crtl_val */ 1421 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1422 temp_u64, nr_idle_states)) { 1423 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1424 rc = -EINVAL; 1425 goto out; 1426 } 1427 for (i = 0; i < nr_idle_states; i++) 1428 pnv_idle_states[i].psscr_val = temp_u64[i]; 1429 1430 /* Read pm_crtl_mask */ 1431 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1432 temp_u64, nr_idle_states)) { 1433 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1434 rc = -EINVAL; 1435 goto out; 1436 } 1437 for (i = 0; i < nr_idle_states; i++) 1438 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1439 } 1440 1441 /* 1442 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1443 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1444 * plan to use it in near future. Hence, not parsing these properties 1445 */ 1446 1447 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1448 temp_string, nr_idle_states) < 0) { 1449 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1450 rc = -EINVAL; 1451 goto out; 1452 } 1453 for (i = 0; i < nr_idle_states; i++) 1454 strlcpy(pnv_idle_states[i].name, temp_string[i], 1455 PNV_IDLE_NAME_LEN); 1456 nr_pnv_idle_states = nr_idle_states; 1457 rc = 0; 1458 out: 1459 kfree(temp_u32); 1460 kfree(temp_u64); 1461 kfree(temp_string); 1462 return rc; 1463 } 1464 1465 static int __init pnv_init_idle_states(void) 1466 { 1467 int cpu; 1468 int rc = 0; 1469 1470 /* Set up PACA fields */ 1471 for_each_present_cpu(cpu) { 1472 struct paca_struct *p = paca_ptrs[cpu]; 1473 1474 p->idle_state = 0; 1475 if (cpu == cpu_first_thread_sibling(cpu)) 1476 p->idle_state = (1 << threads_per_core) - 1; 1477 1478 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1479 /* P7/P8 nap */ 1480 p->thread_idle_state = PNV_THREAD_RUNNING; 1481 } else if (pvr_version_is(PVR_POWER9)) { 1482 
/* P9 stop workarounds */ 1483 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1484 p->requested_psscr = 0; 1485 atomic_set(&p->dont_stop, 0); 1486 #endif 1487 } 1488 } 1489 1490 /* In case we error out nr_pnv_idle_states will be zero */ 1491 nr_pnv_idle_states = 0; 1492 supported_cpuidle_states = 0; 1493 1494 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1495 goto out; 1496 rc = pnv_parse_cpuidle_dt(); 1497 if (rc) 1498 return rc; 1499 pnv_probe_idle_states(); 1500 1501 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1502 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1503 power7_fastsleep_workaround_entry = false; 1504 power7_fastsleep_workaround_exit = false; 1505 } else { 1506 /* 1507 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1508 * workaround is needed to use fastsleep. Provide sysfs 1509 * control to choose how this workaround has to be 1510 * applied. 1511 */ 1512 device_create_file(cpu_subsys.dev_root, 1513 &dev_attr_fastsleep_workaround_applyonce); 1514 } 1515 1516 update_subcore_sibling_mask(); 1517 1518 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1519 ppc_md.power_save = power7_idle; 1520 power7_offline_type = PNV_THREAD_NAP; 1521 } 1522 1523 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1524 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1525 power7_offline_type = PNV_THREAD_WINKLE; 1526 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1527 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1528 power7_offline_type = PNV_THREAD_SLEEP; 1529 } 1530 1531 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1532 if (pnv_save_sprs_for_deep_states()) 1533 pnv_disable_deep_states(); 1534 } 1535 1536 out: 1537 return 0; 1538 } 1539 machine_subsys_initcall(powernv, pnv_init_idle_states); 1540