1 /* 2 * PowerNV cpuidle code 3 * 4 * Copyright 2015 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/types.h> 13 #include <linux/mm.h> 14 #include <linux/slab.h> 15 #include <linux/of.h> 16 #include <linux/device.h> 17 #include <linux/cpu.h> 18 19 #include <asm/asm-prototypes.h> 20 #include <asm/firmware.h> 21 #include <asm/machdep.h> 22 #include <asm/opal.h> 23 #include <asm/cputhreads.h> 24 #include <asm/cpuidle.h> 25 #include <asm/code-patching.h> 26 #include <asm/smp.h> 27 #include <asm/runlatch.h> 28 #include <asm/dbell.h> 29 30 #include "powernv.h" 31 #include "subcore.h" 32 33 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 34 #define MAX_STOP_STATE 0xF 35 36 #define P9_STOP_SPR_MSR 2000 37 #define P9_STOP_SPR_PSSCR 855 38 39 static u32 supported_cpuidle_states; 40 struct pnv_idle_states_t *pnv_idle_states; 41 int nr_pnv_idle_states; 42 43 /* 44 * The default stop state that will be used by ppc_md.power_save 45 * function on platforms that support stop instruction. 46 */ 47 static u64 pnv_default_stop_val; 48 static u64 pnv_default_stop_mask; 49 static bool default_stop_found; 50 51 /* 52 * First stop state levels when SPR and TB loss can occur. 53 */ 54 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 55 static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; 56 57 /* 58 * psscr value and mask of the deepest stop idle state. 59 * Used when a cpu is offlined. 60 */ 61 static u64 pnv_deepest_stop_psscr_val; 62 static u64 pnv_deepest_stop_psscr_mask; 63 static u64 pnv_deepest_stop_flag; 64 static bool deepest_stop_found; 65 66 static unsigned long power7_offline_type; 67 68 static int pnv_save_sprs_for_deep_states(void) 69 { 70 int cpu; 71 int rc; 72 73 /* 74 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 75 * all cpus at boot. Get these reg values of current cpu and use the 76 * same across all cpus. 77 */ 78 uint64_t lpcr_val = mfspr(SPRN_LPCR); 79 uint64_t hid0_val = mfspr(SPRN_HID0); 80 uint64_t hid1_val = mfspr(SPRN_HID1); 81 uint64_t hid4_val = mfspr(SPRN_HID4); 82 uint64_t hid5_val = mfspr(SPRN_HID5); 83 uint64_t hmeer_val = mfspr(SPRN_HMEER); 84 uint64_t msr_val = MSR_IDLE; 85 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 86 87 for_each_present_cpu(cpu) { 88 uint64_t pir = get_hard_smp_processor_id(cpu); 89 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 90 91 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 92 if (rc != 0) 93 return rc; 94 95 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 96 if (rc != 0) 97 return rc; 98 99 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 100 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 101 if (rc) 102 return rc; 103 104 rc = opal_slw_set_reg(pir, 105 P9_STOP_SPR_PSSCR, psscr_val); 106 107 if (rc) 108 return rc; 109 } 110 111 /* HIDs are per core registers */ 112 if (cpu_thread_in_core(cpu) == 0) { 113 114 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 115 if (rc != 0) 116 return rc; 117 118 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 119 if (rc != 0) 120 return rc; 121 122 /* Only p8 needs to set extra HID regiters */ 123 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 124 125 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 126 if (rc != 0) 127 return rc; 128 129 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 130 if (rc != 0) 131 return rc; 132 133 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 134 if (rc != 0) 135 return rc; 136 } 137 } 138 } 139 140 return 0; 141 } 142 143 u32 pnv_get_supported_cpuidle_states(void) 144 { 145 return supported_cpuidle_states; 146 } 147 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 148 149 static void pnv_fastsleep_workaround_apply(void *info) 150 151 { 152 int rc; 153 int *err = info; 154 155 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 156 OPAL_CONFIG_IDLE_APPLY); 157 if (rc) 158 *err = 1; 159 } 160 161 static bool power7_fastsleep_workaround_entry = true; 162 static bool power7_fastsleep_workaround_exit = true; 163 164 /* 165 * Used to store fastsleep workaround state 166 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 167 * 1 - Workaround applied once, never undone. 168 */ 169 static u8 fastsleep_workaround_applyonce; 170 171 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 172 struct device_attribute *attr, char *buf) 173 { 174 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 175 } 176 177 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 178 struct device_attribute *attr, const char *buf, 179 size_t count) 180 { 181 cpumask_t primary_thread_mask; 182 int err; 183 u8 val; 184 185 if (kstrtou8(buf, 0, &val) || val != 1) 186 return -EINVAL; 187 188 if (fastsleep_workaround_applyonce == 1) 189 return count; 190 191 /* 192 * fastsleep_workaround_applyonce = 1 implies 193 * fastsleep workaround needs to be left in 'applied' state on all 194 * the cores. Do this by- 195 * 1. Disable the 'undo' workaround in fastsleep exit path 196 * 2. Sendi IPIs to all the cores which have at least one online thread 197 * 3. Disable the 'apply' workaround in fastsleep entry path 198 * 199 * There is no need to send ipi to cores which have all threads 200 * offlined, as last thread of the core entering fastsleep or deeper 201 * state would have applied workaround. 202 */ 203 power7_fastsleep_workaround_exit = false; 204 205 get_online_cpus(); 206 primary_thread_mask = cpu_online_cores_map(); 207 on_each_cpu_mask(&primary_thread_mask, 208 pnv_fastsleep_workaround_apply, 209 &err, 1); 210 put_online_cpus(); 211 if (err) { 212 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 213 goto fail; 214 } 215 216 power7_fastsleep_workaround_entry = false; 217 218 fastsleep_workaround_applyonce = 1; 219 220 return count; 221 fail: 222 return -EIO; 223 } 224 225 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 226 show_fastsleep_workaround_applyonce, 227 store_fastsleep_workaround_applyonce); 228 229 static inline void atomic_start_thread_idle(void) 230 { 231 int cpu = raw_smp_processor_id(); 232 int first = cpu_first_thread_sibling(cpu); 233 int thread_nr = cpu_thread_in_core(cpu); 234 unsigned long *state = &paca_ptrs[first]->idle_state; 235 236 clear_bit(thread_nr, state); 237 } 238 239 static inline void atomic_stop_thread_idle(void) 240 { 241 int cpu = raw_smp_processor_id(); 242 int first = cpu_first_thread_sibling(cpu); 243 int thread_nr = cpu_thread_in_core(cpu); 244 unsigned long *state = &paca_ptrs[first]->idle_state; 245 246 set_bit(thread_nr, state); 247 } 248 249 static inline void atomic_lock_thread_idle(void) 250 { 251 int cpu = raw_smp_processor_id(); 252 int first = cpu_first_thread_sibling(cpu); 253 unsigned long *state = &paca_ptrs[first]->idle_state; 254 255 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) 256 barrier(); 257 } 258 259 static inline void atomic_unlock_and_stop_thread_idle(void) 260 { 261 int cpu = raw_smp_processor_id(); 262 int first = cpu_first_thread_sibling(cpu); 263 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 264 unsigned long *state = &paca_ptrs[first]->idle_state; 265 u64 s = READ_ONCE(*state); 266 u64 new, tmp; 267 268 BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); 269 BUG_ON(s & thread); 270 271 again: 272 new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; 273 tmp = cmpxchg(state, s, new); 274 if (unlikely(tmp != s)) { 275 s = tmp; 276 goto again; 277 } 278 } 279 280 static inline void atomic_unlock_thread_idle(void) 281 { 282 int cpu = raw_smp_processor_id(); 283 int first = cpu_first_thread_sibling(cpu); 284 unsigned long *state = &paca_ptrs[first]->idle_state; 285 286 BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); 287 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); 288 } 289 290 /* P7 and P8 */ 291 struct p7_sprs { 292 /* per core */ 293 u64 tscr; 294 u64 worc; 295 296 /* per subcore */ 297 u64 sdr1; 298 u64 rpr; 299 300 /* per thread */ 301 u64 lpcr; 302 u64 hfscr; 303 u64 fscr; 304 u64 purr; 305 u64 spurr; 306 u64 dscr; 307 u64 wort; 308 309 /* per thread SPRs that get lost in shallow states */ 310 u64 amr; 311 u64 iamr; 312 u64 amor; 313 u64 uamor; 314 }; 315 316 static unsigned long power7_idle_insn(unsigned long type) 317 { 318 int cpu = raw_smp_processor_id(); 319 int first = cpu_first_thread_sibling(cpu); 320 unsigned long *state = &paca_ptrs[first]->idle_state; 321 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 322 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 323 unsigned long srr1; 324 bool full_winkle; 325 struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ 326 bool sprs_saved = false; 327 int rc; 328 329 if (unlikely(type != PNV_THREAD_NAP)) { 330 atomic_lock_thread_idle(); 331 332 BUG_ON(!(*state & thread)); 333 *state &= ~thread; 334 335 if (power7_fastsleep_workaround_entry) { 336 if ((*state & core_thread_mask) == 0) { 337 rc = opal_config_cpu_idle_state( 338 OPAL_CONFIG_IDLE_FASTSLEEP, 339 OPAL_CONFIG_IDLE_APPLY); 340 BUG_ON(rc); 341 } 342 } 343 344 if (type == PNV_THREAD_WINKLE) { 345 sprs.tscr = mfspr(SPRN_TSCR); 346 sprs.worc = mfspr(SPRN_WORC); 347 348 sprs.sdr1 = mfspr(SPRN_SDR1); 349 sprs.rpr = mfspr(SPRN_RPR); 350 351 sprs.lpcr = mfspr(SPRN_LPCR); 352 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 353 sprs.hfscr = mfspr(SPRN_HFSCR); 354 sprs.fscr = mfspr(SPRN_FSCR); 355 } 356 sprs.purr = mfspr(SPRN_PURR); 357 sprs.spurr = mfspr(SPRN_SPURR); 358 sprs.dscr = mfspr(SPRN_DSCR); 359 sprs.wort = mfspr(SPRN_WORT); 360 361 sprs_saved = true; 362 363 /* 364 * Increment winkle counter and set all winkle bits if 365 * all threads are winkling. This allows wakeup side to 366 * distinguish between fast sleep and winkle state 367 * loss. Fast sleep still has to resync the timebase so 368 * this may not be a really big win. 369 */ 370 *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 371 if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) 372 >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 373 == threads_per_core) 374 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; 375 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 376 } 377 378 atomic_unlock_thread_idle(); 379 } 380 381 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 382 sprs.amr = mfspr(SPRN_AMR); 383 sprs.iamr = mfspr(SPRN_IAMR); 384 sprs.amor = mfspr(SPRN_AMOR); 385 sprs.uamor = mfspr(SPRN_UAMOR); 386 } 387 388 local_paca->thread_idle_state = type; 389 srr1 = isa206_idle_insn_mayloss(type); /* go idle */ 390 local_paca->thread_idle_state = PNV_THREAD_RUNNING; 391 392 WARN_ON_ONCE(!srr1); 393 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 394 395 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 396 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 397 /* 398 * We don't need an isync after the mtsprs here because 399 * the upcoming mtmsrd is execution synchronizing. 400 */ 401 mtspr(SPRN_AMR, sprs.amr); 402 mtspr(SPRN_IAMR, sprs.iamr); 403 mtspr(SPRN_AMOR, sprs.amor); 404 mtspr(SPRN_UAMOR, sprs.uamor); 405 } 406 } 407 408 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 409 hmi_exception_realmode(NULL); 410 411 if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { 412 if (unlikely(type != PNV_THREAD_NAP)) { 413 atomic_lock_thread_idle(); 414 if (type == PNV_THREAD_WINKLE) { 415 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 416 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 417 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 418 } 419 atomic_unlock_and_stop_thread_idle(); 420 } 421 return srr1; 422 } 423 424 /* HV state loss */ 425 BUG_ON(type == PNV_THREAD_NAP); 426 427 atomic_lock_thread_idle(); 428 429 full_winkle = false; 430 if (type == PNV_THREAD_WINKLE) { 431 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 432 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 433 if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { 434 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 435 full_winkle = true; 436 BUG_ON(!sprs_saved); 437 } 438 } 439 440 WARN_ON(*state & thread); 441 442 if ((*state & core_thread_mask) != 0) 443 goto core_woken; 444 445 /* Per-core SPRs */ 446 if (full_winkle) { 447 mtspr(SPRN_TSCR, sprs.tscr); 448 mtspr(SPRN_WORC, sprs.worc); 449 } 450 451 if (power7_fastsleep_workaround_exit) { 452 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 453 OPAL_CONFIG_IDLE_UNDO); 454 BUG_ON(rc); 455 } 456 457 /* TB */ 458 if (opal_resync_timebase() != OPAL_SUCCESS) 459 BUG(); 460 461 core_woken: 462 if (!full_winkle) 463 goto subcore_woken; 464 465 if ((*state & local_paca->subcore_sibling_mask) != 0) 466 goto subcore_woken; 467 468 /* Per-subcore SPRs */ 469 mtspr(SPRN_SDR1, sprs.sdr1); 470 mtspr(SPRN_RPR, sprs.rpr); 471 472 subcore_woken: 473 /* 474 * isync after restoring shared SPRs and before unlocking. Unlock 475 * only contains hwsync which does not necessarily do the right 476 * thing for SPRs. 477 */ 478 isync(); 479 atomic_unlock_and_stop_thread_idle(); 480 481 /* Fast sleep does not lose SPRs */ 482 if (!full_winkle) 483 return srr1; 484 485 /* Per-thread SPRs */ 486 mtspr(SPRN_LPCR, sprs.lpcr); 487 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 488 mtspr(SPRN_HFSCR, sprs.hfscr); 489 mtspr(SPRN_FSCR, sprs.fscr); 490 } 491 mtspr(SPRN_PURR, sprs.purr); 492 mtspr(SPRN_SPURR, sprs.spurr); 493 mtspr(SPRN_DSCR, sprs.dscr); 494 mtspr(SPRN_WORT, sprs.wort); 495 496 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 497 498 /* 499 * The SLB has to be restored here, but it sometimes still 500 * contains entries, so the __ variant must be used to prevent 501 * multi hits. 502 */ 503 __slb_restore_bolted_realmode(); 504 505 return srr1; 506 } 507 508 extern unsigned long idle_kvm_start_guest(unsigned long srr1); 509 510 #ifdef CONFIG_HOTPLUG_CPU 511 static unsigned long power7_offline(void) 512 { 513 unsigned long srr1; 514 515 mtmsr(MSR_IDLE); 516 517 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 518 /* Tell KVM we're entering idle. */ 519 /******************************************************/ 520 /* N O T E W E L L ! ! ! N O T E W E L L */ 521 /* The following store to HSTATE_HWTHREAD_STATE(r13) */ 522 /* MUST occur in real mode, i.e. with the MMU off, */ 523 /* and the MMU must stay off until we clear this flag */ 524 /* and test HSTATE_HWTHREAD_REQ(r13) in */ 525 /* pnv_powersave_wakeup in this file. */ 526 /* The reason is that another thread can switch the */ 527 /* MMU to a guest context whenever this flag is set */ 528 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ 529 /* that would potentially cause this thread to start */ 530 /* executing instructions from guest memory in */ 531 /* hypervisor mode, leading to a host crash or data */ 532 /* corruption, or worse. */ 533 /******************************************************/ 534 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 535 #endif 536 537 __ppc64_runlatch_off(); 538 srr1 = power7_idle_insn(power7_offline_type); 539 __ppc64_runlatch_on(); 540 541 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 542 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 543 /* Order setting hwthread_state vs. testing hwthread_req */ 544 smp_mb(); 545 if (local_paca->kvm_hstate.hwthread_req) 546 srr1 = idle_kvm_start_guest(srr1); 547 #endif 548 549 mtmsr(MSR_KERNEL); 550 551 return srr1; 552 } 553 #endif 554 555 void power7_idle_type(unsigned long type) 556 { 557 unsigned long srr1; 558 559 if (!prep_irq_for_idle_irqsoff()) 560 return; 561 562 mtmsr(MSR_IDLE); 563 __ppc64_runlatch_off(); 564 srr1 = power7_idle_insn(type); 565 __ppc64_runlatch_on(); 566 mtmsr(MSR_KERNEL); 567 568 fini_irq_for_idle_irqsoff(); 569 irq_set_pending_from_srr1(srr1); 570 } 571 572 void power7_idle(void) 573 { 574 if (!powersave_nap) 575 return; 576 577 power7_idle_type(PNV_THREAD_NAP); 578 } 579 580 struct p9_sprs { 581 /* per core */ 582 u64 ptcr; 583 u64 rpr; 584 u64 tscr; 585 u64 ldbar; 586 587 /* per thread */ 588 u64 lpcr; 589 u64 hfscr; 590 u64 fscr; 591 u64 pid; 592 u64 purr; 593 u64 spurr; 594 u64 dscr; 595 u64 wort; 596 597 u64 mmcra; 598 u32 mmcr0; 599 u32 mmcr1; 600 u64 mmcr2; 601 602 /* per thread SPRs that get lost in shallow states */ 603 u64 amr; 604 u64 iamr; 605 u64 amor; 606 u64 uamor; 607 }; 608 609 static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) 610 { 611 int cpu = raw_smp_processor_id(); 612 int first = cpu_first_thread_sibling(cpu); 613 unsigned long *state = &paca_ptrs[first]->idle_state; 614 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 615 unsigned long srr1; 616 unsigned long pls; 617 unsigned long mmcr0 = 0; 618 struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ 619 bool sprs_saved = false; 620 621 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 622 /* EC=ESL=0 case */ 623 624 BUG_ON(!mmu_on); 625 626 /* 627 * Wake synchronously. SRESET via xscom may still cause 628 * a 0x100 powersave wakeup with SRR1 reason! 629 */ 630 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 631 if (likely(!srr1)) 632 return 0; 633 634 /* 635 * Registers not saved, can't recover! 636 * This would be a hardware bug 637 */ 638 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 639 640 goto out; 641 } 642 643 /* EC=ESL=1 case */ 644 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 645 if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { 646 local_paca->requested_psscr = psscr; 647 /* order setting requested_psscr vs testing dont_stop */ 648 smp_mb(); 649 if (atomic_read(&local_paca->dont_stop)) { 650 local_paca->requested_psscr = 0; 651 return 0; 652 } 653 } 654 #endif 655 656 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 657 /* 658 * POWER9 DD2 can incorrectly set PMAO when waking up 659 * after a state-loss idle. Saving and restoring MMCR0 660 * over idle is a workaround. 661 */ 662 mmcr0 = mfspr(SPRN_MMCR0); 663 } 664 if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { 665 sprs.lpcr = mfspr(SPRN_LPCR); 666 sprs.hfscr = mfspr(SPRN_HFSCR); 667 sprs.fscr = mfspr(SPRN_FSCR); 668 sprs.pid = mfspr(SPRN_PID); 669 sprs.purr = mfspr(SPRN_PURR); 670 sprs.spurr = mfspr(SPRN_SPURR); 671 sprs.dscr = mfspr(SPRN_DSCR); 672 sprs.wort = mfspr(SPRN_WORT); 673 674 sprs.mmcra = mfspr(SPRN_MMCRA); 675 sprs.mmcr0 = mfspr(SPRN_MMCR0); 676 sprs.mmcr1 = mfspr(SPRN_MMCR1); 677 sprs.mmcr2 = mfspr(SPRN_MMCR2); 678 679 sprs.ptcr = mfspr(SPRN_PTCR); 680 sprs.rpr = mfspr(SPRN_RPR); 681 sprs.tscr = mfspr(SPRN_TSCR); 682 sprs.ldbar = mfspr(SPRN_LDBAR); 683 684 sprs_saved = true; 685 686 atomic_start_thread_idle(); 687 } 688 689 sprs.amr = mfspr(SPRN_AMR); 690 sprs.iamr = mfspr(SPRN_IAMR); 691 sprs.amor = mfspr(SPRN_AMOR); 692 sprs.uamor = mfspr(SPRN_UAMOR); 693 694 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 695 696 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 697 local_paca->requested_psscr = 0; 698 #endif 699 700 psscr = mfspr(SPRN_PSSCR); 701 702 WARN_ON_ONCE(!srr1); 703 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 704 705 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 706 unsigned long mmcra; 707 708 /* 709 * We don't need an isync after the mtsprs here because the 710 * upcoming mtmsrd is execution synchronizing. 711 */ 712 mtspr(SPRN_AMR, sprs.amr); 713 mtspr(SPRN_IAMR, sprs.iamr); 714 mtspr(SPRN_AMOR, sprs.amor); 715 mtspr(SPRN_UAMOR, sprs.uamor); 716 717 /* 718 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT 719 * might have been corrupted and needs flushing. We also need 720 * to reload MMCR0 (see mmcr0 comment above). 721 */ 722 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 723 asm volatile(PPC_INVALIDATE_ERAT); 724 mtspr(SPRN_MMCR0, mmcr0); 725 } 726 727 /* 728 * DD2.2 and earlier need to set then clear bit 60 in MMCRA 729 * to ensure the PMU starts running. 730 */ 731 mmcra = mfspr(SPRN_MMCRA); 732 mmcra |= PPC_BIT(60); 733 mtspr(SPRN_MMCRA, mmcra); 734 mmcra &= ~PPC_BIT(60); 735 mtspr(SPRN_MMCRA, mmcra); 736 } 737 738 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 739 hmi_exception_realmode(NULL); 740 741 /* 742 * On POWER9, SRR1 bits do not match exactly as expected. 743 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 744 * just always test PSSCR for SPR/TB state loss. 745 */ 746 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 747 if (likely(pls < pnv_first_spr_loss_level)) { 748 if (sprs_saved) 749 atomic_stop_thread_idle(); 750 goto out; 751 } 752 753 /* HV state loss */ 754 BUG_ON(!sprs_saved); 755 756 atomic_lock_thread_idle(); 757 758 if ((*state & core_thread_mask) != 0) 759 goto core_woken; 760 761 /* Per-core SPRs */ 762 mtspr(SPRN_PTCR, sprs.ptcr); 763 mtspr(SPRN_RPR, sprs.rpr); 764 mtspr(SPRN_TSCR, sprs.tscr); 765 mtspr(SPRN_LDBAR, sprs.ldbar); 766 767 if (pls >= pnv_first_tb_loss_level) { 768 /* TB loss */ 769 if (opal_resync_timebase() != OPAL_SUCCESS) 770 BUG(); 771 } 772 773 /* 774 * isync after restoring shared SPRs and before unlocking. Unlock 775 * only contains hwsync which does not necessarily do the right 776 * thing for SPRs. 777 */ 778 isync(); 779 780 core_woken: 781 atomic_unlock_and_stop_thread_idle(); 782 783 /* Per-thread SPRs */ 784 mtspr(SPRN_LPCR, sprs.lpcr); 785 mtspr(SPRN_HFSCR, sprs.hfscr); 786 mtspr(SPRN_FSCR, sprs.fscr); 787 mtspr(SPRN_PID, sprs.pid); 788 mtspr(SPRN_PURR, sprs.purr); 789 mtspr(SPRN_SPURR, sprs.spurr); 790 mtspr(SPRN_DSCR, sprs.dscr); 791 mtspr(SPRN_WORT, sprs.wort); 792 793 mtspr(SPRN_MMCRA, sprs.mmcra); 794 mtspr(SPRN_MMCR0, sprs.mmcr0); 795 mtspr(SPRN_MMCR1, sprs.mmcr1); 796 mtspr(SPRN_MMCR2, sprs.mmcr2); 797 798 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 799 800 if (!radix_enabled()) 801 __slb_restore_bolted_realmode(); 802 803 out: 804 if (mmu_on) 805 mtmsr(MSR_KERNEL); 806 807 return srr1; 808 } 809 810 #ifdef CONFIG_HOTPLUG_CPU 811 static unsigned long power9_offline_stop(unsigned long psscr) 812 { 813 unsigned long srr1; 814 815 #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE 816 __ppc64_runlatch_off(); 817 srr1 = power9_idle_stop(psscr, true); 818 __ppc64_runlatch_on(); 819 #else 820 /* 821 * Tell KVM we're entering idle. 822 * This does not have to be done in real mode because the P9 MMU 823 * is independent per-thread. Some steppings share radix/hash mode 824 * between threads, but in that case KVM has a barrier sync in real 825 * mode before and after switching between radix and hash. 826 * 827 * kvm_start_guest must still be called in real mode though, hence 828 * the false argument. 829 */ 830 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 831 832 __ppc64_runlatch_off(); 833 srr1 = power9_idle_stop(psscr, false); 834 __ppc64_runlatch_on(); 835 836 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 837 /* Order setting hwthread_state vs. testing hwthread_req */ 838 smp_mb(); 839 if (local_paca->kvm_hstate.hwthread_req) 840 srr1 = idle_kvm_start_guest(srr1); 841 mtmsr(MSR_KERNEL); 842 #endif 843 844 return srr1; 845 } 846 #endif 847 848 void power9_idle_type(unsigned long stop_psscr_val, 849 unsigned long stop_psscr_mask) 850 { 851 unsigned long psscr; 852 unsigned long srr1; 853 854 if (!prep_irq_for_idle_irqsoff()) 855 return; 856 857 psscr = mfspr(SPRN_PSSCR); 858 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 859 860 __ppc64_runlatch_off(); 861 srr1 = power9_idle_stop(psscr, true); 862 __ppc64_runlatch_on(); 863 864 fini_irq_for_idle_irqsoff(); 865 866 irq_set_pending_from_srr1(srr1); 867 } 868 869 /* 870 * Used for ppc_md.power_save which needs a function with no parameters 871 */ 872 void power9_idle(void) 873 { 874 power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 875 } 876 877 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 878 /* 879 * This is used in working around bugs in thread reconfiguration 880 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional 881 * memory and the way that XER[SO] is checkpointed. 882 * This function forces the core into SMT4 in order by asking 883 * all other threads not to stop, and sending a message to any 884 * that are in a stop state. 885 * Must be called with preemption disabled. 886 */ 887 void pnv_power9_force_smt4_catch(void) 888 { 889 int cpu, cpu0, thr; 890 int awake_threads = 1; /* this thread is awake */ 891 int poke_threads = 0; 892 int need_awake = threads_per_core; 893 894 cpu = smp_processor_id(); 895 cpu0 = cpu & ~(threads_per_core - 1); 896 for (thr = 0; thr < threads_per_core; ++thr) { 897 if (cpu != cpu0 + thr) 898 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 899 } 900 /* order setting dont_stop vs testing requested_psscr */ 901 smp_mb(); 902 for (thr = 0; thr < threads_per_core; ++thr) { 903 if (!paca_ptrs[cpu0+thr]->requested_psscr) 904 ++awake_threads; 905 else 906 poke_threads |= (1 << thr); 907 } 908 909 /* If at least 3 threads are awake, the core is in SMT4 already */ 910 if (awake_threads < need_awake) { 911 /* We have to wake some threads; we'll use msgsnd */ 912 for (thr = 0; thr < threads_per_core; ++thr) { 913 if (poke_threads & (1 << thr)) { 914 ppc_msgsnd_sync(); 915 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, 916 paca_ptrs[cpu0+thr]->hw_cpu_id); 917 } 918 } 919 /* now spin until at least 3 threads are awake */ 920 do { 921 for (thr = 0; thr < threads_per_core; ++thr) { 922 if ((poke_threads & (1 << thr)) && 923 !paca_ptrs[cpu0+thr]->requested_psscr) { 924 ++awake_threads; 925 poke_threads &= ~(1 << thr); 926 } 927 } 928 } while (awake_threads < need_awake); 929 } 930 } 931 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); 932 933 void pnv_power9_force_smt4_release(void) 934 { 935 int cpu, cpu0, thr; 936 937 cpu = smp_processor_id(); 938 cpu0 = cpu & ~(threads_per_core - 1); 939 940 /* clear all the dont_stop flags */ 941 for (thr = 0; thr < threads_per_core; ++thr) { 942 if (cpu != cpu0 + thr) 943 atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); 944 } 945 } 946 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 947 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 948 949 #ifdef CONFIG_HOTPLUG_CPU 950 951 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) 952 { 953 u64 pir = get_hard_smp_processor_id(cpu); 954 955 mtspr(SPRN_LPCR, lpcr_val); 956 957 /* 958 * Program the LPCR via stop-api only if the deepest stop state 959 * can lose hypervisor context. 960 */ 961 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) 962 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 963 } 964 965 /* 966 * pnv_cpu_offline: A function that puts the CPU into the deepest 967 * available platform idle state on a CPU-Offline. 968 * interrupts hard disabled and no lazy irq pending. 969 */ 970 unsigned long pnv_cpu_offline(unsigned int cpu) 971 { 972 unsigned long srr1; 973 974 __ppc64_runlatch_off(); 975 976 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { 977 unsigned long psscr; 978 979 psscr = mfspr(SPRN_PSSCR); 980 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 981 pnv_deepest_stop_psscr_val; 982 srr1 = power9_offline_stop(psscr); 983 } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { 984 srr1 = power7_offline(); 985 } else { 986 /* This is the fallback method. We emulate snooze */ 987 while (!generic_check_cpu_restart(cpu)) { 988 HMT_low(); 989 HMT_very_low(); 990 } 991 srr1 = 0; 992 HMT_medium(); 993 } 994 995 __ppc64_runlatch_on(); 996 997 return srr1; 998 } 999 #endif 1000 1001 /* 1002 * Power ISA 3.0 idle initialization. 1003 * 1004 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control 1005 * Register (PSSCR) to control idle behavior. 1006 * 1007 * PSSCR layout: 1008 * ---------------------------------------------------------- 1009 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | 1010 * ---------------------------------------------------------- 1011 * 0 4 41 42 43 44 48 54 56 60 1012 * 1013 * PSSCR key fields: 1014 * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the 1015 * lowest power-saving state the thread entered since stop instruction was 1016 * last executed. 1017 * 1018 * Bit 41 - Status Disable(SD) 1019 * 0 - Shows PLS entries 1020 * 1 - PLS entries are all 0 1021 * 1022 * Bit 42 - Enable State Loss 1023 * 0 - No state is lost irrespective of other fields 1024 * 1 - Allows state loss 1025 * 1026 * Bit 43 - Exit Criterion 1027 * 0 - Exit from power-save mode on any interrupt 1028 * 1 - Exit from power-save mode controlled by LPCR's PECE bits 1029 * 1030 * Bits 44:47 - Power-Saving Level Limit 1031 * This limits the power-saving level that can be entered into. 1032 * 1033 * Bits 60:63 - Requested Level 1034 * Used to specify which power-saving level must be entered on executing 1035 * stop instruction 1036 */ 1037 1038 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1039 { 1040 int err = 0; 1041 1042 /* 1043 * psscr_mask == 0xf indicates an older firmware. 1044 * Set remaining fields of psscr to the default values. 1045 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 1046 */ 1047 if (*psscr_mask == 0xf) { 1048 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 1049 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 1050 return err; 1051 } 1052 1053 /* 1054 * New firmware is expected to set the psscr_val bits correctly. 1055 * Validate that the following invariants are correctly maintained by 1056 * the new firmware. 1057 * - ESL bit value matches the EC bit value. 1058 * - ESL bit is set for all the deep stop states. 1059 */ 1060 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 1061 err = ERR_EC_ESL_MISMATCH; 1062 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1063 GET_PSSCR_ESL(*psscr_val) == 0) { 1064 err = ERR_DEEP_STATE_ESL_MISMATCH; 1065 } 1066 1067 return err; 1068 } 1069 1070 /* 1071 * pnv_arch300_idle_init: Initializes the default idle state, first 1072 * deep idle state and deepest idle state on 1073 * ISA 3.0 CPUs. 1074 * 1075 * @np: /ibm,opal/power-mgt device node 1076 * @flags: cpu-idle-state-flags array 1077 * @dt_idle_states: Number of idle state entries 1078 * Returns 0 on success 1079 */ 1080 static void __init pnv_power9_idle_init(void) 1081 { 1082 u64 max_residency_ns = 0; 1083 int i; 1084 1085 /* 1086 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1087 * the deepest stop state. 1088 * 1089 * pnv_default_stop_{val,mask} should be set to values corresponding to 1090 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1091 */ 1092 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1093 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; 1094 for (i = 0; i < nr_pnv_idle_states; i++) { 1095 int err; 1096 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1097 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1098 1099 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1100 (pnv_first_tb_loss_level > psscr_rl)) 1101 pnv_first_tb_loss_level = psscr_rl; 1102 1103 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1104 (pnv_first_spr_loss_level > psscr_rl)) 1105 pnv_first_spr_loss_level = psscr_rl; 1106 1107 /* 1108 * The idle code does not deal with TB loss occurring 1109 * in a shallower state than SPR loss, so force it to 1110 * behave like SPRs are lost if TB is lost. POWER9 would 1111 * never encouter this, but a POWER8 core would if it 1112 * implemented the stop instruction. So this is for forward 1113 * compatibility. 1114 */ 1115 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1116 (pnv_first_spr_loss_level > psscr_rl)) 1117 pnv_first_spr_loss_level = psscr_rl; 1118 1119 err = validate_psscr_val_mask(&state->psscr_val, 1120 &state->psscr_mask, 1121 state->flags); 1122 if (err) { 1123 report_invalid_psscr_val(state->psscr_val, err); 1124 continue; 1125 } 1126 1127 state->valid = true; 1128 1129 if (max_residency_ns < state->residency_ns) { 1130 max_residency_ns = state->residency_ns; 1131 pnv_deepest_stop_psscr_val = state->psscr_val; 1132 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1133 pnv_deepest_stop_flag = state->flags; 1134 deepest_stop_found = true; 1135 } 1136 1137 if (!default_stop_found && 1138 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1139 pnv_default_stop_val = state->psscr_val; 1140 pnv_default_stop_mask = state->psscr_mask; 1141 default_stop_found = true; 1142 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1143 } 1144 } 1145 1146 if (unlikely(!default_stop_found)) { 1147 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1148 } else { 1149 ppc_md.power_save = power9_idle; 1150 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1151 pnv_default_stop_val, pnv_default_stop_mask); 1152 } 1153 1154 if (unlikely(!deepest_stop_found)) { 1155 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait"); 1156 } else { 1157 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1158 pnv_deepest_stop_psscr_val, 1159 pnv_deepest_stop_psscr_mask); 1160 } 1161 1162 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n", 1163 pnv_first_spr_loss_level); 1164 1165 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n", 1166 pnv_first_tb_loss_level); 1167 } 1168 1169 static void __init pnv_disable_deep_states(void) 1170 { 1171 /* 1172 * The stop-api is unable to restore hypervisor 1173 * resources on wakeup from platform idle states which 1174 * lose full context. So disable such states. 1175 */ 1176 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1177 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1178 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1179 1180 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1181 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1182 /* 1183 * Use the default stop state for CPU-Hotplug 1184 * if available. 1185 */ 1186 if (default_stop_found) { 1187 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1188 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1189 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1190 pnv_deepest_stop_psscr_val); 1191 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1192 deepest_stop_found = false; 1193 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1194 } 1195 } 1196 } 1197 1198 /* 1199 * Probe device tree for supported idle states 1200 */ 1201 static void __init pnv_probe_idle_states(void) 1202 { 1203 int i; 1204 1205 if (nr_pnv_idle_states < 0) { 1206 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1207 return; 1208 } 1209 1210 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1211 pnv_power9_idle_init(); 1212 1213 for (i = 0; i < nr_pnv_idle_states; i++) 1214 supported_cpuidle_states |= pnv_idle_states[i].flags; 1215 } 1216 1217 /* 1218 * This function parses device-tree and populates all the information 1219 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1220 * which is the number of cpuidle states discovered through device-tree. 1221 */ 1222 1223 static int pnv_parse_cpuidle_dt(void) 1224 { 1225 struct device_node *np; 1226 int nr_idle_states, i; 1227 int rc = 0; 1228 u32 *temp_u32; 1229 u64 *temp_u64; 1230 const char **temp_string; 1231 1232 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1233 if (!np) { 1234 pr_warn("opal: PowerMgmt Node not found\n"); 1235 return -ENODEV; 1236 } 1237 nr_idle_states = of_property_count_u32_elems(np, 1238 "ibm,cpu-idle-state-flags"); 1239 1240 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 1241 GFP_KERNEL); 1242 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1243 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1244 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1245 1246 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1247 pr_err("Could not allocate memory for dt parsing\n"); 1248 rc = -ENOMEM; 1249 goto out; 1250 } 1251 1252 /* Read flags */ 1253 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1254 temp_u32, nr_idle_states)) { 1255 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1256 rc = -EINVAL; 1257 goto out; 1258 } 1259 for (i = 0; i < nr_idle_states; i++) 1260 pnv_idle_states[i].flags = temp_u32[i]; 1261 1262 /* Read latencies */ 1263 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1264 temp_u32, nr_idle_states)) { 1265 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1266 rc = -EINVAL; 1267 goto out; 1268 } 1269 for (i = 0; i < nr_idle_states; i++) 1270 pnv_idle_states[i].latency_ns = temp_u32[i]; 1271 1272 /* Read residencies */ 1273 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1274 temp_u32, nr_idle_states)) { 1275 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1276 rc = -EINVAL; 1277 goto out; 1278 } 1279 for (i = 0; i < nr_idle_states; i++) 1280 pnv_idle_states[i].residency_ns = temp_u32[i]; 1281 1282 /* For power9 */ 1283 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1284 /* Read pm_crtl_val */ 1285 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1286 temp_u64, nr_idle_states)) { 1287 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1288 rc = -EINVAL; 1289 goto out; 1290 } 1291 for (i = 0; i < nr_idle_states; i++) 1292 pnv_idle_states[i].psscr_val = temp_u64[i]; 1293 1294 /* Read pm_crtl_mask */ 1295 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1296 temp_u64, nr_idle_states)) { 1297 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1298 rc = -EINVAL; 1299 goto out; 1300 } 1301 for (i = 0; i < nr_idle_states; i++) 1302 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1303 } 1304 1305 /* 1306 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1307 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1308 * plan to use it in near future. Hence, not parsing these properties 1309 */ 1310 1311 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1312 temp_string, nr_idle_states) < 0) { 1313 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1314 rc = -EINVAL; 1315 goto out; 1316 } 1317 for (i = 0; i < nr_idle_states; i++) 1318 strlcpy(pnv_idle_states[i].name, temp_string[i], 1319 PNV_IDLE_NAME_LEN); 1320 nr_pnv_idle_states = nr_idle_states; 1321 rc = 0; 1322 out: 1323 kfree(temp_u32); 1324 kfree(temp_u64); 1325 kfree(temp_string); 1326 return rc; 1327 } 1328 1329 static int __init pnv_init_idle_states(void) 1330 { 1331 int cpu; 1332 int rc = 0; 1333 1334 /* Set up PACA fields */ 1335 for_each_present_cpu(cpu) { 1336 struct paca_struct *p = paca_ptrs[cpu]; 1337 1338 p->idle_state = 0; 1339 if (cpu == cpu_first_thread_sibling(cpu)) 1340 p->idle_state = (1 << threads_per_core) - 1; 1341 1342 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1343 /* P7/P8 nap */ 1344 p->thread_idle_state = PNV_THREAD_RUNNING; 1345 } else { 1346 /* P9 stop */ 1347 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1348 p->requested_psscr = 0; 1349 atomic_set(&p->dont_stop, 0); 1350 #endif 1351 } 1352 } 1353 1354 /* In case we error out nr_pnv_idle_states will be zero */ 1355 nr_pnv_idle_states = 0; 1356 supported_cpuidle_states = 0; 1357 1358 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1359 goto out; 1360 rc = pnv_parse_cpuidle_dt(); 1361 if (rc) 1362 return rc; 1363 pnv_probe_idle_states(); 1364 1365 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1366 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1367 power7_fastsleep_workaround_entry = false; 1368 power7_fastsleep_workaround_exit = false; 1369 } else { 1370 /* 1371 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1372 * workaround is needed to use fastsleep. Provide sysfs 1373 * control to choose how this workaround has to be 1374 * applied. 1375 */ 1376 device_create_file(cpu_subsys.dev_root, 1377 &dev_attr_fastsleep_workaround_applyonce); 1378 } 1379 1380 update_subcore_sibling_mask(); 1381 1382 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1383 ppc_md.power_save = power7_idle; 1384 power7_offline_type = PNV_THREAD_NAP; 1385 } 1386 1387 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1388 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1389 power7_offline_type = PNV_THREAD_WINKLE; 1390 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1391 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1392 power7_offline_type = PNV_THREAD_SLEEP; 1393 } 1394 1395 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1396 if (pnv_save_sprs_for_deep_states()) 1397 pnv_disable_deep_states(); 1398 } 1399 1400 out: 1401 return 0; 1402 } 1403 machine_subsys_initcall(powernv, pnv_init_idle_states); 1404