1 /* 2 * PowerNV cpuidle code 3 * 4 * Copyright 2015 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/types.h> 13 #include <linux/mm.h> 14 #include <linux/slab.h> 15 #include <linux/of.h> 16 #include <linux/device.h> 17 #include <linux/cpu.h> 18 19 #include <asm/firmware.h> 20 #include <asm/machdep.h> 21 #include <asm/opal.h> 22 #include <asm/cputhreads.h> 23 #include <asm/cpuidle.h> 24 #include <asm/code-patching.h> 25 #include <asm/smp.h> 26 #include <asm/runlatch.h> 27 #include <asm/dbell.h> 28 29 #include "powernv.h" 30 #include "subcore.h" 31 32 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 33 #define MAX_STOP_STATE 0xF 34 35 #define P9_STOP_SPR_MSR 2000 36 #define P9_STOP_SPR_PSSCR 855 37 38 static u32 supported_cpuidle_states; 39 struct pnv_idle_states_t *pnv_idle_states; 40 int nr_pnv_idle_states; 41 42 /* 43 * The default stop state that will be used by ppc_md.power_save 44 * function on platforms that support stop instruction. 45 */ 46 static u64 pnv_default_stop_val; 47 static u64 pnv_default_stop_mask; 48 static bool default_stop_found; 49 50 /* 51 * First deep stop state. Used to figure out when to save/restore 52 * hypervisor context. 53 */ 54 u64 pnv_first_deep_stop_state = MAX_STOP_STATE; 55 56 /* 57 * psscr value and mask of the deepest stop idle state. 58 * Used when a cpu is offlined. 59 */ 60 static u64 pnv_deepest_stop_psscr_val; 61 static u64 pnv_deepest_stop_psscr_mask; 62 static u64 pnv_deepest_stop_flag; 63 static bool deepest_stop_found; 64 65 static int pnv_save_sprs_for_deep_states(void) 66 { 67 int cpu; 68 int rc; 69 70 /* 71 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 72 * all cpus at boot. Get these reg values of current cpu and use the 73 * same across all cpus. 74 */ 75 uint64_t lpcr_val = mfspr(SPRN_LPCR); 76 uint64_t hid0_val = mfspr(SPRN_HID0); 77 uint64_t hid1_val = mfspr(SPRN_HID1); 78 uint64_t hid4_val = mfspr(SPRN_HID4); 79 uint64_t hid5_val = mfspr(SPRN_HID5); 80 uint64_t hmeer_val = mfspr(SPRN_HMEER); 81 uint64_t msr_val = MSR_IDLE; 82 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 83 84 for_each_present_cpu(cpu) { 85 uint64_t pir = get_hard_smp_processor_id(cpu); 86 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 87 88 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 89 if (rc != 0) 90 return rc; 91 92 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 93 if (rc != 0) 94 return rc; 95 96 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 97 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 98 if (rc) 99 return rc; 100 101 rc = opal_slw_set_reg(pir, 102 P9_STOP_SPR_PSSCR, psscr_val); 103 104 if (rc) 105 return rc; 106 } 107 108 /* HIDs are per core registers */ 109 if (cpu_thread_in_core(cpu) == 0) { 110 111 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 112 if (rc != 0) 113 return rc; 114 115 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 116 if (rc != 0) 117 return rc; 118 119 /* Only p8 needs to set extra HID regiters */ 120 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 121 122 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 123 if (rc != 0) 124 return rc; 125 126 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 127 if (rc != 0) 128 return rc; 129 130 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 131 if (rc != 0) 132 return rc; 133 } 134 } 135 } 136 137 return 0; 138 } 139 140 static void pnv_alloc_idle_core_states(void) 141 { 142 int i, j; 143 int nr_cores = cpu_nr_cores(); 144 u32 *core_idle_state; 145 146 /* 147 * core_idle_state - The lower 8 bits track the idle state of 148 * each thread of the core. 149 * 150 * The most significant bit is the lock bit. 151 * 152 * Initially all the bits corresponding to threads_per_core 153 * are set. They are cleared when the thread enters deep idle 154 * state like sleep and winkle/stop. 155 * 156 * Initially the lock bit is cleared. The lock bit has 2 157 * purposes: 158 * a. While the first thread in the core waking up from 159 * idle is restoring core state, it prevents other 160 * threads in the core from switching to process 161 * context. 162 * b. While the last thread in the core is saving the 163 * core state, it prevents a different thread from 164 * waking up. 165 */ 166 for (i = 0; i < nr_cores; i++) { 167 int first_cpu = i * threads_per_core; 168 int node = cpu_to_node(first_cpu); 169 size_t paca_ptr_array_size; 170 171 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); 172 *core_idle_state = (1 << threads_per_core) - 1; 173 paca_ptr_array_size = (threads_per_core * 174 sizeof(struct paca_struct *)); 175 176 for (j = 0; j < threads_per_core; j++) { 177 int cpu = first_cpu + j; 178 179 paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; 180 paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; 181 paca_ptrs[cpu]->thread_mask = 1 << j; 182 } 183 } 184 185 update_subcore_sibling_mask(); 186 187 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 188 int rc = pnv_save_sprs_for_deep_states(); 189 190 if (likely(!rc)) 191 return; 192 193 /* 194 * The stop-api is unable to restore hypervisor 195 * resources on wakeup from platform idle states which 196 * lose full context. So disable such states. 197 */ 198 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 199 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 200 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 201 202 if (cpu_has_feature(CPU_FTR_ARCH_300) && 203 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 204 /* 205 * Use the default stop state for CPU-Hotplug 206 * if available. 207 */ 208 if (default_stop_found) { 209 pnv_deepest_stop_psscr_val = 210 pnv_default_stop_val; 211 pnv_deepest_stop_psscr_mask = 212 pnv_default_stop_mask; 213 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 214 pnv_deepest_stop_psscr_val); 215 } else { /* Fallback to snooze loop for CPU-Hotplug */ 216 deepest_stop_found = false; 217 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 218 } 219 } 220 } 221 } 222 223 u32 pnv_get_supported_cpuidle_states(void) 224 { 225 return supported_cpuidle_states; 226 } 227 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 228 229 static void pnv_fastsleep_workaround_apply(void *info) 230 231 { 232 int rc; 233 int *err = info; 234 235 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 236 OPAL_CONFIG_IDLE_APPLY); 237 if (rc) 238 *err = 1; 239 } 240 241 /* 242 * Used to store fastsleep workaround state 243 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 244 * 1 - Workaround applied once, never undone. 245 */ 246 static u8 fastsleep_workaround_applyonce; 247 248 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 249 struct device_attribute *attr, char *buf) 250 { 251 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 252 } 253 254 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 255 struct device_attribute *attr, const char *buf, 256 size_t count) 257 { 258 cpumask_t primary_thread_mask; 259 int err; 260 u8 val; 261 262 if (kstrtou8(buf, 0, &val) || val != 1) 263 return -EINVAL; 264 265 if (fastsleep_workaround_applyonce == 1) 266 return count; 267 268 /* 269 * fastsleep_workaround_applyonce = 1 implies 270 * fastsleep workaround needs to be left in 'applied' state on all 271 * the cores. Do this by- 272 * 1. Patching out the call to 'undo' workaround in fastsleep exit path 273 * 2. Sending ipi to all the cores which have at least one online thread 274 * 3. Patching out the call to 'apply' workaround in fastsleep entry 275 * path 276 * There is no need to send ipi to cores which have all threads 277 * offlined, as last thread of the core entering fastsleep or deeper 278 * state would have applied workaround. 279 */ 280 err = patch_instruction( 281 (unsigned int *)pnv_fastsleep_workaround_at_exit, 282 PPC_INST_NOP); 283 if (err) { 284 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); 285 goto fail; 286 } 287 288 get_online_cpus(); 289 primary_thread_mask = cpu_online_cores_map(); 290 on_each_cpu_mask(&primary_thread_mask, 291 pnv_fastsleep_workaround_apply, 292 &err, 1); 293 put_online_cpus(); 294 if (err) { 295 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 296 goto fail; 297 } 298 299 err = patch_instruction( 300 (unsigned int *)pnv_fastsleep_workaround_at_entry, 301 PPC_INST_NOP); 302 if (err) { 303 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); 304 goto fail; 305 } 306 307 fastsleep_workaround_applyonce = 1; 308 309 return count; 310 fail: 311 return -EIO; 312 } 313 314 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 315 show_fastsleep_workaround_applyonce, 316 store_fastsleep_workaround_applyonce); 317 318 static unsigned long __power7_idle_type(unsigned long type) 319 { 320 unsigned long srr1; 321 322 if (!prep_irq_for_idle_irqsoff()) 323 return 0; 324 325 __ppc64_runlatch_off(); 326 srr1 = power7_idle_insn(type); 327 __ppc64_runlatch_on(); 328 329 fini_irq_for_idle_irqsoff(); 330 331 return srr1; 332 } 333 334 void power7_idle_type(unsigned long type) 335 { 336 unsigned long srr1; 337 338 srr1 = __power7_idle_type(type); 339 irq_set_pending_from_srr1(srr1); 340 } 341 342 void power7_idle(void) 343 { 344 if (!powersave_nap) 345 return; 346 347 power7_idle_type(PNV_THREAD_NAP); 348 } 349 350 static unsigned long __power9_idle_type(unsigned long stop_psscr_val, 351 unsigned long stop_psscr_mask) 352 { 353 unsigned long psscr; 354 unsigned long srr1; 355 356 if (!prep_irq_for_idle_irqsoff()) 357 return 0; 358 359 psscr = mfspr(SPRN_PSSCR); 360 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 361 362 __ppc64_runlatch_off(); 363 srr1 = power9_idle_stop(psscr); 364 __ppc64_runlatch_on(); 365 366 fini_irq_for_idle_irqsoff(); 367 368 return srr1; 369 } 370 371 void power9_idle_type(unsigned long stop_psscr_val, 372 unsigned long stop_psscr_mask) 373 { 374 unsigned long srr1; 375 376 srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); 377 irq_set_pending_from_srr1(srr1); 378 } 379 380 /* 381 * Used for ppc_md.power_save which needs a function with no parameters 382 */ 383 void power9_idle(void) 384 { 385 power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 386 } 387 388 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 389 /* 390 * This is used in working around bugs in thread reconfiguration 391 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional 392 * memory and the way that XER[SO] is checkpointed. 393 * This function forces the core into SMT4 in order by asking 394 * all other threads not to stop, and sending a message to any 395 * that are in a stop state. 396 * Must be called with preemption disabled. 397 */ 398 void pnv_power9_force_smt4_catch(void) 399 { 400 int cpu, cpu0, thr; 401 int awake_threads = 1; /* this thread is awake */ 402 int poke_threads = 0; 403 int need_awake = threads_per_core; 404 405 cpu = smp_processor_id(); 406 cpu0 = cpu & ~(threads_per_core - 1); 407 for (thr = 0; thr < threads_per_core; ++thr) { 408 if (cpu != cpu0 + thr) 409 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 410 } 411 /* order setting dont_stop vs testing requested_psscr */ 412 mb(); 413 for (thr = 0; thr < threads_per_core; ++thr) { 414 if (!paca_ptrs[cpu0+thr]->requested_psscr) 415 ++awake_threads; 416 else 417 poke_threads |= (1 << thr); 418 } 419 420 /* If at least 3 threads are awake, the core is in SMT4 already */ 421 if (awake_threads < need_awake) { 422 /* We have to wake some threads; we'll use msgsnd */ 423 for (thr = 0; thr < threads_per_core; ++thr) { 424 if (poke_threads & (1 << thr)) { 425 ppc_msgsnd_sync(); 426 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, 427 paca_ptrs[cpu0+thr]->hw_cpu_id); 428 } 429 } 430 /* now spin until at least 3 threads are awake */ 431 do { 432 for (thr = 0; thr < threads_per_core; ++thr) { 433 if ((poke_threads & (1 << thr)) && 434 !paca_ptrs[cpu0+thr]->requested_psscr) { 435 ++awake_threads; 436 poke_threads &= ~(1 << thr); 437 } 438 } 439 } while (awake_threads < need_awake); 440 } 441 } 442 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); 443 444 void pnv_power9_force_smt4_release(void) 445 { 446 int cpu, cpu0, thr; 447 448 cpu = smp_processor_id(); 449 cpu0 = cpu & ~(threads_per_core - 1); 450 451 /* clear all the dont_stop flags */ 452 for (thr = 0; thr < threads_per_core; ++thr) { 453 if (cpu != cpu0 + thr) 454 atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); 455 } 456 } 457 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 458 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 459 460 #ifdef CONFIG_HOTPLUG_CPU 461 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) 462 { 463 u64 pir = get_hard_smp_processor_id(cpu); 464 465 mtspr(SPRN_LPCR, lpcr_val); 466 467 /* 468 * Program the LPCR via stop-api only if the deepest stop state 469 * can lose hypervisor context. 470 */ 471 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) 472 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 473 } 474 475 /* 476 * pnv_cpu_offline: A function that puts the CPU into the deepest 477 * available platform idle state on a CPU-Offline. 478 * interrupts hard disabled and no lazy irq pending. 479 */ 480 unsigned long pnv_cpu_offline(unsigned int cpu) 481 { 482 unsigned long srr1; 483 u32 idle_states = pnv_get_supported_cpuidle_states(); 484 u64 lpcr_val; 485 486 /* 487 * We don't want to take decrementer interrupts while we are 488 * offline, so clear LPCR:PECE1. We keep PECE2 (and 489 * LPCR_PECE_HVEE on P9) enabled as to let IPIs in. 490 * 491 * If the CPU gets woken up by a special wakeup, ensure that 492 * the SLW engine sets LPCR with decrementer bit cleared, else 493 * the CPU will come back to the kernel due to a spurious 494 * wakeup. 495 */ 496 lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; 497 pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); 498 499 __ppc64_runlatch_off(); 500 501 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { 502 unsigned long psscr; 503 504 psscr = mfspr(SPRN_PSSCR); 505 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 506 pnv_deepest_stop_psscr_val; 507 srr1 = power9_offline_stop(psscr); 508 509 } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && 510 (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { 511 srr1 = power7_idle_insn(PNV_THREAD_WINKLE); 512 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || 513 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 514 srr1 = power7_idle_insn(PNV_THREAD_SLEEP); 515 } else if (idle_states & OPAL_PM_NAP_ENABLED) { 516 srr1 = power7_idle_insn(PNV_THREAD_NAP); 517 } else { 518 /* This is the fallback method. We emulate snooze */ 519 while (!generic_check_cpu_restart(cpu)) { 520 HMT_low(); 521 HMT_very_low(); 522 } 523 srr1 = 0; 524 HMT_medium(); 525 } 526 527 __ppc64_runlatch_on(); 528 529 /* 530 * Re-enable decrementer interrupts in LPCR. 531 * 532 * Further, we want stop states to be woken up by decrementer 533 * for non-hotplug cases. So program the LPCR via stop api as 534 * well. 535 */ 536 lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; 537 pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); 538 539 return srr1; 540 } 541 #endif 542 543 /* 544 * Power ISA 3.0 idle initialization. 545 * 546 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control 547 * Register (PSSCR) to control idle behavior. 548 * 549 * PSSCR layout: 550 * ---------------------------------------------------------- 551 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | 552 * ---------------------------------------------------------- 553 * 0 4 41 42 43 44 48 54 56 60 554 * 555 * PSSCR key fields: 556 * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the 557 * lowest power-saving state the thread entered since stop instruction was 558 * last executed. 559 * 560 * Bit 41 - Status Disable(SD) 561 * 0 - Shows PLS entries 562 * 1 - PLS entries are all 0 563 * 564 * Bit 42 - Enable State Loss 565 * 0 - No state is lost irrespective of other fields 566 * 1 - Allows state loss 567 * 568 * Bit 43 - Exit Criterion 569 * 0 - Exit from power-save mode on any interrupt 570 * 1 - Exit from power-save mode controlled by LPCR's PECE bits 571 * 572 * Bits 44:47 - Power-Saving Level Limit 573 * This limits the power-saving level that can be entered into. 574 * 575 * Bits 60:63 - Requested Level 576 * Used to specify which power-saving level must be entered on executing 577 * stop instruction 578 */ 579 580 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 581 { 582 int err = 0; 583 584 /* 585 * psscr_mask == 0xf indicates an older firmware. 586 * Set remaining fields of psscr to the default values. 587 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 588 */ 589 if (*psscr_mask == 0xf) { 590 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 591 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 592 return err; 593 } 594 595 /* 596 * New firmware is expected to set the psscr_val bits correctly. 597 * Validate that the following invariants are correctly maintained by 598 * the new firmware. 599 * - ESL bit value matches the EC bit value. 600 * - ESL bit is set for all the deep stop states. 601 */ 602 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 603 err = ERR_EC_ESL_MISMATCH; 604 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 605 GET_PSSCR_ESL(*psscr_val) == 0) { 606 err = ERR_DEEP_STATE_ESL_MISMATCH; 607 } 608 609 return err; 610 } 611 612 /* 613 * pnv_arch300_idle_init: Initializes the default idle state, first 614 * deep idle state and deepest idle state on 615 * ISA 3.0 CPUs. 616 * 617 * @np: /ibm,opal/power-mgt device node 618 * @flags: cpu-idle-state-flags array 619 * @dt_idle_states: Number of idle state entries 620 * Returns 0 on success 621 */ 622 static int __init pnv_power9_idle_init(void) 623 { 624 u64 max_residency_ns = 0; 625 int i; 626 627 /* 628 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, 629 * and the pnv_default_stop_{val,mask}. 630 * 631 * pnv_first_deep_stop_state should be set to the first stop 632 * level to cause hypervisor state loss. 633 * 634 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 635 * the deepest stop state. 636 * 637 * pnv_default_stop_{val,mask} should be set to values corresponding to 638 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. 639 */ 640 pnv_first_deep_stop_state = MAX_STOP_STATE; 641 for (i = 0; i < nr_pnv_idle_states; i++) { 642 int err; 643 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 644 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 645 646 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 647 pnv_first_deep_stop_state > psscr_rl) 648 pnv_first_deep_stop_state = psscr_rl; 649 650 err = validate_psscr_val_mask(&state->psscr_val, 651 &state->psscr_mask, 652 state->flags); 653 if (err) { 654 report_invalid_psscr_val(state->psscr_val, err); 655 continue; 656 } 657 658 state->valid = true; 659 660 if (max_residency_ns < state->residency_ns) { 661 max_residency_ns = state->residency_ns; 662 pnv_deepest_stop_psscr_val = state->psscr_val; 663 pnv_deepest_stop_psscr_mask = state->psscr_mask; 664 pnv_deepest_stop_flag = state->flags; 665 deepest_stop_found = true; 666 } 667 668 if (!default_stop_found && 669 (state->flags & OPAL_PM_STOP_INST_FAST)) { 670 pnv_default_stop_val = state->psscr_val; 671 pnv_default_stop_mask = state->psscr_mask; 672 default_stop_found = true; 673 } 674 } 675 676 if (unlikely(!default_stop_found)) { 677 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 678 } else { 679 ppc_md.power_save = power9_idle; 680 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 681 pnv_default_stop_val, pnv_default_stop_mask); 682 } 683 684 if (unlikely(!deepest_stop_found)) { 685 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait"); 686 } else { 687 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 688 pnv_deepest_stop_psscr_val, 689 pnv_deepest_stop_psscr_mask); 690 } 691 692 pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", 693 pnv_first_deep_stop_state); 694 695 return 0; 696 } 697 698 /* 699 * Probe device tree for supported idle states 700 */ 701 static void __init pnv_probe_idle_states(void) 702 { 703 int i; 704 705 if (nr_pnv_idle_states < 0) { 706 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 707 return; 708 } 709 710 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 711 if (pnv_power9_idle_init()) 712 return; 713 } 714 715 for (i = 0; i < nr_pnv_idle_states; i++) 716 supported_cpuidle_states |= pnv_idle_states[i].flags; 717 } 718 719 /* 720 * This function parses device-tree and populates all the information 721 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 722 * which is the number of cpuidle states discovered through device-tree. 723 */ 724 725 static int pnv_parse_cpuidle_dt(void) 726 { 727 struct device_node *np; 728 int nr_idle_states, i; 729 int rc = 0; 730 u32 *temp_u32; 731 u64 *temp_u64; 732 const char **temp_string; 733 734 np = of_find_node_by_path("/ibm,opal/power-mgt"); 735 if (!np) { 736 pr_warn("opal: PowerMgmt Node not found\n"); 737 return -ENODEV; 738 } 739 nr_idle_states = of_property_count_u32_elems(np, 740 "ibm,cpu-idle-state-flags"); 741 742 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 743 GFP_KERNEL); 744 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 745 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 746 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 747 748 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 749 pr_err("Could not allocate memory for dt parsing\n"); 750 rc = -ENOMEM; 751 goto out; 752 } 753 754 /* Read flags */ 755 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 756 temp_u32, nr_idle_states)) { 757 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 758 rc = -EINVAL; 759 goto out; 760 } 761 for (i = 0; i < nr_idle_states; i++) 762 pnv_idle_states[i].flags = temp_u32[i]; 763 764 /* Read latencies */ 765 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 766 temp_u32, nr_idle_states)) { 767 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 768 rc = -EINVAL; 769 goto out; 770 } 771 for (i = 0; i < nr_idle_states; i++) 772 pnv_idle_states[i].latency_ns = temp_u32[i]; 773 774 /* Read residencies */ 775 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 776 temp_u32, nr_idle_states)) { 777 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 778 rc = -EINVAL; 779 goto out; 780 } 781 for (i = 0; i < nr_idle_states; i++) 782 pnv_idle_states[i].residency_ns = temp_u32[i]; 783 784 /* For power9 */ 785 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 786 /* Read pm_crtl_val */ 787 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 788 temp_u64, nr_idle_states)) { 789 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 790 rc = -EINVAL; 791 goto out; 792 } 793 for (i = 0; i < nr_idle_states; i++) 794 pnv_idle_states[i].psscr_val = temp_u64[i]; 795 796 /* Read pm_crtl_mask */ 797 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 798 temp_u64, nr_idle_states)) { 799 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 800 rc = -EINVAL; 801 goto out; 802 } 803 for (i = 0; i < nr_idle_states; i++) 804 pnv_idle_states[i].psscr_mask = temp_u64[i]; 805 } 806 807 /* 808 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 809 * ibm,cpu-idle-state-pmicr-val were never used and there is no 810 * plan to use it in near future. Hence, not parsing these properties 811 */ 812 813 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 814 temp_string, nr_idle_states) < 0) { 815 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 816 rc = -EINVAL; 817 goto out; 818 } 819 for (i = 0; i < nr_idle_states; i++) 820 strlcpy(pnv_idle_states[i].name, temp_string[i], 821 PNV_IDLE_NAME_LEN); 822 nr_pnv_idle_states = nr_idle_states; 823 rc = 0; 824 out: 825 kfree(temp_u32); 826 kfree(temp_u64); 827 kfree(temp_string); 828 return rc; 829 } 830 831 static int __init pnv_init_idle_states(void) 832 { 833 int rc = 0; 834 supported_cpuidle_states = 0; 835 836 /* In case we error out nr_pnv_idle_states will be zero */ 837 nr_pnv_idle_states = 0; 838 if (cpuidle_disable != IDLE_NO_OVERRIDE) 839 goto out; 840 rc = pnv_parse_cpuidle_dt(); 841 if (rc) 842 return rc; 843 pnv_probe_idle_states(); 844 845 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 846 patch_instruction( 847 (unsigned int *)pnv_fastsleep_workaround_at_entry, 848 PPC_INST_NOP); 849 patch_instruction( 850 (unsigned int *)pnv_fastsleep_workaround_at_exit, 851 PPC_INST_NOP); 852 } else { 853 /* 854 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 855 * workaround is needed to use fastsleep. Provide sysfs 856 * control to choose how this workaround has to be applied. 857 */ 858 device_create_file(cpu_subsys.dev_root, 859 &dev_attr_fastsleep_workaround_applyonce); 860 } 861 862 pnv_alloc_idle_core_states(); 863 864 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) 865 ppc_md.power_save = power7_idle; 866 867 out: 868 return 0; 869 } 870 machine_subsys_initcall(powernv, pnv_init_idle_states); 871