/*
 * PowerNV cpuidle code
 *
 * Copyright 2015 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/device.h>
#include <linux/cpu.h>

#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>

#include "powernv.h"
#include "subcore.h"

/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE	0xF

#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR 855

static u32 supported_cpuidle_states;

/*
 * The default stop state that will be used by the ppc_md.power_save
 * function on platforms that support the stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static bool deepest_stop_found;

static int pnv_save_sprs_for_deep_states(void)
{
	int cpu;
	int rc;

	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
	 * all cpus at boot. Read these register values on the current CPU
	 * and use the same values for all CPUs.
	 */
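	/*
	 * Illustrative note (editor's addition, not part of the original
	 * source): each value read below is handed to firmware through
	 * opal_slw_set_reg(), presumably so the sleep/winkle (SLW) engine
	 * can restore it when a thread wakes from a state that loses full
	 * hypervisor context.
	 */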
	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
	uint64_t hid0_val = mfspr(SPRN_HID0);
	uint64_t hid1_val = mfspr(SPRN_HID1);
	uint64_t hid4_val = mfspr(SPRN_HID4);
	uint64_t hid5_val = mfspr(SPRN_HID5);
	uint64_t hmeer_val = mfspr(SPRN_HMEER);
	uint64_t msr_val = MSR_IDLE;
	uint64_t psscr_val = pnv_deepest_stop_psscr_val;

	for_each_possible_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		uint64_t hsprg0_val = (uint64_t)&paca[cpu];

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
			if (rc)
				return rc;

			rc = opal_slw_set_reg(pir,
					      P9_STOP_SPR_PSSCR, psscr_val);
			if (rc)
				return rc;
		}

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			/* Only p8 needs to set the extra HID registers */
			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {

				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
				if (rc != 0)
					return rc;
			}
		}
	}

	return 0;
}

static void pnv_alloc_idle_core_states(void)
{
	int i, j;
	int nr_cores = cpu_nr_cores();
	u32 *core_idle_state;

	/*
	 * core_idle_state - The lower 8 bits track the idle state of
	 * each thread of the core.
	 *
	 * The most significant bit is the lock bit.
	 *
	 * Initially all the bits corresponding to threads_per_core
	 * are set. They are cleared when the thread enters deep idle
	 * states like sleep and winkle/stop.
	 *
	 * Initially the lock bit is cleared. The lock bit has 2
	 * purposes:
	 * a. While the first thread in the core waking up from
	 *    idle is restoring core state, it prevents other
	 *    threads in the core from switching to process
	 *    context.
	 * b. While the last thread in the core is saving the
	 *    core state, it prevents a different thread from
	 *    waking up.
	 */
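	/*
	 * Worked example (editor's addition, not from the original source):
	 * with threads_per_core == 8, the loop below initialises each
	 * core_idle_state word to (1 << 8) - 1 == 0xff, i.e. all eight
	 * thread bits set (all threads running) and the lock bit clear.
	 */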
	for (i = 0; i < nr_cores; i++) {
		int first_cpu = i * threads_per_core;
		int node = cpu_to_node(first_cpu);
		size_t paca_ptr_array_size;

		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
		*core_idle_state = (1 << threads_per_core) - 1;
		paca_ptr_array_size = (threads_per_core *
				       sizeof(struct paca_struct *));

		for (j = 0; j < threads_per_core; j++) {
			int cpu = first_cpu + j;

			paca[cpu].core_idle_state_ptr = core_idle_state;
			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
			paca[cpu].thread_mask = 1 << j;
			if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
				continue;
			paca[cpu].thread_sibling_pacas =
				kmalloc_node(paca_ptr_array_size,
					     GFP_KERNEL, node);
		}
	}

	update_subcore_sibling_mask();

	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		pnv_save_sprs_for_deep_states();
}

u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

static void pnv_fastsleep_workaround_apply(void *info)
{
	int rc;
	int *err = info;

	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
					OPAL_CONFIG_IDLE_APPLY);
	if (rc)
		*err = 1;
}

/*
 * Used to store fastsleep workaround state
 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 * 1 - Workaround applied once, never undone.
 */
static u8 fastsleep_workaround_applyonce;

static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
}

static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	cpumask_t primary_thread_mask;
	int err;
	u8 val;

	if (kstrtou8(buf, 0, &val) || val != 1)
		return -EINVAL;

	if (fastsleep_workaround_applyonce == 1)
		return count;

	/*
	 * fastsleep_workaround_applyonce = 1 implies that the fastsleep
	 * workaround needs to be left in the 'applied' state on all
	 * the cores. Do this by:
	 * 1. Patching out the call to the 'undo' workaround in the fastsleep
	 *    exit path
	 * 2. Sending an IPI to all the cores which have at least one online
	 *    thread
	 * 3. Patching out the call to the 'apply' workaround in the fastsleep
	 *    entry path
	 * There is no need to send an IPI to cores which have all threads
	 * offlined, as the last thread of the core entering fastsleep or a
	 * deeper state would have applied the workaround.
	 */
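	/*
	 * Illustrative note (editor's addition, not from the original
	 * source): each patch_instruction() call below replaces the
	 * instruction at the named label with a no-op, so the corresponding
	 * workaround call disappears from the fastsleep entry/exit path.
	 * From user space this is triggered by writing 1 to the
	 * fastsleep_workaround_applyonce attribute, e.g.
	 * /sys/devices/system/cpu/fastsleep_workaround_applyonce.
	 */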
	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_exit,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
		goto fail;
	}

	get_online_cpus();
	primary_thread_mask = cpu_online_cores_map();
	on_each_cpu_mask(&primary_thread_mask,
			 pnv_fastsleep_workaround_apply,
			 &err, 1);
	put_online_cpus();
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
		goto fail;
	}

	err = patch_instruction(
		(unsigned int *)pnv_fastsleep_workaround_at_entry,
		PPC_INST_NOP);
	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
		goto fail;
	}

	fastsleep_workaround_applyonce = 1;

	return count;
fail:
	return -EIO;
}

static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
		   show_fastsleep_workaround_applyonce,
		   store_fastsleep_workaround_applyonce);

static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	srr1 = __power7_idle_type(type);
	irq_set_pending_from_srr1(srr1);
}

void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
					unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return 0;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	srr1 = power9_idle_stop(psscr);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	return srr1;
}

void power9_idle_type(unsigned long stop_psscr_val,
		      unsigned long stop_psscr_mask)
{
	unsigned long srr1;

	srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
void power9_idle(void)
{
	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * Called with interrupts hard-disabled and no lazy IRQ pending.
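 *
 * Returns the SRR1 wakeup value so that the caller can decode the wakeup
 * reason (editor's note, inferred from the code below; not part of the
 * original comment).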
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;
	u32 idle_states = pnv_get_supported_cpuidle_states();

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = power9_idle_stop(psscr);

	} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
		srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
	} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
		   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
	} else if (idle_states & OPAL_PM_NAP_ENABLED) {
		srr1 = power7_idle_insn(PNV_THREAD_NAP);
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	return srr1;
}
#endif

/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since the stop
 *	instruction was last executed.
 *
 *	Bit 41 - Status Disable (SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 *	Bit 42 - Enable State Loss (ESL)
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 *	Bit 43 - Exit Criterion (EC)
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 *	Bits 44:47 - Power-Saving Level Limit (PSLL)
 *	This limits the power-saving level that can be entered into.
 *
 *	Bits 60:63 - Requested Level (RL)
 *	Used to specify which power-saving level must be entered on executing
 *	the stop instruction
 */

int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
	int err = 0;

	/*
	 * psscr_mask == 0xf indicates an older firmware.
	 * Set remaining fields of psscr to the default values.
	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
	 */
	if (*psscr_mask == 0xf) {
		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
		return err;
	}

	/*
	 * New firmware is expected to set the psscr_val bits correctly.
	 * Validate that the following invariants are correctly maintained by
	 * the new firmware.
	 * - ESL bit value matches the EC bit value.
	 * - ESL bit is set for all the deep stop states.
	 */
	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
		err = ERR_EC_ESL_MISMATCH;
	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		GET_PSSCR_ESL(*psscr_val) == 0) {
		err = ERR_DEEP_STATE_ESL_MISMATCH;
	}

	return err;
}

/*
 * pnv_power9_idle_init: Initializes the default idle state, first
 *                       deep idle state and deepest idle state on
 *                       ISA 3.0 CPUs.
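 *
 * Illustrative note (editor's addition, not part of the original comment):
 * the psscr, psscr-mask and residency-ns device-tree arrays read below are
 * parallel arrays; entry i of each one describes the same idle state.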
 *
 * @np: /ibm,opal/power-mgt device node
 * @flags: cpu-idle-state-flags array
 * @dt_idle_states: Number of idle state entries
 * Returns 0 on success
 */
static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
					int dt_idle_states)
{
	u64 *psscr_val = NULL;
	u64 *psscr_mask = NULL;
	u32 *residency_ns = NULL;
	u64 max_residency_ns = 0;
	int rc = 0, i;

	psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
	psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
	residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
			       GFP_KERNEL);

	if (!psscr_val || !psscr_mask || !residency_ns) {
		rc = -1;
		goto out;
	}

	if (of_property_read_u64_array(np,
				       "ibm,cpu-idle-state-psscr",
				       psscr_val, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
		rc = -1;
		goto out;
	}

	if (of_property_read_u64_array(np,
				       "ibm,cpu-idle-state-psscr-mask",
				       psscr_mask, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
		rc = -1;
		goto out;
	}

	if (of_property_read_u32_array(np,
				       "ibm,cpu-idle-state-residency-ns",
				       residency_ns, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
		rc = -1;
		goto out;
	}

	/*
	 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
	 * and the pnv_default_stop_{val,mask}.
	 *
	 * pnv_first_deep_stop_state should be set to the first stop
	 * level to cause hypervisor state loss.
	 *
	 * pnv_deepest_stop_psscr_{val,mask} should be set to values
	 * corresponding to the deepest stop state.
	 *
	 * pnv_default_stop_{val,mask} should be set to values corresponding to
	 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
	 */
	pnv_first_deep_stop_state = MAX_STOP_STATE;
	for (i = 0; i < dt_idle_states; i++) {
		int err;
		u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;

		if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
		    (pnv_first_deep_stop_state > psscr_rl))
			pnv_first_deep_stop_state = psscr_rl;

		err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
					      flags[i]);
		if (err) {
			report_invalid_psscr_val(psscr_val[i], err);
			continue;
		}

		if (max_residency_ns < residency_ns[i]) {
			max_residency_ns = residency_ns[i];
			pnv_deepest_stop_psscr_val = psscr_val[i];
			pnv_deepest_stop_psscr_mask = psscr_mask[i];
			deepest_stop_found = true;
		}

		if (!default_stop_found &&
		    (flags[i] & OPAL_PM_STOP_INST_FAST)) {
			pnv_default_stop_val = psscr_val[i];
			pnv_default_stop_mask = psscr_mask[i];
			default_stop_found = true;
		}
	}

	if (unlikely(!default_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
	} else {
		ppc_md.power_save = power9_idle;
		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_default_stop_val, pnv_default_stop_mask);
	}

	if (unlikely(!deepest_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
	} else {
		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
			pnv_deepest_stop_psscr_val,
			pnv_deepest_stop_psscr_mask);
	}

	pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
		pnv_first_deep_stop_state);
out:
	kfree(psscr_val);
	kfree(psscr_mask);
	kfree(residency_ns);
	return rc;
}

/*
 * Probe device tree for supported idle states
 */
static void __init pnv_probe_idle_states(void)
{
	struct device_node *np;
	int dt_idle_states;
	u32 *flags = NULL;
	int i;

	np = of_find_node_by_path("/ibm,opal/power-mgt");
	if (!np) {
		pr_warn("opal: PowerMgmt Node not found\n");
		goto out;
	}
	dt_idle_states = of_property_count_u32_elems(np,
			"ibm,cpu-idle-state-flags");
	if (dt_idle_states < 0) {
		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
		goto out;
	}

	flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL);

	if (of_property_read_u32_array(np,
			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
		goto out;
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		if (pnv_power9_idle_init(np, flags, dt_idle_states))
			goto out;
	}

	for (i = 0; i < dt_idle_states; i++)
		supported_cpuidle_states |= flags[i];

out:
	kfree(flags);
}

static int __init pnv_init_idle_states(void)
{
	supported_cpuidle_states = 0;

	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		goto out;

	pnv_probe_idle_states();

	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_entry,
			PPC_INST_NOP);
		patch_instruction(
			(unsigned int *)pnv_fastsleep_workaround_at_exit,
			PPC_INST_NOP);
	} else {
		/*
		 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that the
		 * workaround is needed to use fastsleep. Provide sysfs
		 * control to choose how this workaround has to be applied.
		 */
		device_create_file(cpu_subsys.dev_root,
				   &dev_attr_fastsleep_workaround_applyonce);
	}

	pnv_alloc_idle_core_states();

	/*
	 * For each CPU, record its PACA address in each of its
	 * sibling threads' PACAs at the slot corresponding to this
	 * CPU's index in the core.
	 */
	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
		int cpu;

		pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
		for_each_possible_cpu(cpu) {
			int base_cpu = cpu_first_thread_sibling(cpu);
			int idx = cpu_thread_in_core(cpu);
			int i;

			for (i = 0; i < threads_per_core; i++) {
				int j = base_cpu + i;

				paca[j].thread_sibling_pacas[idx] = &paca[cpu];
			}
		}
	}

	if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
		ppc_md.power_save = power7_idle;

out:
	return 0;
}
machine_subsys_initcall(powernv, pnv_init_idle_states);