// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/sched/smt.h> 51 #include <linux/notifier.h> 52 #include <linux/cpu.h> 53 #include <linux/moduleparam.h> 54 #include <asm/cpu_device_id.h> 55 #include <asm/intel-family.h> 56 #include <asm/nospec-branch.h> 57 #include <asm/mwait.h> 58 #include <asm/msr.h> 59 #include <asm/fpu/api.h> 60 61 #define INTEL_IDLE_VERSION "0.5.1" 62 63 static struct cpuidle_driver intel_idle_driver = { 64 .name = "intel_idle", 65 .owner = THIS_MODULE, 66 }; 67 /* intel_idle.max_cstate=0 disables driver */ 68 static int max_cstate = CPUIDLE_STATE_MAX - 1; 69 static unsigned int disabled_states_mask; 70 static unsigned int preferred_states_mask; 71 72 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 73 74 static unsigned long auto_demotion_disable_flags; 75 76 static enum { 77 C1E_PROMOTION_PRESERVE, 78 C1E_PROMOTION_ENABLE, 79 C1E_PROMOTION_DISABLE 80 } c1e_promotion = C1E_PROMOTION_PRESERVE; 81 82 struct idle_cpu { 83 struct cpuidle_state *state_table; 84 85 /* 86 * Hardware C-state auto-demotion may not always be optimal. 87 * Indicate which enable bits to clear here. 88 */ 89 unsigned long auto_demotion_disable_flags; 90 bool byt_auto_demotion_disable_flag; 91 bool disable_promotion_to_c1e; 92 bool use_acpi; 93 }; 94 95 static const struct idle_cpu *icpu __initdata; 96 static struct cpuidle_state *cpuidle_state_table __initdata; 97 98 static unsigned int mwait_substates __initdata; 99 100 /* 101 * Enable interrupts before entering the C-state. On some platforms and for 102 * some C-states, this may measurably decrease interrupt latency. 
103 */ 104 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 105 106 /* 107 * Enable this state by default even if the ACPI _CST does not list it. 108 */ 109 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 110 111 /* 112 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 113 * above. 114 */ 115 #define CPUIDLE_FLAG_IBRS BIT(16) 116 117 /* 118 * Initialize large xstate for the C6-state entrance. 119 */ 120 #define CPUIDLE_FLAG_INIT_XSTATE BIT(17) 121 122 /* 123 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 124 * the C-state (top nibble) and sub-state (bottom nibble) 125 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 126 * 127 * We store the hint at the top of our "flags" for each state. 128 */ 129 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 130 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 131 132 static __always_inline int __intel_idle(struct cpuidle_device *dev, 133 struct cpuidle_driver *drv, int index) 134 { 135 struct cpuidle_state *state = &drv->states[index]; 136 unsigned long eax = flg2MWAIT(state->flags); 137 unsigned long ecx = 1; /* break on interrupt flag */ 138 139 mwait_idle_with_hints(eax, ecx); 140 141 return index; 142 } 143 144 /** 145 * intel_idle - Ask the processor to enter the given idle state. 146 * @dev: cpuidle device of the target CPU. 147 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 148 * @index: Target idle state index. 149 * 150 * Use the MWAIT instruction to notify the processor that the CPU represented by 151 * @dev is idle and it can try to enter the idle state corresponding to @index. 152 * 153 * If the local APIC timer is not known to be reliable in the target idle state, 154 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 155 * 156 * Must be called under local_irq_disable(). 
157 */ 158 static __cpuidle int intel_idle(struct cpuidle_device *dev, 159 struct cpuidle_driver *drv, int index) 160 { 161 return __intel_idle(dev, drv, index); 162 } 163 164 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 165 struct cpuidle_driver *drv, int index) 166 { 167 int ret; 168 169 raw_local_irq_enable(); 170 ret = __intel_idle(dev, drv, index); 171 raw_local_irq_disable(); 172 173 return ret; 174 } 175 176 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 177 struct cpuidle_driver *drv, int index) 178 { 179 bool smt_active = sched_smt_active(); 180 u64 spec_ctrl = spec_ctrl_current(); 181 int ret; 182 183 if (smt_active) 184 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); 185 186 ret = __intel_idle(dev, drv, index); 187 188 if (smt_active) 189 native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 190 191 return ret; 192 } 193 194 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, 195 struct cpuidle_driver *drv, int index) 196 { 197 fpu_idle_fpregs(); 198 return __intel_idle(dev, drv, index); 199 } 200 201 /** 202 * intel_idle_s2idle - Ask the processor to enter the given idle state. 203 * @dev: cpuidle device of the target CPU. 204 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 205 * @index: Target idle state index. 206 * 207 * Use the MWAIT instruction to notify the processor that the CPU represented by 208 * @dev is idle and it can try to enter the idle state corresponding to @index. 209 * 210 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 211 * scheduler tick and suspended scheduler clock on the target CPU. 
212 */ 213 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 214 struct cpuidle_driver *drv, int index) 215 { 216 unsigned long ecx = 1; /* break on interrupt flag */ 217 struct cpuidle_state *state = &drv->states[index]; 218 unsigned long eax = flg2MWAIT(state->flags); 219 220 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) 221 fpu_idle_fpregs(); 222 223 mwait_idle_with_hints(eax, ecx); 224 225 return 0; 226 } 227 228 /* 229 * States are indexed by the cstate number, 230 * which is also the index into the MWAIT hint array. 231 * Thus C0 is a dummy. 232 */ 233 static struct cpuidle_state nehalem_cstates[] __initdata = { 234 { 235 .name = "C1", 236 .desc = "MWAIT 0x00", 237 .flags = MWAIT2flg(0x00), 238 .exit_latency = 3, 239 .target_residency = 6, 240 .enter = &intel_idle, 241 .enter_s2idle = intel_idle_s2idle, }, 242 { 243 .name = "C1E", 244 .desc = "MWAIT 0x01", 245 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 246 .exit_latency = 10, 247 .target_residency = 20, 248 .enter = &intel_idle, 249 .enter_s2idle = intel_idle_s2idle, }, 250 { 251 .name = "C3", 252 .desc = "MWAIT 0x10", 253 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 254 .exit_latency = 20, 255 .target_residency = 80, 256 .enter = &intel_idle, 257 .enter_s2idle = intel_idle_s2idle, }, 258 { 259 .name = "C6", 260 .desc = "MWAIT 0x20", 261 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 262 .exit_latency = 200, 263 .target_residency = 800, 264 .enter = &intel_idle, 265 .enter_s2idle = intel_idle_s2idle, }, 266 { 267 .enter = NULL } 268 }; 269 270 static struct cpuidle_state snb_cstates[] __initdata = { 271 { 272 .name = "C1", 273 .desc = "MWAIT 0x00", 274 .flags = MWAIT2flg(0x00), 275 .exit_latency = 2, 276 .target_residency = 2, 277 .enter = &intel_idle, 278 .enter_s2idle = intel_idle_s2idle, }, 279 { 280 .name = "C1E", 281 .desc = "MWAIT 0x01", 282 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 283 .exit_latency = 10, 284 .target_residency = 20, 285 .enter = 
&intel_idle, 286 .enter_s2idle = intel_idle_s2idle, }, 287 { 288 .name = "C3", 289 .desc = "MWAIT 0x10", 290 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 291 .exit_latency = 80, 292 .target_residency = 211, 293 .enter = &intel_idle, 294 .enter_s2idle = intel_idle_s2idle, }, 295 { 296 .name = "C6", 297 .desc = "MWAIT 0x20", 298 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 299 .exit_latency = 104, 300 .target_residency = 345, 301 .enter = &intel_idle, 302 .enter_s2idle = intel_idle_s2idle, }, 303 { 304 .name = "C7", 305 .desc = "MWAIT 0x30", 306 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 307 .exit_latency = 109, 308 .target_residency = 345, 309 .enter = &intel_idle, 310 .enter_s2idle = intel_idle_s2idle, }, 311 { 312 .enter = NULL } 313 }; 314 315 static struct cpuidle_state byt_cstates[] __initdata = { 316 { 317 .name = "C1", 318 .desc = "MWAIT 0x00", 319 .flags = MWAIT2flg(0x00), 320 .exit_latency = 1, 321 .target_residency = 1, 322 .enter = &intel_idle, 323 .enter_s2idle = intel_idle_s2idle, }, 324 { 325 .name = "C6N", 326 .desc = "MWAIT 0x58", 327 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 328 .exit_latency = 300, 329 .target_residency = 275, 330 .enter = &intel_idle, 331 .enter_s2idle = intel_idle_s2idle, }, 332 { 333 .name = "C6S", 334 .desc = "MWAIT 0x52", 335 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 336 .exit_latency = 500, 337 .target_residency = 560, 338 .enter = &intel_idle, 339 .enter_s2idle = intel_idle_s2idle, }, 340 { 341 .name = "C7", 342 .desc = "MWAIT 0x60", 343 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 344 .exit_latency = 1200, 345 .target_residency = 4000, 346 .enter = &intel_idle, 347 .enter_s2idle = intel_idle_s2idle, }, 348 { 349 .name = "C7S", 350 .desc = "MWAIT 0x64", 351 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 352 .exit_latency = 10000, 353 .target_residency = 20000, 354 .enter = &intel_idle, 355 .enter_s2idle = intel_idle_s2idle, }, 356 { 357 .enter = NULL } 358 }; 359 
360 static struct cpuidle_state cht_cstates[] __initdata = { 361 { 362 .name = "C1", 363 .desc = "MWAIT 0x00", 364 .flags = MWAIT2flg(0x00), 365 .exit_latency = 1, 366 .target_residency = 1, 367 .enter = &intel_idle, 368 .enter_s2idle = intel_idle_s2idle, }, 369 { 370 .name = "C6N", 371 .desc = "MWAIT 0x58", 372 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 373 .exit_latency = 80, 374 .target_residency = 275, 375 .enter = &intel_idle, 376 .enter_s2idle = intel_idle_s2idle, }, 377 { 378 .name = "C6S", 379 .desc = "MWAIT 0x52", 380 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 381 .exit_latency = 200, 382 .target_residency = 560, 383 .enter = &intel_idle, 384 .enter_s2idle = intel_idle_s2idle, }, 385 { 386 .name = "C7", 387 .desc = "MWAIT 0x60", 388 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 389 .exit_latency = 1200, 390 .target_residency = 4000, 391 .enter = &intel_idle, 392 .enter_s2idle = intel_idle_s2idle, }, 393 { 394 .name = "C7S", 395 .desc = "MWAIT 0x64", 396 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 397 .exit_latency = 10000, 398 .target_residency = 20000, 399 .enter = &intel_idle, 400 .enter_s2idle = intel_idle_s2idle, }, 401 { 402 .enter = NULL } 403 }; 404 405 static struct cpuidle_state ivb_cstates[] __initdata = { 406 { 407 .name = "C1", 408 .desc = "MWAIT 0x00", 409 .flags = MWAIT2flg(0x00), 410 .exit_latency = 1, 411 .target_residency = 1, 412 .enter = &intel_idle, 413 .enter_s2idle = intel_idle_s2idle, }, 414 { 415 .name = "C1E", 416 .desc = "MWAIT 0x01", 417 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 418 .exit_latency = 10, 419 .target_residency = 20, 420 .enter = &intel_idle, 421 .enter_s2idle = intel_idle_s2idle, }, 422 { 423 .name = "C3", 424 .desc = "MWAIT 0x10", 425 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 426 .exit_latency = 59, 427 .target_residency = 156, 428 .enter = &intel_idle, 429 .enter_s2idle = intel_idle_s2idle, }, 430 { 431 .name = "C6", 432 .desc = "MWAIT 0x20", 433 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 434 .exit_latency = 80, 435 .target_residency = 300, 436 .enter = &intel_idle, 437 .enter_s2idle = intel_idle_s2idle, }, 438 { 439 .name = "C7", 440 .desc = "MWAIT 0x30", 441 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 442 .exit_latency = 87, 443 .target_residency = 300, 444 .enter = &intel_idle, 445 .enter_s2idle = intel_idle_s2idle, }, 446 { 447 .enter = NULL } 448 }; 449 450 static struct cpuidle_state ivt_cstates[] __initdata = { 451 { 452 .name = "C1", 453 .desc = "MWAIT 0x00", 454 .flags = MWAIT2flg(0x00), 455 .exit_latency = 1, 456 .target_residency = 1, 457 .enter = &intel_idle, 458 .enter_s2idle = intel_idle_s2idle, }, 459 { 460 .name = "C1E", 461 .desc = "MWAIT 0x01", 462 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 463 .exit_latency = 10, 464 .target_residency = 80, 465 .enter = &intel_idle, 466 .enter_s2idle = intel_idle_s2idle, }, 467 { 468 .name = "C3", 469 .desc = "MWAIT 0x10", 470 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 471 .exit_latency = 59, 472 .target_residency = 156, 473 .enter = &intel_idle, 474 .enter_s2idle = intel_idle_s2idle, }, 475 { 476 .name = "C6", 477 .desc = "MWAIT 0x20", 478 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 479 .exit_latency = 82, 480 .target_residency = 300, 481 .enter = &intel_idle, 482 .enter_s2idle = intel_idle_s2idle, }, 483 { 484 .enter = NULL } 485 }; 486 487 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 488 { 489 .name = "C1", 490 .desc = "MWAIT 0x00", 491 .flags = MWAIT2flg(0x00), 492 .exit_latency = 1, 493 .target_residency = 1, 494 .enter = &intel_idle, 495 .enter_s2idle = intel_idle_s2idle, }, 496 { 497 .name = "C1E", 498 .desc = "MWAIT 0x01", 499 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 500 .exit_latency = 10, 501 .target_residency = 250, 502 .enter = &intel_idle, 503 .enter_s2idle = intel_idle_s2idle, }, 504 { 505 .name = "C3", 506 .desc = "MWAIT 0x10", 507 .flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TLB_FLUSHED, 508 .exit_latency = 59, 509 .target_residency = 300, 510 .enter = &intel_idle, 511 .enter_s2idle = intel_idle_s2idle, }, 512 { 513 .name = "C6", 514 .desc = "MWAIT 0x20", 515 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 516 .exit_latency = 84, 517 .target_residency = 400, 518 .enter = &intel_idle, 519 .enter_s2idle = intel_idle_s2idle, }, 520 { 521 .enter = NULL } 522 }; 523 524 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 525 { 526 .name = "C1", 527 .desc = "MWAIT 0x00", 528 .flags = MWAIT2flg(0x00), 529 .exit_latency = 1, 530 .target_residency = 1, 531 .enter = &intel_idle, 532 .enter_s2idle = intel_idle_s2idle, }, 533 { 534 .name = "C1E", 535 .desc = "MWAIT 0x01", 536 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 537 .exit_latency = 10, 538 .target_residency = 500, 539 .enter = &intel_idle, 540 .enter_s2idle = intel_idle_s2idle, }, 541 { 542 .name = "C3", 543 .desc = "MWAIT 0x10", 544 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 545 .exit_latency = 59, 546 .target_residency = 600, 547 .enter = &intel_idle, 548 .enter_s2idle = intel_idle_s2idle, }, 549 { 550 .name = "C6", 551 .desc = "MWAIT 0x20", 552 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 553 .exit_latency = 88, 554 .target_residency = 700, 555 .enter = &intel_idle, 556 .enter_s2idle = intel_idle_s2idle, }, 557 { 558 .enter = NULL } 559 }; 560 561 static struct cpuidle_state hsw_cstates[] __initdata = { 562 { 563 .name = "C1", 564 .desc = "MWAIT 0x00", 565 .flags = MWAIT2flg(0x00), 566 .exit_latency = 2, 567 .target_residency = 2, 568 .enter = &intel_idle, 569 .enter_s2idle = intel_idle_s2idle, }, 570 { 571 .name = "C1E", 572 .desc = "MWAIT 0x01", 573 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 574 .exit_latency = 10, 575 .target_residency = 20, 576 .enter = &intel_idle, 577 .enter_s2idle = intel_idle_s2idle, }, 578 { 579 .name = "C3", 580 .desc = "MWAIT 0x10", 581 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 582 
.exit_latency = 33, 583 .target_residency = 100, 584 .enter = &intel_idle, 585 .enter_s2idle = intel_idle_s2idle, }, 586 { 587 .name = "C6", 588 .desc = "MWAIT 0x20", 589 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 590 .exit_latency = 133, 591 .target_residency = 400, 592 .enter = &intel_idle, 593 .enter_s2idle = intel_idle_s2idle, }, 594 { 595 .name = "C7s", 596 .desc = "MWAIT 0x32", 597 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 598 .exit_latency = 166, 599 .target_residency = 500, 600 .enter = &intel_idle, 601 .enter_s2idle = intel_idle_s2idle, }, 602 { 603 .name = "C8", 604 .desc = "MWAIT 0x40", 605 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 606 .exit_latency = 300, 607 .target_residency = 900, 608 .enter = &intel_idle, 609 .enter_s2idle = intel_idle_s2idle, }, 610 { 611 .name = "C9", 612 .desc = "MWAIT 0x50", 613 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 614 .exit_latency = 600, 615 .target_residency = 1800, 616 .enter = &intel_idle, 617 .enter_s2idle = intel_idle_s2idle, }, 618 { 619 .name = "C10", 620 .desc = "MWAIT 0x60", 621 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 622 .exit_latency = 2600, 623 .target_residency = 7700, 624 .enter = &intel_idle, 625 .enter_s2idle = intel_idle_s2idle, }, 626 { 627 .enter = NULL } 628 }; 629 static struct cpuidle_state bdw_cstates[] __initdata = { 630 { 631 .name = "C1", 632 .desc = "MWAIT 0x00", 633 .flags = MWAIT2flg(0x00), 634 .exit_latency = 2, 635 .target_residency = 2, 636 .enter = &intel_idle, 637 .enter_s2idle = intel_idle_s2idle, }, 638 { 639 .name = "C1E", 640 .desc = "MWAIT 0x01", 641 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 642 .exit_latency = 10, 643 .target_residency = 20, 644 .enter = &intel_idle, 645 .enter_s2idle = intel_idle_s2idle, }, 646 { 647 .name = "C3", 648 .desc = "MWAIT 0x10", 649 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 650 .exit_latency = 40, 651 .target_residency = 100, 652 .enter = &intel_idle, 653 .enter_s2idle = 
intel_idle_s2idle, }, 654 { 655 .name = "C6", 656 .desc = "MWAIT 0x20", 657 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 658 .exit_latency = 133, 659 .target_residency = 400, 660 .enter = &intel_idle, 661 .enter_s2idle = intel_idle_s2idle, }, 662 { 663 .name = "C7s", 664 .desc = "MWAIT 0x32", 665 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 666 .exit_latency = 166, 667 .target_residency = 500, 668 .enter = &intel_idle, 669 .enter_s2idle = intel_idle_s2idle, }, 670 { 671 .name = "C8", 672 .desc = "MWAIT 0x40", 673 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 674 .exit_latency = 300, 675 .target_residency = 900, 676 .enter = &intel_idle, 677 .enter_s2idle = intel_idle_s2idle, }, 678 { 679 .name = "C9", 680 .desc = "MWAIT 0x50", 681 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 682 .exit_latency = 600, 683 .target_residency = 1800, 684 .enter = &intel_idle, 685 .enter_s2idle = intel_idle_s2idle, }, 686 { 687 .name = "C10", 688 .desc = "MWAIT 0x60", 689 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 690 .exit_latency = 2600, 691 .target_residency = 7700, 692 .enter = &intel_idle, 693 .enter_s2idle = intel_idle_s2idle, }, 694 { 695 .enter = NULL } 696 }; 697 698 static struct cpuidle_state skl_cstates[] __initdata = { 699 { 700 .name = "C1", 701 .desc = "MWAIT 0x00", 702 .flags = MWAIT2flg(0x00), 703 .exit_latency = 2, 704 .target_residency = 2, 705 .enter = &intel_idle, 706 .enter_s2idle = intel_idle_s2idle, }, 707 { 708 .name = "C1E", 709 .desc = "MWAIT 0x01", 710 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 711 .exit_latency = 10, 712 .target_residency = 20, 713 .enter = &intel_idle, 714 .enter_s2idle = intel_idle_s2idle, }, 715 { 716 .name = "C3", 717 .desc = "MWAIT 0x10", 718 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 719 .exit_latency = 70, 720 .target_residency = 100, 721 .enter = &intel_idle, 722 .enter_s2idle = intel_idle_s2idle, }, 723 { 724 .name = "C6", 725 .desc = "MWAIT 0x20", 726 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 727 .exit_latency = 85, 728 .target_residency = 200, 729 .enter = &intel_idle, 730 .enter_s2idle = intel_idle_s2idle, }, 731 { 732 .name = "C7s", 733 .desc = "MWAIT 0x33", 734 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 735 .exit_latency = 124, 736 .target_residency = 800, 737 .enter = &intel_idle, 738 .enter_s2idle = intel_idle_s2idle, }, 739 { 740 .name = "C8", 741 .desc = "MWAIT 0x40", 742 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 743 .exit_latency = 200, 744 .target_residency = 800, 745 .enter = &intel_idle, 746 .enter_s2idle = intel_idle_s2idle, }, 747 { 748 .name = "C9", 749 .desc = "MWAIT 0x50", 750 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 751 .exit_latency = 480, 752 .target_residency = 5000, 753 .enter = &intel_idle, 754 .enter_s2idle = intel_idle_s2idle, }, 755 { 756 .name = "C10", 757 .desc = "MWAIT 0x60", 758 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 759 .exit_latency = 890, 760 .target_residency = 5000, 761 .enter = &intel_idle, 762 .enter_s2idle = intel_idle_s2idle, }, 763 { 764 .enter = NULL } 765 }; 766 767 static struct cpuidle_state skx_cstates[] __initdata = { 768 { 769 .name = "C1", 770 .desc = "MWAIT 0x00", 771 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 772 .exit_latency = 2, 773 .target_residency = 2, 774 .enter = &intel_idle, 775 .enter_s2idle = intel_idle_s2idle, }, 776 { 777 .name = "C1E", 778 .desc = "MWAIT 0x01", 779 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 780 .exit_latency = 10, 781 .target_residency = 20, 782 .enter = &intel_idle, 783 .enter_s2idle = intel_idle_s2idle, }, 784 { 785 .name = "C6", 786 .desc = "MWAIT 0x20", 787 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 788 .exit_latency = 133, 789 .target_residency = 600, 790 .enter = &intel_idle, 791 .enter_s2idle = intel_idle_s2idle, }, 792 { 793 
.enter = NULL } 794 }; 795 796 static struct cpuidle_state icx_cstates[] __initdata = { 797 { 798 .name = "C1", 799 .desc = "MWAIT 0x00", 800 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 801 .exit_latency = 1, 802 .target_residency = 1, 803 .enter = &intel_idle, 804 .enter_s2idle = intel_idle_s2idle, }, 805 { 806 .name = "C1E", 807 .desc = "MWAIT 0x01", 808 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 809 .exit_latency = 4, 810 .target_residency = 4, 811 .enter = &intel_idle, 812 .enter_s2idle = intel_idle_s2idle, }, 813 { 814 .name = "C6", 815 .desc = "MWAIT 0x20", 816 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 817 .exit_latency = 170, 818 .target_residency = 600, 819 .enter = &intel_idle, 820 .enter_s2idle = intel_idle_s2idle, }, 821 { 822 .enter = NULL } 823 }; 824 825 /* 826 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 827 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 828 * But in this case there is effectively no C1, because C1 requests are 829 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 830 * and C1E requests end up with C1, so there is effectively no C1E. 831 * 832 * By default we enable C1E and disable C1 by marking it with 833 * 'CPUIDLE_FLAG_UNUSABLE'. 
834 */ 835 static struct cpuidle_state adl_cstates[] __initdata = { 836 { 837 .name = "C1", 838 .desc = "MWAIT 0x00", 839 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 840 .exit_latency = 1, 841 .target_residency = 1, 842 .enter = &intel_idle, 843 .enter_s2idle = intel_idle_s2idle, }, 844 { 845 .name = "C1E", 846 .desc = "MWAIT 0x01", 847 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 848 .exit_latency = 2, 849 .target_residency = 4, 850 .enter = &intel_idle, 851 .enter_s2idle = intel_idle_s2idle, }, 852 { 853 .name = "C6", 854 .desc = "MWAIT 0x20", 855 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 856 .exit_latency = 220, 857 .target_residency = 600, 858 .enter = &intel_idle, 859 .enter_s2idle = intel_idle_s2idle, }, 860 { 861 .name = "C8", 862 .desc = "MWAIT 0x40", 863 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 864 .exit_latency = 280, 865 .target_residency = 800, 866 .enter = &intel_idle, 867 .enter_s2idle = intel_idle_s2idle, }, 868 { 869 .name = "C10", 870 .desc = "MWAIT 0x60", 871 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 872 .exit_latency = 680, 873 .target_residency = 2000, 874 .enter = &intel_idle, 875 .enter_s2idle = intel_idle_s2idle, }, 876 { 877 .enter = NULL } 878 }; 879 880 static struct cpuidle_state adl_l_cstates[] __initdata = { 881 { 882 .name = "C1", 883 .desc = "MWAIT 0x00", 884 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 885 .exit_latency = 1, 886 .target_residency = 1, 887 .enter = &intel_idle, 888 .enter_s2idle = intel_idle_s2idle, }, 889 { 890 .name = "C1E", 891 .desc = "MWAIT 0x01", 892 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 893 .exit_latency = 2, 894 .target_residency = 4, 895 .enter = &intel_idle, 896 .enter_s2idle = intel_idle_s2idle, }, 897 { 898 .name = "C6", 899 .desc = "MWAIT 0x20", 900 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 901 .exit_latency = 170, 902 .target_residency = 500, 903 .enter = &intel_idle, 904 .enter_s2idle = intel_idle_s2idle, }, 905 { 906 
.name = "C8", 907 .desc = "MWAIT 0x40", 908 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 909 .exit_latency = 200, 910 .target_residency = 600, 911 .enter = &intel_idle, 912 .enter_s2idle = intel_idle_s2idle, }, 913 { 914 .name = "C10", 915 .desc = "MWAIT 0x60", 916 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 917 .exit_latency = 230, 918 .target_residency = 700, 919 .enter = &intel_idle, 920 .enter_s2idle = intel_idle_s2idle, }, 921 { 922 .enter = NULL } 923 }; 924 925 static struct cpuidle_state adl_n_cstates[] __initdata = { 926 { 927 .name = "C1", 928 .desc = "MWAIT 0x00", 929 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 930 .exit_latency = 1, 931 .target_residency = 1, 932 .enter = &intel_idle, 933 .enter_s2idle = intel_idle_s2idle, }, 934 { 935 .name = "C1E", 936 .desc = "MWAIT 0x01", 937 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 938 .exit_latency = 2, 939 .target_residency = 4, 940 .enter = &intel_idle, 941 .enter_s2idle = intel_idle_s2idle, }, 942 { 943 .name = "C6", 944 .desc = "MWAIT 0x20", 945 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 946 .exit_latency = 195, 947 .target_residency = 585, 948 .enter = &intel_idle, 949 .enter_s2idle = intel_idle_s2idle, }, 950 { 951 .name = "C8", 952 .desc = "MWAIT 0x40", 953 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 954 .exit_latency = 260, 955 .target_residency = 1040, 956 .enter = &intel_idle, 957 .enter_s2idle = intel_idle_s2idle, }, 958 { 959 .name = "C10", 960 .desc = "MWAIT 0x60", 961 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 962 .exit_latency = 660, 963 .target_residency = 1980, 964 .enter = &intel_idle, 965 .enter_s2idle = intel_idle_s2idle, }, 966 { 967 .enter = NULL } 968 }; 969 970 static struct cpuidle_state spr_cstates[] __initdata = { 971 { 972 .name = "C1", 973 .desc = "MWAIT 0x00", 974 .flags = MWAIT2flg(0x00), 975 .exit_latency = 1, 976 .target_residency = 1, 977 .enter = &intel_idle, 978 .enter_s2idle = intel_idle_s2idle, }, 979 { 980 
.name = "C1E", 981 .desc = "MWAIT 0x01", 982 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 983 .exit_latency = 2, 984 .target_residency = 4, 985 .enter = &intel_idle, 986 .enter_s2idle = intel_idle_s2idle, }, 987 { 988 .name = "C6", 989 .desc = "MWAIT 0x20", 990 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 991 CPUIDLE_FLAG_INIT_XSTATE, 992 .exit_latency = 290, 993 .target_residency = 800, 994 .enter = &intel_idle, 995 .enter_s2idle = intel_idle_s2idle, }, 996 { 997 .enter = NULL } 998 }; 999 1000 static struct cpuidle_state atom_cstates[] __initdata = { 1001 { 1002 .name = "C1E", 1003 .desc = "MWAIT 0x00", 1004 .flags = MWAIT2flg(0x00), 1005 .exit_latency = 10, 1006 .target_residency = 20, 1007 .enter = &intel_idle, 1008 .enter_s2idle = intel_idle_s2idle, }, 1009 { 1010 .name = "C2", 1011 .desc = "MWAIT 0x10", 1012 .flags = MWAIT2flg(0x10), 1013 .exit_latency = 20, 1014 .target_residency = 80, 1015 .enter = &intel_idle, 1016 .enter_s2idle = intel_idle_s2idle, }, 1017 { 1018 .name = "C4", 1019 .desc = "MWAIT 0x30", 1020 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1021 .exit_latency = 100, 1022 .target_residency = 400, 1023 .enter = &intel_idle, 1024 .enter_s2idle = intel_idle_s2idle, }, 1025 { 1026 .name = "C6", 1027 .desc = "MWAIT 0x52", 1028 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1029 .exit_latency = 140, 1030 .target_residency = 560, 1031 .enter = &intel_idle, 1032 .enter_s2idle = intel_idle_s2idle, }, 1033 { 1034 .enter = NULL } 1035 }; 1036 static struct cpuidle_state tangier_cstates[] __initdata = { 1037 { 1038 .name = "C1", 1039 .desc = "MWAIT 0x00", 1040 .flags = MWAIT2flg(0x00), 1041 .exit_latency = 1, 1042 .target_residency = 4, 1043 .enter = &intel_idle, 1044 .enter_s2idle = intel_idle_s2idle, }, 1045 { 1046 .name = "C4", 1047 .desc = "MWAIT 0x30", 1048 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1049 .exit_latency = 100, 1050 .target_residency = 400, 1051 .enter = &intel_idle, 1052 .enter_s2idle = 
intel_idle_s2idle, }, 1053 { 1054 .name = "C6", 1055 .desc = "MWAIT 0x52", 1056 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1057 .exit_latency = 140, 1058 .target_residency = 560, 1059 .enter = &intel_idle, 1060 .enter_s2idle = intel_idle_s2idle, }, 1061 { 1062 .name = "C7", 1063 .desc = "MWAIT 0x60", 1064 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1065 .exit_latency = 1200, 1066 .target_residency = 4000, 1067 .enter = &intel_idle, 1068 .enter_s2idle = intel_idle_s2idle, }, 1069 { 1070 .name = "C9", 1071 .desc = "MWAIT 0x64", 1072 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1073 .exit_latency = 10000, 1074 .target_residency = 20000, 1075 .enter = &intel_idle, 1076 .enter_s2idle = intel_idle_s2idle, }, 1077 { 1078 .enter = NULL } 1079 }; 1080 static struct cpuidle_state avn_cstates[] __initdata = { 1081 { 1082 .name = "C1", 1083 .desc = "MWAIT 0x00", 1084 .flags = MWAIT2flg(0x00), 1085 .exit_latency = 2, 1086 .target_residency = 2, 1087 .enter = &intel_idle, 1088 .enter_s2idle = intel_idle_s2idle, }, 1089 { 1090 .name = "C6", 1091 .desc = "MWAIT 0x51", 1092 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1093 .exit_latency = 15, 1094 .target_residency = 45, 1095 .enter = &intel_idle, 1096 .enter_s2idle = intel_idle_s2idle, }, 1097 { 1098 .enter = NULL } 1099 }; 1100 static struct cpuidle_state knl_cstates[] __initdata = { 1101 { 1102 .name = "C1", 1103 .desc = "MWAIT 0x00", 1104 .flags = MWAIT2flg(0x00), 1105 .exit_latency = 1, 1106 .target_residency = 2, 1107 .enter = &intel_idle, 1108 .enter_s2idle = intel_idle_s2idle }, 1109 { 1110 .name = "C6", 1111 .desc = "MWAIT 0x10", 1112 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1113 .exit_latency = 120, 1114 .target_residency = 500, 1115 .enter = &intel_idle, 1116 .enter_s2idle = intel_idle_s2idle }, 1117 { 1118 .enter = NULL } 1119 }; 1120 1121 static struct cpuidle_state bxt_cstates[] __initdata = { 1122 { 1123 .name = "C1", 1124 .desc = "MWAIT 0x00", 1125 .flags = 
MWAIT2flg(0x00), 1126 .exit_latency = 2, 1127 .target_residency = 2, 1128 .enter = &intel_idle, 1129 .enter_s2idle = intel_idle_s2idle, }, 1130 { 1131 .name = "C1E", 1132 .desc = "MWAIT 0x01", 1133 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1134 .exit_latency = 10, 1135 .target_residency = 20, 1136 .enter = &intel_idle, 1137 .enter_s2idle = intel_idle_s2idle, }, 1138 { 1139 .name = "C6", 1140 .desc = "MWAIT 0x20", 1141 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1142 .exit_latency = 133, 1143 .target_residency = 133, 1144 .enter = &intel_idle, 1145 .enter_s2idle = intel_idle_s2idle, }, 1146 { 1147 .name = "C7s", 1148 .desc = "MWAIT 0x31", 1149 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1150 .exit_latency = 155, 1151 .target_residency = 155, 1152 .enter = &intel_idle, 1153 .enter_s2idle = intel_idle_s2idle, }, 1154 { 1155 .name = "C8", 1156 .desc = "MWAIT 0x40", 1157 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1158 .exit_latency = 1000, 1159 .target_residency = 1000, 1160 .enter = &intel_idle, 1161 .enter_s2idle = intel_idle_s2idle, }, 1162 { 1163 .name = "C9", 1164 .desc = "MWAIT 0x50", 1165 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1166 .exit_latency = 2000, 1167 .target_residency = 2000, 1168 .enter = &intel_idle, 1169 .enter_s2idle = intel_idle_s2idle, }, 1170 { 1171 .name = "C10", 1172 .desc = "MWAIT 0x60", 1173 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1174 .exit_latency = 10000, 1175 .target_residency = 10000, 1176 .enter = &intel_idle, 1177 .enter_s2idle = intel_idle_s2idle, }, 1178 { 1179 .enter = NULL } 1180 }; 1181 1182 static struct cpuidle_state dnv_cstates[] __initdata = { 1183 { 1184 .name = "C1", 1185 .desc = "MWAIT 0x00", 1186 .flags = MWAIT2flg(0x00), 1187 .exit_latency = 2, 1188 .target_residency = 2, 1189 .enter = &intel_idle, 1190 .enter_s2idle = intel_idle_s2idle, }, 1191 { 1192 .name = "C1E", 1193 .desc = "MWAIT 0x01", 1194 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1195 
.exit_latency = 10, 1196 .target_residency = 20, 1197 .enter = &intel_idle, 1198 .enter_s2idle = intel_idle_s2idle, }, 1199 { 1200 .name = "C6", 1201 .desc = "MWAIT 0x20", 1202 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1203 .exit_latency = 50, 1204 .target_residency = 500, 1205 .enter = &intel_idle, 1206 .enter_s2idle = intel_idle_s2idle, }, 1207 { 1208 .enter = NULL } 1209 }; 1210 1211 /* 1212 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 1213 * C6, and this is indicated in the CPUID mwait leaf. 1214 */ 1215 static struct cpuidle_state snr_cstates[] __initdata = { 1216 { 1217 .name = "C1", 1218 .desc = "MWAIT 0x00", 1219 .flags = MWAIT2flg(0x00), 1220 .exit_latency = 2, 1221 .target_residency = 2, 1222 .enter = &intel_idle, 1223 .enter_s2idle = intel_idle_s2idle, }, 1224 { 1225 .name = "C1E", 1226 .desc = "MWAIT 0x01", 1227 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1228 .exit_latency = 15, 1229 .target_residency = 25, 1230 .enter = &intel_idle, 1231 .enter_s2idle = intel_idle_s2idle, }, 1232 { 1233 .name = "C6", 1234 .desc = "MWAIT 0x20", 1235 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1236 .exit_latency = 130, 1237 .target_residency = 500, 1238 .enter = &intel_idle, 1239 .enter_s2idle = intel_idle_s2idle, }, 1240 { 1241 .enter = NULL } 1242 }; 1243 1244 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1245 .state_table = nehalem_cstates, 1246 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1247 .disable_promotion_to_c1e = true, 1248 }; 1249 1250 static const struct idle_cpu idle_cpu_nhx __initconst = { 1251 .state_table = nehalem_cstates, 1252 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1253 .disable_promotion_to_c1e = true, 1254 .use_acpi = true, 1255 }; 1256 1257 static const struct idle_cpu idle_cpu_atom __initconst = { 1258 .state_table = atom_cstates, 1259 }; 1260 1261 static const struct idle_cpu idle_cpu_tangier __initconst = { 
1262 .state_table = tangier_cstates, 1263 }; 1264 1265 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1266 .state_table = atom_cstates, 1267 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1268 }; 1269 1270 static const struct idle_cpu idle_cpu_snb __initconst = { 1271 .state_table = snb_cstates, 1272 .disable_promotion_to_c1e = true, 1273 }; 1274 1275 static const struct idle_cpu idle_cpu_snx __initconst = { 1276 .state_table = snb_cstates, 1277 .disable_promotion_to_c1e = true, 1278 .use_acpi = true, 1279 }; 1280 1281 static const struct idle_cpu idle_cpu_byt __initconst = { 1282 .state_table = byt_cstates, 1283 .disable_promotion_to_c1e = true, 1284 .byt_auto_demotion_disable_flag = true, 1285 }; 1286 1287 static const struct idle_cpu idle_cpu_cht __initconst = { 1288 .state_table = cht_cstates, 1289 .disable_promotion_to_c1e = true, 1290 .byt_auto_demotion_disable_flag = true, 1291 }; 1292 1293 static const struct idle_cpu idle_cpu_ivb __initconst = { 1294 .state_table = ivb_cstates, 1295 .disable_promotion_to_c1e = true, 1296 }; 1297 1298 static const struct idle_cpu idle_cpu_ivt __initconst = { 1299 .state_table = ivt_cstates, 1300 .disable_promotion_to_c1e = true, 1301 .use_acpi = true, 1302 }; 1303 1304 static const struct idle_cpu idle_cpu_hsw __initconst = { 1305 .state_table = hsw_cstates, 1306 .disable_promotion_to_c1e = true, 1307 }; 1308 1309 static const struct idle_cpu idle_cpu_hsx __initconst = { 1310 .state_table = hsw_cstates, 1311 .disable_promotion_to_c1e = true, 1312 .use_acpi = true, 1313 }; 1314 1315 static const struct idle_cpu idle_cpu_bdw __initconst = { 1316 .state_table = bdw_cstates, 1317 .disable_promotion_to_c1e = true, 1318 }; 1319 1320 static const struct idle_cpu idle_cpu_bdx __initconst = { 1321 .state_table = bdw_cstates, 1322 .disable_promotion_to_c1e = true, 1323 .use_acpi = true, 1324 }; 1325 1326 static const struct idle_cpu idle_cpu_skl __initconst = { 1327 .state_table = skl_cstates, 1328 
.disable_promotion_to_c1e = true, 1329 }; 1330 1331 static const struct idle_cpu idle_cpu_skx __initconst = { 1332 .state_table = skx_cstates, 1333 .disable_promotion_to_c1e = true, 1334 .use_acpi = true, 1335 }; 1336 1337 static const struct idle_cpu idle_cpu_icx __initconst = { 1338 .state_table = icx_cstates, 1339 .disable_promotion_to_c1e = true, 1340 .use_acpi = true, 1341 }; 1342 1343 static const struct idle_cpu idle_cpu_adl __initconst = { 1344 .state_table = adl_cstates, 1345 }; 1346 1347 static const struct idle_cpu idle_cpu_adl_l __initconst = { 1348 .state_table = adl_l_cstates, 1349 }; 1350 1351 static const struct idle_cpu idle_cpu_adl_n __initconst = { 1352 .state_table = adl_n_cstates, 1353 }; 1354 1355 static const struct idle_cpu idle_cpu_spr __initconst = { 1356 .state_table = spr_cstates, 1357 .disable_promotion_to_c1e = true, 1358 .use_acpi = true, 1359 }; 1360 1361 static const struct idle_cpu idle_cpu_avn __initconst = { 1362 .state_table = avn_cstates, 1363 .disable_promotion_to_c1e = true, 1364 .use_acpi = true, 1365 }; 1366 1367 static const struct idle_cpu idle_cpu_knl __initconst = { 1368 .state_table = knl_cstates, 1369 .use_acpi = true, 1370 }; 1371 1372 static const struct idle_cpu idle_cpu_bxt __initconst = { 1373 .state_table = bxt_cstates, 1374 .disable_promotion_to_c1e = true, 1375 }; 1376 1377 static const struct idle_cpu idle_cpu_dnv __initconst = { 1378 .state_table = dnv_cstates, 1379 .disable_promotion_to_c1e = true, 1380 .use_acpi = true, 1381 }; 1382 1383 static const struct idle_cpu idle_cpu_snr __initconst = { 1384 .state_table = snr_cstates, 1385 .disable_promotion_to_c1e = true, 1386 .use_acpi = true, 1387 }; 1388 1389 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1390 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1391 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1392 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1393 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, 
&idle_cpu_nehalem), 1394 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1395 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1396 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1397 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1398 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1399 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1400 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1401 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1402 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1403 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1404 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1405 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1406 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1407 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1408 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1409 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1410 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1411 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1412 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1413 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1414 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1415 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1416 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1417 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1418 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1419 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1420 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1421 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1422 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1423 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), 1424 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), 1425 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n), 1426 
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), 1427 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), 1428 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1429 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1430 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1431 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1432 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1433 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1434 {} 1435 }; 1436 1437 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1438 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1439 {} 1440 }; 1441 1442 static bool __init intel_idle_max_cstate_reached(int cstate) 1443 { 1444 if (cstate + 1 > max_cstate) { 1445 pr_info("max_cstate %d reached\n", max_cstate); 1446 return true; 1447 } 1448 return false; 1449 } 1450 1451 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1452 { 1453 unsigned long eax = flg2MWAIT(state->flags); 1454 1455 if (boot_cpu_has(X86_FEATURE_ARAT)) 1456 return false; 1457 1458 /* 1459 * Switch over to one-shot tick broadcast if the target C-state 1460 * is deeper than C1. 1461 */ 1462 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1463 } 1464 1465 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1466 #include <acpi/processor.h> 1467 1468 static bool no_acpi __read_mostly; 1469 module_param(no_acpi, bool, 0444); 1470 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1471 1472 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1473 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1474 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1475 1476 static struct acpi_processor_power acpi_state_table __initdata; 1477 1478 /** 1479 * intel_idle_cst_usable - Check if the _CST information can be used. 
1480 * 1481 * Check if all of the C-states listed by _CST in the max_cstate range are 1482 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1483 */ 1484 static bool __init intel_idle_cst_usable(void) 1485 { 1486 int cstate, limit; 1487 1488 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1489 acpi_state_table.count); 1490 1491 for (cstate = 1; cstate < limit; cstate++) { 1492 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1493 1494 if (cx->entry_method != ACPI_CSTATE_FFH) 1495 return false; 1496 } 1497 1498 return true; 1499 } 1500 1501 static bool __init intel_idle_acpi_cst_extract(void) 1502 { 1503 unsigned int cpu; 1504 1505 if (no_acpi) { 1506 pr_debug("Not allowed to use ACPI _CST\n"); 1507 return false; 1508 } 1509 1510 for_each_possible_cpu(cpu) { 1511 struct acpi_processor *pr = per_cpu(processors, cpu); 1512 1513 if (!pr) 1514 continue; 1515 1516 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1517 continue; 1518 1519 acpi_state_table.count++; 1520 1521 if (!intel_idle_cst_usable()) 1522 continue; 1523 1524 if (!acpi_processor_claim_cst_control()) 1525 break; 1526 1527 return true; 1528 } 1529 1530 acpi_state_table.count = 0; 1531 pr_debug("ACPI _CST not found or not usable\n"); 1532 return false; 1533 } 1534 1535 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1536 { 1537 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1538 1539 /* 1540 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1541 * the interesting states are ACPI_CSTATE_FFH. 
1542 */ 1543 for (cstate = 1; cstate < limit; cstate++) { 1544 struct acpi_processor_cx *cx; 1545 struct cpuidle_state *state; 1546 1547 if (intel_idle_max_cstate_reached(cstate - 1)) 1548 break; 1549 1550 cx = &acpi_state_table.states[cstate]; 1551 1552 state = &drv->states[drv->state_count++]; 1553 1554 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1555 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1556 state->exit_latency = cx->latency; 1557 /* 1558 * For C1-type C-states use the same number for both the exit 1559 * latency and target residency, because that is the case for 1560 * C1 in the majority of the static C-states tables above. 1561 * For the other types of C-states, however, set the target 1562 * residency to 3 times the exit latency which should lead to 1563 * a reasonable balance between energy-efficiency and 1564 * performance in the majority of interesting cases. 1565 */ 1566 state->target_residency = cx->latency; 1567 if (cx->type > ACPI_STATE_C1) 1568 state->target_residency *= 3; 1569 1570 state->flags = MWAIT2flg(cx->address); 1571 if (cx->type > ACPI_STATE_C2) 1572 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1573 1574 if (disabled_states_mask & BIT(cstate)) 1575 state->flags |= CPUIDLE_FLAG_OFF; 1576 1577 if (intel_idle_state_needs_timer_stop(state)) 1578 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1579 1580 state->enter = intel_idle; 1581 state->enter_s2idle = intel_idle_s2idle; 1582 } 1583 } 1584 1585 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1586 { 1587 int cstate, limit; 1588 1589 /* 1590 * If there are no _CST C-states, do not disable any C-states by 1591 * default. 1592 */ 1593 if (!acpi_state_table.count) 1594 return false; 1595 1596 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1597 /* 1598 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1599 * the interesting states are ACPI_CSTATE_FFH. 
1600 */ 1601 for (cstate = 1; cstate < limit; cstate++) { 1602 if (acpi_state_table.states[cstate].address == mwait_hint) 1603 return false; 1604 } 1605 return true; 1606 } 1607 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1608 #define force_use_acpi (false) 1609 1610 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1611 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1612 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1613 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1614 1615 /** 1616 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1617 * 1618 * Tune IVT multi-socket targets. 1619 * Assumption: num_sockets == (max_package_num + 1). 1620 */ 1621 static void __init ivt_idle_state_table_update(void) 1622 { 1623 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1624 int cpu, package_num, num_sockets = 1; 1625 1626 for_each_online_cpu(cpu) { 1627 package_num = topology_physical_package_id(cpu); 1628 if (package_num + 1 > num_sockets) { 1629 num_sockets = package_num + 1; 1630 1631 if (num_sockets > 4) { 1632 cpuidle_state_table = ivt_cstates_8s; 1633 return; 1634 } 1635 } 1636 } 1637 1638 if (num_sockets > 2) 1639 cpuidle_state_table = ivt_cstates_4s; 1640 1641 /* else, 1 and 2 socket systems use default ivt_cstates */ 1642 } 1643 1644 /** 1645 * irtl_2_usec - IRTL to microseconds conversion. 1646 * @irtl: IRTL MSR value. 1647 * 1648 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1649 */ 1650 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1651 { 1652 static const unsigned int irtl_ns_units[] __initconst = { 1653 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1654 }; 1655 unsigned long long ns; 1656 1657 if (!irtl) 1658 return 0; 1659 1660 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1661 1662 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1663 } 1664 1665 /** 1666 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1667 * 1668 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1669 * definitive maximum latency and use the same value for target_residency. 1670 */ 1671 static void __init bxt_idle_state_table_update(void) 1672 { 1673 unsigned long long msr; 1674 unsigned int usec; 1675 1676 rdmsrl(MSR_PKGC6_IRTL, msr); 1677 usec = irtl_2_usec(msr); 1678 if (usec) { 1679 bxt_cstates[2].exit_latency = usec; 1680 bxt_cstates[2].target_residency = usec; 1681 } 1682 1683 rdmsrl(MSR_PKGC7_IRTL, msr); 1684 usec = irtl_2_usec(msr); 1685 if (usec) { 1686 bxt_cstates[3].exit_latency = usec; 1687 bxt_cstates[3].target_residency = usec; 1688 } 1689 1690 rdmsrl(MSR_PKGC8_IRTL, msr); 1691 usec = irtl_2_usec(msr); 1692 if (usec) { 1693 bxt_cstates[4].exit_latency = usec; 1694 bxt_cstates[4].target_residency = usec; 1695 } 1696 1697 rdmsrl(MSR_PKGC9_IRTL, msr); 1698 usec = irtl_2_usec(msr); 1699 if (usec) { 1700 bxt_cstates[5].exit_latency = usec; 1701 bxt_cstates[5].target_residency = usec; 1702 } 1703 1704 rdmsrl(MSR_PKGC10_IRTL, msr); 1705 usec = irtl_2_usec(msr); 1706 if (usec) { 1707 bxt_cstates[6].exit_latency = usec; 1708 bxt_cstates[6].target_residency = usec; 1709 } 1710 1711 } 1712 1713 /** 1714 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1715 * 1716 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1717 */ 1718 static void __init sklh_idle_state_table_update(void) 1719 { 1720 unsigned long long msr; 1721 unsigned int eax, ebx, ecx, edx; 1722 1723 1724 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1725 if (max_cstate <= 7) 1726 return; 1727 1728 /* if PC10 not present in CPUID.MWAIT.EDX */ 1729 if ((mwait_substates & (0xF << 28)) == 0) 1730 return; 1731 1732 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1733 1734 /* PC10 is not enabled in PKG C-state limit */ 1735 if ((msr & 0xF) != 8) 1736 return; 1737 1738 ecx = 0; 1739 cpuid(7, &eax, &ebx, &ecx, &edx); 1740 1741 /* if SGX is present */ 1742 if (ebx & (1 << 2)) { 1743 1744 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1745 1746 /* if SGX is enabled */ 1747 if (msr & (1 << 18)) 1748 return; 1749 } 1750 1751 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1752 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1753 } 1754 1755 /** 1756 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1757 * idle states table. 1758 */ 1759 static void __init skx_idle_state_table_update(void) 1760 { 1761 unsigned long long msr; 1762 1763 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1764 1765 /* 1766 * 000b: C0/C1 (no package C-state support) 1767 * 001b: C2 1768 * 010b: C6 (non-retention) 1769 * 011b: C6 (retention) 1770 * 111b: No Package C state limits. 1771 */ 1772 if ((msr & 0x7) < 2) { 1773 /* 1774 * Uses the CC6 + PC0 latency and 3 times of 1775 * latency for target_residency if the PC6 1776 * is disabled in BIOS. This is consistent 1777 * with how intel_idle driver uses _CST 1778 * to set the target_residency. 1779 */ 1780 skx_cstates[2].exit_latency = 92; 1781 skx_cstates[2].target_residency = 276; 1782 } 1783 } 1784 1785 /** 1786 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1787 */ 1788 static void __init adl_idle_state_table_update(void) 1789 { 1790 /* Check if user prefers C1 over C1E. 
*/ 1791 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1792 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1793 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1794 1795 /* Disable C1E by clearing the "C1E promotion" bit. */ 1796 c1e_promotion = C1E_PROMOTION_DISABLE; 1797 return; 1798 } 1799 1800 /* Make sure C1E is enabled by default */ 1801 c1e_promotion = C1E_PROMOTION_ENABLE; 1802 } 1803 1804 /** 1805 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1806 */ 1807 static void __init spr_idle_state_table_update(void) 1808 { 1809 unsigned long long msr; 1810 1811 /* 1812 * By default, the C6 state assumes the worst-case scenario of package 1813 * C6. However, if PC6 is disabled, we update the numbers to match 1814 * core C6. 1815 */ 1816 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1817 1818 /* Limit value 2 and above allow for PC6. */ 1819 if ((msr & 0x7) < 2) { 1820 spr_cstates[2].exit_latency = 190; 1821 spr_cstates[2].target_residency = 600; 1822 } 1823 } 1824 1825 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1826 { 1827 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1828 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1829 MWAIT_SUBSTATE_MASK; 1830 1831 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1832 if (num_substates == 0) 1833 return false; 1834 1835 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1836 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1837 1838 return true; 1839 } 1840 1841 static bool force_irq_on __read_mostly; 1842 module_param(force_irq_on, bool, 0444); 1843 1844 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1845 { 1846 int cstate; 1847 1848 switch (boot_cpu_data.x86_model) { 1849 case INTEL_FAM6_IVYBRIDGE_X: 1850 ivt_idle_state_table_update(); 1851 break; 1852 case INTEL_FAM6_ATOM_GOLDMONT: 1853 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1854 bxt_idle_state_table_update(); 1855 break; 1856 case INTEL_FAM6_SKYLAKE: 1857 sklh_idle_state_table_update(); 1858 break; 1859 case INTEL_FAM6_SKYLAKE_X: 1860 skx_idle_state_table_update(); 1861 break; 1862 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1863 case INTEL_FAM6_EMERALDRAPIDS_X: 1864 spr_idle_state_table_update(); 1865 break; 1866 case INTEL_FAM6_ALDERLAKE: 1867 case INTEL_FAM6_ALDERLAKE_L: 1868 case INTEL_FAM6_ALDERLAKE_N: 1869 adl_idle_state_table_update(); 1870 break; 1871 } 1872 1873 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1874 unsigned int mwait_hint; 1875 1876 if (intel_idle_max_cstate_reached(cstate)) 1877 break; 1878 1879 if (!cpuidle_state_table[cstate].enter && 1880 !cpuidle_state_table[cstate].enter_s2idle) 1881 break; 1882 1883 /* If marked as unusable, skip this state. */ 1884 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1885 pr_debug("state %s is disabled\n", 1886 cpuidle_state_table[cstate].name); 1887 continue; 1888 } 1889 1890 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1891 if (!intel_idle_verify_cstate(mwait_hint)) 1892 continue; 1893 1894 /* Structure copy. 
*/ 1895 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1896 1897 if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) { 1898 printk("intel_idle: forced intel_idle_irq for state %d\n", cstate); 1899 drv->states[drv->state_count].enter = intel_idle_irq; 1900 } 1901 1902 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1903 cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { 1904 WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); 1905 drv->states[drv->state_count].enter = intel_idle_ibrs; 1906 } 1907 1908 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE) 1909 drv->states[drv->state_count].enter = intel_idle_xstate; 1910 1911 if ((disabled_states_mask & BIT(drv->state_count)) || 1912 ((icpu->use_acpi || force_use_acpi) && 1913 intel_idle_off_by_default(mwait_hint) && 1914 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1915 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1916 1917 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1918 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1919 1920 drv->state_count++; 1921 } 1922 1923 if (icpu->byt_auto_demotion_disable_flag) { 1924 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1925 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1926 } 1927 } 1928 1929 /** 1930 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1931 * @drv: cpuidle driver structure to initialize. 
1932 */ 1933 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1934 { 1935 cpuidle_poll_state_init(drv); 1936 1937 if (disabled_states_mask & BIT(0)) 1938 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1939 1940 drv->state_count = 1; 1941 1942 if (icpu) 1943 intel_idle_init_cstates_icpu(drv); 1944 else 1945 intel_idle_init_cstates_acpi(drv); 1946 } 1947 1948 static void auto_demotion_disable(void) 1949 { 1950 unsigned long long msr_bits; 1951 1952 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1953 msr_bits &= ~auto_demotion_disable_flags; 1954 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1955 } 1956 1957 static void c1e_promotion_enable(void) 1958 { 1959 unsigned long long msr_bits; 1960 1961 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1962 msr_bits |= 0x2; 1963 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1964 } 1965 1966 static void c1e_promotion_disable(void) 1967 { 1968 unsigned long long msr_bits; 1969 1970 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1971 msr_bits &= ~0x2; 1972 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1973 } 1974 1975 /** 1976 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1977 * @cpu: CPU to initialize. 1978 * 1979 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1980 * with the processor model flags. 
1981 */ 1982 static int intel_idle_cpu_init(unsigned int cpu) 1983 { 1984 struct cpuidle_device *dev; 1985 1986 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1987 dev->cpu = cpu; 1988 1989 if (cpuidle_register_device(dev)) { 1990 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1991 return -EIO; 1992 } 1993 1994 if (auto_demotion_disable_flags) 1995 auto_demotion_disable(); 1996 1997 if (c1e_promotion == C1E_PROMOTION_ENABLE) 1998 c1e_promotion_enable(); 1999 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 2000 c1e_promotion_disable(); 2001 2002 return 0; 2003 } 2004 2005 static int intel_idle_cpu_online(unsigned int cpu) 2006 { 2007 struct cpuidle_device *dev; 2008 2009 if (!boot_cpu_has(X86_FEATURE_ARAT)) 2010 tick_broadcast_enable(); 2011 2012 /* 2013 * Some systems can hotplug a cpu at runtime after 2014 * the kernel has booted, we have to initialize the 2015 * driver in this case 2016 */ 2017 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2018 if (!dev->registered) 2019 return intel_idle_cpu_init(cpu); 2020 2021 return 0; 2022 } 2023 2024 /** 2025 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
2026 */ 2027 static void __init intel_idle_cpuidle_devices_uninit(void) 2028 { 2029 int i; 2030 2031 for_each_online_cpu(i) 2032 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2033 } 2034 2035 static int __init intel_idle_init(void) 2036 { 2037 const struct x86_cpu_id *id; 2038 unsigned int eax, ebx, ecx; 2039 int retval; 2040 2041 /* Do not load intel_idle at all for now if idle= is passed */ 2042 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2043 return -ENODEV; 2044 2045 if (max_cstate == 0) { 2046 pr_debug("disabled\n"); 2047 return -EPERM; 2048 } 2049 2050 id = x86_match_cpu(intel_idle_ids); 2051 if (id) { 2052 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2053 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2054 return -ENODEV; 2055 } 2056 } else { 2057 id = x86_match_cpu(intel_mwait_ids); 2058 if (!id) 2059 return -ENODEV; 2060 } 2061 2062 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 2063 return -ENODEV; 2064 2065 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 2066 2067 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2068 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2069 !mwait_substates) 2070 return -ENODEV; 2071 2072 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2073 2074 icpu = (const struct idle_cpu *)id->driver_data; 2075 if (icpu) { 2076 cpuidle_state_table = icpu->state_table; 2077 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 2078 if (icpu->disable_promotion_to_c1e) 2079 c1e_promotion = C1E_PROMOTION_DISABLE; 2080 if (icpu->use_acpi || force_use_acpi) 2081 intel_idle_acpi_cst_extract(); 2082 } else if (!intel_idle_acpi_cst_extract()) { 2083 return -ENODEV; 2084 } 2085 2086 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 2087 boot_cpu_data.x86_model); 2088 2089 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2090 if (!intel_idle_cpuidle_devices) 2091 return -ENOMEM; 2092 2093 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2094 2095 retval = 
cpuidle_register_driver(&intel_idle_driver); 2096 if (retval) { 2097 struct cpuidle_driver *drv = cpuidle_get_driver(); 2098 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2099 drv ? drv->name : "none"); 2100 goto init_driver_fail; 2101 } 2102 2103 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2104 intel_idle_cpu_online, NULL); 2105 if (retval < 0) 2106 goto hp_setup_fail; 2107 2108 pr_debug("Local APIC timer is reliable in %s\n", 2109 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 2110 2111 return 0; 2112 2113 hp_setup_fail: 2114 intel_idle_cpuidle_devices_uninit(); 2115 cpuidle_unregister_driver(&intel_idle_driver); 2116 init_driver_fail: 2117 free_percpu(intel_idle_cpuidle_devices); 2118 return retval; 2119 2120 } 2121 device_initcall(intel_idle_init); 2122 2123 /* 2124 * We are not really modular, but we used to support that. Meaning we also 2125 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2126 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2127 * is the easiest way (currently) to continue doing that. 2128 */ 2129 module_param(max_cstate, int, 0444); 2130 /* 2131 * The positions of the bits that are set in this number are the indices of the 2132 * idle states to be disabled by default (as reflected by the names of the 2133 * corresponding idle state directories in sysfs, "state0", "state1" ... 2134 * "state<i>" ..., where <i> is the index of the given state). 2135 */ 2136 module_param_named(states_off, disabled_states_mask, uint, 0444); 2137 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2138 /* 2139 * Some platforms come with mutually exclusive C-states, so that if one is 2140 * enabled, the other C-states must not be used. Example: C1 and C1E on 2141 * Sapphire Rapids platform. 
This parameter allows for selecting the 2142 * preferred C-states among the groups of mutually exclusive C-states - the 2143 * selected C-states will be registered, the other C-states from the mutually 2144 * exclusive group won't be registered. If the platform has no mutually 2145 * exclusive C-states, this parameter has no effect. 2146 */ 2147 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); 2148 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); 2149