1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * intel_idle.c - native hardware idle loop for modern Intel processors 4 * 5 * Copyright (c) 2013 - 2020, Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com> 8 */ 9 10 /* 11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT 12 * in lieu of the legacy ACPI processor_idle driver. The intent is to 13 * make Linux more efficient on these processors, as intel_idle knows 14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. 15 */ 16 17 /* 18 * Design Assumptions 19 * 20 * All CPUs have same idle states as boot CPU 21 * 22 * Chipset BM_STS (bus master status) bit is a NOP 23 * for preventing entry into deep C-states 24 * 25 * CPU will flush caches as needed when entering a C-state via MWAIT 26 * (in contrast to entering ACPI C3, in which case the WBINVD 27 * instruction needs to be executed to flush the caches) 28 */ 29 30 /* 31 * Known limitations 32 * 33 * ACPI has a .suspend hack to turn off deep c-statees during suspend 34 * to avoid complications with the lapic timer workaround. 35 * Have not seen issues with suspend, but may need same workaround here. 
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/sched/smt.h> 51 #include <linux/notifier.h> 52 #include <linux/cpu.h> 53 #include <linux/moduleparam.h> 54 #include <asm/cpu_device_id.h> 55 #include <asm/intel-family.h> 56 #include <asm/nospec-branch.h> 57 #include <asm/mwait.h> 58 #include <asm/msr.h> 59 #include <asm/tsc.h> 60 #include <asm/fpu/api.h> 61 62 #define INTEL_IDLE_VERSION "0.5.1" 63 64 static struct cpuidle_driver intel_idle_driver = { 65 .name = "intel_idle", 66 .owner = THIS_MODULE, 67 }; 68 /* intel_idle.max_cstate=0 disables driver */ 69 static int max_cstate = CPUIDLE_STATE_MAX - 1; 70 static unsigned int disabled_states_mask __read_mostly; 71 static unsigned int preferred_states_mask __read_mostly; 72 static bool force_irq_on __read_mostly; 73 74 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 75 76 static unsigned long auto_demotion_disable_flags; 77 78 static enum { 79 C1E_PROMOTION_PRESERVE, 80 C1E_PROMOTION_ENABLE, 81 C1E_PROMOTION_DISABLE 82 } c1e_promotion = C1E_PROMOTION_PRESERVE; 83 84 struct idle_cpu { 85 struct cpuidle_state *state_table; 86 87 /* 88 * Hardware C-state auto-demotion may not always be optimal. 89 * Indicate which enable bits to clear here. 90 */ 91 unsigned long auto_demotion_disable_flags; 92 bool byt_auto_demotion_disable_flag; 93 bool disable_promotion_to_c1e; 94 bool use_acpi; 95 }; 96 97 static const struct idle_cpu *icpu __initdata; 98 static struct cpuidle_state *cpuidle_state_table __initdata; 99 100 static unsigned int mwait_substates __initdata; 101 102 /* 103 * Enable interrupts before entering the C-state. On some platforms and for 104 * some C-states, this may measurably decrease interrupt latency. 
105 */ 106 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 107 108 /* 109 * Enable this state by default even if the ACPI _CST does not list it. 110 */ 111 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 112 113 /* 114 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 115 * above. 116 */ 117 #define CPUIDLE_FLAG_IBRS BIT(16) 118 119 /* 120 * Initialize large xstate for the C6-state entrance. 121 */ 122 #define CPUIDLE_FLAG_INIT_XSTATE BIT(17) 123 124 /* 125 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 126 * the C-state (top nibble) and sub-state (bottom nibble) 127 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 128 * 129 * We store the hint at the top of our "flags" for each state. 130 */ 131 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 132 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 133 134 static __always_inline int __intel_idle(struct cpuidle_device *dev, 135 struct cpuidle_driver *drv, 136 int index, bool irqoff) 137 { 138 struct cpuidle_state *state = &drv->states[index]; 139 unsigned long eax = flg2MWAIT(state->flags); 140 unsigned long ecx = 1*irqoff; /* break on interrupt flag */ 141 142 mwait_idle_with_hints(eax, ecx); 143 144 return index; 145 } 146 147 /** 148 * intel_idle - Ask the processor to enter the given idle state. 149 * @dev: cpuidle device of the target CPU. 150 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 151 * @index: Target idle state index. 152 * 153 * Use the MWAIT instruction to notify the processor that the CPU represented by 154 * @dev is idle and it can try to enter the idle state corresponding to @index. 155 * 156 * If the local APIC timer is not known to be reliable in the target idle state, 157 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 158 * 159 * Must be called under local_irq_disable(). 
160 */ 161 static __cpuidle int intel_idle(struct cpuidle_device *dev, 162 struct cpuidle_driver *drv, int index) 163 { 164 return __intel_idle(dev, drv, index, true); 165 } 166 167 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, 168 struct cpuidle_driver *drv, int index) 169 { 170 return __intel_idle(dev, drv, index, false); 171 } 172 173 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 174 struct cpuidle_driver *drv, int index) 175 { 176 bool smt_active = sched_smt_active(); 177 u64 spec_ctrl = spec_ctrl_current(); 178 int ret; 179 180 if (smt_active) 181 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); 182 183 ret = __intel_idle(dev, drv, index, true); 184 185 if (smt_active) 186 native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 187 188 return ret; 189 } 190 191 static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, 192 struct cpuidle_driver *drv, int index) 193 { 194 fpu_idle_fpregs(); 195 return __intel_idle(dev, drv, index, true); 196 } 197 198 /** 199 * intel_idle_s2idle - Ask the processor to enter the given idle state. 200 * @dev: cpuidle device of the target CPU. 201 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 202 * @index: Target idle state index. 203 * 204 * Use the MWAIT instruction to notify the processor that the CPU represented by 205 * @dev is idle and it can try to enter the idle state corresponding to @index. 206 * 207 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 208 * scheduler tick and suspended scheduler clock on the target CPU. 
209 */ 210 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 211 struct cpuidle_driver *drv, int index) 212 { 213 unsigned long ecx = 1; /* break on interrupt flag */ 214 struct cpuidle_state *state = &drv->states[index]; 215 unsigned long eax = flg2MWAIT(state->flags); 216 217 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) 218 fpu_idle_fpregs(); 219 220 mwait_idle_with_hints(eax, ecx); 221 222 return 0; 223 } 224 225 /* 226 * States are indexed by the cstate number, 227 * which is also the index into the MWAIT hint array. 228 * Thus C0 is a dummy. 229 */ 230 static struct cpuidle_state nehalem_cstates[] __initdata = { 231 { 232 .name = "C1", 233 .desc = "MWAIT 0x00", 234 .flags = MWAIT2flg(0x00), 235 .exit_latency = 3, 236 .target_residency = 6, 237 .enter = &intel_idle, 238 .enter_s2idle = intel_idle_s2idle, }, 239 { 240 .name = "C1E", 241 .desc = "MWAIT 0x01", 242 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 243 .exit_latency = 10, 244 .target_residency = 20, 245 .enter = &intel_idle, 246 .enter_s2idle = intel_idle_s2idle, }, 247 { 248 .name = "C3", 249 .desc = "MWAIT 0x10", 250 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 251 .exit_latency = 20, 252 .target_residency = 80, 253 .enter = &intel_idle, 254 .enter_s2idle = intel_idle_s2idle, }, 255 { 256 .name = "C6", 257 .desc = "MWAIT 0x20", 258 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 259 .exit_latency = 200, 260 .target_residency = 800, 261 .enter = &intel_idle, 262 .enter_s2idle = intel_idle_s2idle, }, 263 { 264 .enter = NULL } 265 }; 266 267 static struct cpuidle_state snb_cstates[] __initdata = { 268 { 269 .name = "C1", 270 .desc = "MWAIT 0x00", 271 .flags = MWAIT2flg(0x00), 272 .exit_latency = 2, 273 .target_residency = 2, 274 .enter = &intel_idle, 275 .enter_s2idle = intel_idle_s2idle, }, 276 { 277 .name = "C1E", 278 .desc = "MWAIT 0x01", 279 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 280 .exit_latency = 10, 281 .target_residency = 20, 282 .enter = 
&intel_idle, 283 .enter_s2idle = intel_idle_s2idle, }, 284 { 285 .name = "C3", 286 .desc = "MWAIT 0x10", 287 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 288 .exit_latency = 80, 289 .target_residency = 211, 290 .enter = &intel_idle, 291 .enter_s2idle = intel_idle_s2idle, }, 292 { 293 .name = "C6", 294 .desc = "MWAIT 0x20", 295 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 296 .exit_latency = 104, 297 .target_residency = 345, 298 .enter = &intel_idle, 299 .enter_s2idle = intel_idle_s2idle, }, 300 { 301 .name = "C7", 302 .desc = "MWAIT 0x30", 303 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 304 .exit_latency = 109, 305 .target_residency = 345, 306 .enter = &intel_idle, 307 .enter_s2idle = intel_idle_s2idle, }, 308 { 309 .enter = NULL } 310 }; 311 312 static struct cpuidle_state byt_cstates[] __initdata = { 313 { 314 .name = "C1", 315 .desc = "MWAIT 0x00", 316 .flags = MWAIT2flg(0x00), 317 .exit_latency = 1, 318 .target_residency = 1, 319 .enter = &intel_idle, 320 .enter_s2idle = intel_idle_s2idle, }, 321 { 322 .name = "C6N", 323 .desc = "MWAIT 0x58", 324 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 325 .exit_latency = 300, 326 .target_residency = 275, 327 .enter = &intel_idle, 328 .enter_s2idle = intel_idle_s2idle, }, 329 { 330 .name = "C6S", 331 .desc = "MWAIT 0x52", 332 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 333 .exit_latency = 500, 334 .target_residency = 560, 335 .enter = &intel_idle, 336 .enter_s2idle = intel_idle_s2idle, }, 337 { 338 .name = "C7", 339 .desc = "MWAIT 0x60", 340 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 341 .exit_latency = 1200, 342 .target_residency = 4000, 343 .enter = &intel_idle, 344 .enter_s2idle = intel_idle_s2idle, }, 345 { 346 .name = "C7S", 347 .desc = "MWAIT 0x64", 348 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 349 .exit_latency = 10000, 350 .target_residency = 20000, 351 .enter = &intel_idle, 352 .enter_s2idle = intel_idle_s2idle, }, 353 { 354 .enter = NULL } 355 }; 356 
357 static struct cpuidle_state cht_cstates[] __initdata = { 358 { 359 .name = "C1", 360 .desc = "MWAIT 0x00", 361 .flags = MWAIT2flg(0x00), 362 .exit_latency = 1, 363 .target_residency = 1, 364 .enter = &intel_idle, 365 .enter_s2idle = intel_idle_s2idle, }, 366 { 367 .name = "C6N", 368 .desc = "MWAIT 0x58", 369 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 370 .exit_latency = 80, 371 .target_residency = 275, 372 .enter = &intel_idle, 373 .enter_s2idle = intel_idle_s2idle, }, 374 { 375 .name = "C6S", 376 .desc = "MWAIT 0x52", 377 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 378 .exit_latency = 200, 379 .target_residency = 560, 380 .enter = &intel_idle, 381 .enter_s2idle = intel_idle_s2idle, }, 382 { 383 .name = "C7", 384 .desc = "MWAIT 0x60", 385 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 386 .exit_latency = 1200, 387 .target_residency = 4000, 388 .enter = &intel_idle, 389 .enter_s2idle = intel_idle_s2idle, }, 390 { 391 .name = "C7S", 392 .desc = "MWAIT 0x64", 393 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 394 .exit_latency = 10000, 395 .target_residency = 20000, 396 .enter = &intel_idle, 397 .enter_s2idle = intel_idle_s2idle, }, 398 { 399 .enter = NULL } 400 }; 401 402 static struct cpuidle_state ivb_cstates[] __initdata = { 403 { 404 .name = "C1", 405 .desc = "MWAIT 0x00", 406 .flags = MWAIT2flg(0x00), 407 .exit_latency = 1, 408 .target_residency = 1, 409 .enter = &intel_idle, 410 .enter_s2idle = intel_idle_s2idle, }, 411 { 412 .name = "C1E", 413 .desc = "MWAIT 0x01", 414 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 415 .exit_latency = 10, 416 .target_residency = 20, 417 .enter = &intel_idle, 418 .enter_s2idle = intel_idle_s2idle, }, 419 { 420 .name = "C3", 421 .desc = "MWAIT 0x10", 422 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 423 .exit_latency = 59, 424 .target_residency = 156, 425 .enter = &intel_idle, 426 .enter_s2idle = intel_idle_s2idle, }, 427 { 428 .name = "C6", 429 .desc = "MWAIT 0x20", 430 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 431 .exit_latency = 80, 432 .target_residency = 300, 433 .enter = &intel_idle, 434 .enter_s2idle = intel_idle_s2idle, }, 435 { 436 .name = "C7", 437 .desc = "MWAIT 0x30", 438 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 439 .exit_latency = 87, 440 .target_residency = 300, 441 .enter = &intel_idle, 442 .enter_s2idle = intel_idle_s2idle, }, 443 { 444 .enter = NULL } 445 }; 446 447 static struct cpuidle_state ivt_cstates[] __initdata = { 448 { 449 .name = "C1", 450 .desc = "MWAIT 0x00", 451 .flags = MWAIT2flg(0x00), 452 .exit_latency = 1, 453 .target_residency = 1, 454 .enter = &intel_idle, 455 .enter_s2idle = intel_idle_s2idle, }, 456 { 457 .name = "C1E", 458 .desc = "MWAIT 0x01", 459 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 460 .exit_latency = 10, 461 .target_residency = 80, 462 .enter = &intel_idle, 463 .enter_s2idle = intel_idle_s2idle, }, 464 { 465 .name = "C3", 466 .desc = "MWAIT 0x10", 467 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 468 .exit_latency = 59, 469 .target_residency = 156, 470 .enter = &intel_idle, 471 .enter_s2idle = intel_idle_s2idle, }, 472 { 473 .name = "C6", 474 .desc = "MWAIT 0x20", 475 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 476 .exit_latency = 82, 477 .target_residency = 300, 478 .enter = &intel_idle, 479 .enter_s2idle = intel_idle_s2idle, }, 480 { 481 .enter = NULL } 482 }; 483 484 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 485 { 486 .name = "C1", 487 .desc = "MWAIT 0x00", 488 .flags = MWAIT2flg(0x00), 489 .exit_latency = 1, 490 .target_residency = 1, 491 .enter = &intel_idle, 492 .enter_s2idle = intel_idle_s2idle, }, 493 { 494 .name = "C1E", 495 .desc = "MWAIT 0x01", 496 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 497 .exit_latency = 10, 498 .target_residency = 250, 499 .enter = &intel_idle, 500 .enter_s2idle = intel_idle_s2idle, }, 501 { 502 .name = "C3", 503 .desc = "MWAIT 0x10", 504 .flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TLB_FLUSHED, 505 .exit_latency = 59, 506 .target_residency = 300, 507 .enter = &intel_idle, 508 .enter_s2idle = intel_idle_s2idle, }, 509 { 510 .name = "C6", 511 .desc = "MWAIT 0x20", 512 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 513 .exit_latency = 84, 514 .target_residency = 400, 515 .enter = &intel_idle, 516 .enter_s2idle = intel_idle_s2idle, }, 517 { 518 .enter = NULL } 519 }; 520 521 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 522 { 523 .name = "C1", 524 .desc = "MWAIT 0x00", 525 .flags = MWAIT2flg(0x00), 526 .exit_latency = 1, 527 .target_residency = 1, 528 .enter = &intel_idle, 529 .enter_s2idle = intel_idle_s2idle, }, 530 { 531 .name = "C1E", 532 .desc = "MWAIT 0x01", 533 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 534 .exit_latency = 10, 535 .target_residency = 500, 536 .enter = &intel_idle, 537 .enter_s2idle = intel_idle_s2idle, }, 538 { 539 .name = "C3", 540 .desc = "MWAIT 0x10", 541 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 542 .exit_latency = 59, 543 .target_residency = 600, 544 .enter = &intel_idle, 545 .enter_s2idle = intel_idle_s2idle, }, 546 { 547 .name = "C6", 548 .desc = "MWAIT 0x20", 549 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 550 .exit_latency = 88, 551 .target_residency = 700, 552 .enter = &intel_idle, 553 .enter_s2idle = intel_idle_s2idle, }, 554 { 555 .enter = NULL } 556 }; 557 558 static struct cpuidle_state hsw_cstates[] __initdata = { 559 { 560 .name = "C1", 561 .desc = "MWAIT 0x00", 562 .flags = MWAIT2flg(0x00), 563 .exit_latency = 2, 564 .target_residency = 2, 565 .enter = &intel_idle, 566 .enter_s2idle = intel_idle_s2idle, }, 567 { 568 .name = "C1E", 569 .desc = "MWAIT 0x01", 570 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 571 .exit_latency = 10, 572 .target_residency = 20, 573 .enter = &intel_idle, 574 .enter_s2idle = intel_idle_s2idle, }, 575 { 576 .name = "C3", 577 .desc = "MWAIT 0x10", 578 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 579 
.exit_latency = 33, 580 .target_residency = 100, 581 .enter = &intel_idle, 582 .enter_s2idle = intel_idle_s2idle, }, 583 { 584 .name = "C6", 585 .desc = "MWAIT 0x20", 586 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 587 .exit_latency = 133, 588 .target_residency = 400, 589 .enter = &intel_idle, 590 .enter_s2idle = intel_idle_s2idle, }, 591 { 592 .name = "C7s", 593 .desc = "MWAIT 0x32", 594 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 595 .exit_latency = 166, 596 .target_residency = 500, 597 .enter = &intel_idle, 598 .enter_s2idle = intel_idle_s2idle, }, 599 { 600 .name = "C8", 601 .desc = "MWAIT 0x40", 602 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 603 .exit_latency = 300, 604 .target_residency = 900, 605 .enter = &intel_idle, 606 .enter_s2idle = intel_idle_s2idle, }, 607 { 608 .name = "C9", 609 .desc = "MWAIT 0x50", 610 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 611 .exit_latency = 600, 612 .target_residency = 1800, 613 .enter = &intel_idle, 614 .enter_s2idle = intel_idle_s2idle, }, 615 { 616 .name = "C10", 617 .desc = "MWAIT 0x60", 618 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 619 .exit_latency = 2600, 620 .target_residency = 7700, 621 .enter = &intel_idle, 622 .enter_s2idle = intel_idle_s2idle, }, 623 { 624 .enter = NULL } 625 }; 626 static struct cpuidle_state bdw_cstates[] __initdata = { 627 { 628 .name = "C1", 629 .desc = "MWAIT 0x00", 630 .flags = MWAIT2flg(0x00), 631 .exit_latency = 2, 632 .target_residency = 2, 633 .enter = &intel_idle, 634 .enter_s2idle = intel_idle_s2idle, }, 635 { 636 .name = "C1E", 637 .desc = "MWAIT 0x01", 638 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 639 .exit_latency = 10, 640 .target_residency = 20, 641 .enter = &intel_idle, 642 .enter_s2idle = intel_idle_s2idle, }, 643 { 644 .name = "C3", 645 .desc = "MWAIT 0x10", 646 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 647 .exit_latency = 40, 648 .target_residency = 100, 649 .enter = &intel_idle, 650 .enter_s2idle = 
intel_idle_s2idle, }, 651 { 652 .name = "C6", 653 .desc = "MWAIT 0x20", 654 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 655 .exit_latency = 133, 656 .target_residency = 400, 657 .enter = &intel_idle, 658 .enter_s2idle = intel_idle_s2idle, }, 659 { 660 .name = "C7s", 661 .desc = "MWAIT 0x32", 662 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 663 .exit_latency = 166, 664 .target_residency = 500, 665 .enter = &intel_idle, 666 .enter_s2idle = intel_idle_s2idle, }, 667 { 668 .name = "C8", 669 .desc = "MWAIT 0x40", 670 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 671 .exit_latency = 300, 672 .target_residency = 900, 673 .enter = &intel_idle, 674 .enter_s2idle = intel_idle_s2idle, }, 675 { 676 .name = "C9", 677 .desc = "MWAIT 0x50", 678 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 679 .exit_latency = 600, 680 .target_residency = 1800, 681 .enter = &intel_idle, 682 .enter_s2idle = intel_idle_s2idle, }, 683 { 684 .name = "C10", 685 .desc = "MWAIT 0x60", 686 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 687 .exit_latency = 2600, 688 .target_residency = 7700, 689 .enter = &intel_idle, 690 .enter_s2idle = intel_idle_s2idle, }, 691 { 692 .enter = NULL } 693 }; 694 695 static struct cpuidle_state skl_cstates[] __initdata = { 696 { 697 .name = "C1", 698 .desc = "MWAIT 0x00", 699 .flags = MWAIT2flg(0x00), 700 .exit_latency = 2, 701 .target_residency = 2, 702 .enter = &intel_idle, 703 .enter_s2idle = intel_idle_s2idle, }, 704 { 705 .name = "C1E", 706 .desc = "MWAIT 0x01", 707 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 708 .exit_latency = 10, 709 .target_residency = 20, 710 .enter = &intel_idle, 711 .enter_s2idle = intel_idle_s2idle, }, 712 { 713 .name = "C3", 714 .desc = "MWAIT 0x10", 715 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 716 .exit_latency = 70, 717 .target_residency = 100, 718 .enter = &intel_idle, 719 .enter_s2idle = intel_idle_s2idle, }, 720 { 721 .name = "C6", 722 .desc = "MWAIT 0x20", 723 .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 724 .exit_latency = 85, 725 .target_residency = 200, 726 .enter = &intel_idle, 727 .enter_s2idle = intel_idle_s2idle, }, 728 { 729 .name = "C7s", 730 .desc = "MWAIT 0x33", 731 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 732 .exit_latency = 124, 733 .target_residency = 800, 734 .enter = &intel_idle, 735 .enter_s2idle = intel_idle_s2idle, }, 736 { 737 .name = "C8", 738 .desc = "MWAIT 0x40", 739 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 740 .exit_latency = 200, 741 .target_residency = 800, 742 .enter = &intel_idle, 743 .enter_s2idle = intel_idle_s2idle, }, 744 { 745 .name = "C9", 746 .desc = "MWAIT 0x50", 747 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 748 .exit_latency = 480, 749 .target_residency = 5000, 750 .enter = &intel_idle, 751 .enter_s2idle = intel_idle_s2idle, }, 752 { 753 .name = "C10", 754 .desc = "MWAIT 0x60", 755 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 756 .exit_latency = 890, 757 .target_residency = 5000, 758 .enter = &intel_idle, 759 .enter_s2idle = intel_idle_s2idle, }, 760 { 761 .enter = NULL } 762 }; 763 764 static struct cpuidle_state skx_cstates[] __initdata = { 765 { 766 .name = "C1", 767 .desc = "MWAIT 0x00", 768 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 769 .exit_latency = 2, 770 .target_residency = 2, 771 .enter = &intel_idle, 772 .enter_s2idle = intel_idle_s2idle, }, 773 { 774 .name = "C1E", 775 .desc = "MWAIT 0x01", 776 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 777 .exit_latency = 10, 778 .target_residency = 20, 779 .enter = &intel_idle, 780 .enter_s2idle = intel_idle_s2idle, }, 781 { 782 .name = "C6", 783 .desc = "MWAIT 0x20", 784 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 785 .exit_latency = 133, 786 .target_residency = 600, 787 .enter = &intel_idle, 788 .enter_s2idle = intel_idle_s2idle, }, 789 { 790 
.enter = NULL } 791 }; 792 793 static struct cpuidle_state icx_cstates[] __initdata = { 794 { 795 .name = "C1", 796 .desc = "MWAIT 0x00", 797 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 798 .exit_latency = 1, 799 .target_residency = 1, 800 .enter = &intel_idle, 801 .enter_s2idle = intel_idle_s2idle, }, 802 { 803 .name = "C1E", 804 .desc = "MWAIT 0x01", 805 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 806 .exit_latency = 4, 807 .target_residency = 4, 808 .enter = &intel_idle, 809 .enter_s2idle = intel_idle_s2idle, }, 810 { 811 .name = "C6", 812 .desc = "MWAIT 0x20", 813 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 814 .exit_latency = 170, 815 .target_residency = 600, 816 .enter = &intel_idle, 817 .enter_s2idle = intel_idle_s2idle, }, 818 { 819 .enter = NULL } 820 }; 821 822 /* 823 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 824 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 825 * But in this case there is effectively no C1, because C1 requests are 826 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 827 * and C1E requests end up with C1, so there is effectively no C1E. 828 * 829 * By default we enable C1E and disable C1 by marking it with 830 * 'CPUIDLE_FLAG_UNUSABLE'. 
831 */ 832 static struct cpuidle_state adl_cstates[] __initdata = { 833 { 834 .name = "C1", 835 .desc = "MWAIT 0x00", 836 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 837 .exit_latency = 1, 838 .target_residency = 1, 839 .enter = &intel_idle, 840 .enter_s2idle = intel_idle_s2idle, }, 841 { 842 .name = "C1E", 843 .desc = "MWAIT 0x01", 844 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 845 .exit_latency = 2, 846 .target_residency = 4, 847 .enter = &intel_idle, 848 .enter_s2idle = intel_idle_s2idle, }, 849 { 850 .name = "C6", 851 .desc = "MWAIT 0x20", 852 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 853 .exit_latency = 220, 854 .target_residency = 600, 855 .enter = &intel_idle, 856 .enter_s2idle = intel_idle_s2idle, }, 857 { 858 .name = "C8", 859 .desc = "MWAIT 0x40", 860 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 861 .exit_latency = 280, 862 .target_residency = 800, 863 .enter = &intel_idle, 864 .enter_s2idle = intel_idle_s2idle, }, 865 { 866 .name = "C10", 867 .desc = "MWAIT 0x60", 868 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 869 .exit_latency = 680, 870 .target_residency = 2000, 871 .enter = &intel_idle, 872 .enter_s2idle = intel_idle_s2idle, }, 873 { 874 .enter = NULL } 875 }; 876 877 static struct cpuidle_state adl_l_cstates[] __initdata = { 878 { 879 .name = "C1", 880 .desc = "MWAIT 0x00", 881 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 882 .exit_latency = 1, 883 .target_residency = 1, 884 .enter = &intel_idle, 885 .enter_s2idle = intel_idle_s2idle, }, 886 { 887 .name = "C1E", 888 .desc = "MWAIT 0x01", 889 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 890 .exit_latency = 2, 891 .target_residency = 4, 892 .enter = &intel_idle, 893 .enter_s2idle = intel_idle_s2idle, }, 894 { 895 .name = "C6", 896 .desc = "MWAIT 0x20", 897 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 898 .exit_latency = 170, 899 .target_residency = 500, 900 .enter = &intel_idle, 901 .enter_s2idle = intel_idle_s2idle, }, 902 { 903 
.name = "C8", 904 .desc = "MWAIT 0x40", 905 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 906 .exit_latency = 200, 907 .target_residency = 600, 908 .enter = &intel_idle, 909 .enter_s2idle = intel_idle_s2idle, }, 910 { 911 .name = "C10", 912 .desc = "MWAIT 0x60", 913 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 914 .exit_latency = 230, 915 .target_residency = 700, 916 .enter = &intel_idle, 917 .enter_s2idle = intel_idle_s2idle, }, 918 { 919 .enter = NULL } 920 }; 921 922 static struct cpuidle_state gmt_cstates[] __initdata = { 923 { 924 .name = "C1", 925 .desc = "MWAIT 0x00", 926 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 927 .exit_latency = 1, 928 .target_residency = 1, 929 .enter = &intel_idle, 930 .enter_s2idle = intel_idle_s2idle, }, 931 { 932 .name = "C1E", 933 .desc = "MWAIT 0x01", 934 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 935 .exit_latency = 2, 936 .target_residency = 4, 937 .enter = &intel_idle, 938 .enter_s2idle = intel_idle_s2idle, }, 939 { 940 .name = "C6", 941 .desc = "MWAIT 0x20", 942 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 943 .exit_latency = 195, 944 .target_residency = 585, 945 .enter = &intel_idle, 946 .enter_s2idle = intel_idle_s2idle, }, 947 { 948 .name = "C8", 949 .desc = "MWAIT 0x40", 950 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 951 .exit_latency = 260, 952 .target_residency = 1040, 953 .enter = &intel_idle, 954 .enter_s2idle = intel_idle_s2idle, }, 955 { 956 .name = "C10", 957 .desc = "MWAIT 0x60", 958 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 959 .exit_latency = 660, 960 .target_residency = 1980, 961 .enter = &intel_idle, 962 .enter_s2idle = intel_idle_s2idle, }, 963 { 964 .enter = NULL } 965 }; 966 967 static struct cpuidle_state spr_cstates[] __initdata = { 968 { 969 .name = "C1", 970 .desc = "MWAIT 0x00", 971 .flags = MWAIT2flg(0x00), 972 .exit_latency = 1, 973 .target_residency = 1, 974 .enter = &intel_idle, 975 .enter_s2idle = intel_idle_s2idle, }, 976 { 977 .name 
= "C1E", 978 .desc = "MWAIT 0x01", 979 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 980 .exit_latency = 2, 981 .target_residency = 4, 982 .enter = &intel_idle, 983 .enter_s2idle = intel_idle_s2idle, }, 984 { 985 .name = "C6", 986 .desc = "MWAIT 0x20", 987 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 988 CPUIDLE_FLAG_INIT_XSTATE, 989 .exit_latency = 290, 990 .target_residency = 800, 991 .enter = &intel_idle, 992 .enter_s2idle = intel_idle_s2idle, }, 993 { 994 .enter = NULL } 995 }; 996 997 static struct cpuidle_state atom_cstates[] __initdata = { 998 { 999 .name = "C1E", 1000 .desc = "MWAIT 0x00", 1001 .flags = MWAIT2flg(0x00), 1002 .exit_latency = 10, 1003 .target_residency = 20, 1004 .enter = &intel_idle, 1005 .enter_s2idle = intel_idle_s2idle, }, 1006 { 1007 .name = "C2", 1008 .desc = "MWAIT 0x10", 1009 .flags = MWAIT2flg(0x10), 1010 .exit_latency = 20, 1011 .target_residency = 80, 1012 .enter = &intel_idle, 1013 .enter_s2idle = intel_idle_s2idle, }, 1014 { 1015 .name = "C4", 1016 .desc = "MWAIT 0x30", 1017 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1018 .exit_latency = 100, 1019 .target_residency = 400, 1020 .enter = &intel_idle, 1021 .enter_s2idle = intel_idle_s2idle, }, 1022 { 1023 .name = "C6", 1024 .desc = "MWAIT 0x52", 1025 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1026 .exit_latency = 140, 1027 .target_residency = 560, 1028 .enter = &intel_idle, 1029 .enter_s2idle = intel_idle_s2idle, }, 1030 { 1031 .enter = NULL } 1032 }; 1033 static struct cpuidle_state tangier_cstates[] __initdata = { 1034 { 1035 .name = "C1", 1036 .desc = "MWAIT 0x00", 1037 .flags = MWAIT2flg(0x00), 1038 .exit_latency = 1, 1039 .target_residency = 4, 1040 .enter = &intel_idle, 1041 .enter_s2idle = intel_idle_s2idle, }, 1042 { 1043 .name = "C4", 1044 .desc = "MWAIT 0x30", 1045 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1046 .exit_latency = 100, 1047 .target_residency = 400, 1048 .enter = &intel_idle, 1049 .enter_s2idle = 
intel_idle_s2idle, }, 1050 { 1051 .name = "C6", 1052 .desc = "MWAIT 0x52", 1053 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1054 .exit_latency = 140, 1055 .target_residency = 560, 1056 .enter = &intel_idle, 1057 .enter_s2idle = intel_idle_s2idle, }, 1058 { 1059 .name = "C7", 1060 .desc = "MWAIT 0x60", 1061 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1062 .exit_latency = 1200, 1063 .target_residency = 4000, 1064 .enter = &intel_idle, 1065 .enter_s2idle = intel_idle_s2idle, }, 1066 { 1067 .name = "C9", 1068 .desc = "MWAIT 0x64", 1069 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1070 .exit_latency = 10000, 1071 .target_residency = 20000, 1072 .enter = &intel_idle, 1073 .enter_s2idle = intel_idle_s2idle, }, 1074 { 1075 .enter = NULL } 1076 }; 1077 static struct cpuidle_state avn_cstates[] __initdata = { 1078 { 1079 .name = "C1", 1080 .desc = "MWAIT 0x00", 1081 .flags = MWAIT2flg(0x00), 1082 .exit_latency = 2, 1083 .target_residency = 2, 1084 .enter = &intel_idle, 1085 .enter_s2idle = intel_idle_s2idle, }, 1086 { 1087 .name = "C6", 1088 .desc = "MWAIT 0x51", 1089 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1090 .exit_latency = 15, 1091 .target_residency = 45, 1092 .enter = &intel_idle, 1093 .enter_s2idle = intel_idle_s2idle, }, 1094 { 1095 .enter = NULL } 1096 }; 1097 static struct cpuidle_state knl_cstates[] __initdata = { 1098 { 1099 .name = "C1", 1100 .desc = "MWAIT 0x00", 1101 .flags = MWAIT2flg(0x00), 1102 .exit_latency = 1, 1103 .target_residency = 2, 1104 .enter = &intel_idle, 1105 .enter_s2idle = intel_idle_s2idle }, 1106 { 1107 .name = "C6", 1108 .desc = "MWAIT 0x10", 1109 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1110 .exit_latency = 120, 1111 .target_residency = 500, 1112 .enter = &intel_idle, 1113 .enter_s2idle = intel_idle_s2idle }, 1114 { 1115 .enter = NULL } 1116 }; 1117 1118 static struct cpuidle_state bxt_cstates[] __initdata = { 1119 { 1120 .name = "C1", 1121 .desc = "MWAIT 0x00", 1122 .flags = 
MWAIT2flg(0x00), 1123 .exit_latency = 2, 1124 .target_residency = 2, 1125 .enter = &intel_idle, 1126 .enter_s2idle = intel_idle_s2idle, }, 1127 { 1128 .name = "C1E", 1129 .desc = "MWAIT 0x01", 1130 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1131 .exit_latency = 10, 1132 .target_residency = 20, 1133 .enter = &intel_idle, 1134 .enter_s2idle = intel_idle_s2idle, }, 1135 { 1136 .name = "C6", 1137 .desc = "MWAIT 0x20", 1138 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1139 .exit_latency = 133, 1140 .target_residency = 133, 1141 .enter = &intel_idle, 1142 .enter_s2idle = intel_idle_s2idle, }, 1143 { 1144 .name = "C7s", 1145 .desc = "MWAIT 0x31", 1146 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1147 .exit_latency = 155, 1148 .target_residency = 155, 1149 .enter = &intel_idle, 1150 .enter_s2idle = intel_idle_s2idle, }, 1151 { 1152 .name = "C8", 1153 .desc = "MWAIT 0x40", 1154 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1155 .exit_latency = 1000, 1156 .target_residency = 1000, 1157 .enter = &intel_idle, 1158 .enter_s2idle = intel_idle_s2idle, }, 1159 { 1160 .name = "C9", 1161 .desc = "MWAIT 0x50", 1162 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1163 .exit_latency = 2000, 1164 .target_residency = 2000, 1165 .enter = &intel_idle, 1166 .enter_s2idle = intel_idle_s2idle, }, 1167 { 1168 .name = "C10", 1169 .desc = "MWAIT 0x60", 1170 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1171 .exit_latency = 10000, 1172 .target_residency = 10000, 1173 .enter = &intel_idle, 1174 .enter_s2idle = intel_idle_s2idle, }, 1175 { 1176 .enter = NULL } 1177 }; 1178 1179 static struct cpuidle_state dnv_cstates[] __initdata = { 1180 { 1181 .name = "C1", 1182 .desc = "MWAIT 0x00", 1183 .flags = MWAIT2flg(0x00), 1184 .exit_latency = 2, 1185 .target_residency = 2, 1186 .enter = &intel_idle, 1187 .enter_s2idle = intel_idle_s2idle, }, 1188 { 1189 .name = "C1E", 1190 .desc = "MWAIT 0x01", 1191 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1192 
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Per-model driver configuration objects referenced from intel_idle_ids[]
 * below.  Each one selects a static state table and the model-specific MSR
 * tweaks (auto-demotion disabling, C1E promotion) applied at CPU init time.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

/* Server variant: also consult ACPI _CST (see use_acpi handling at init). */
static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_gmt __initconst = {
	.state_table = gmt_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * CPU model match table; driver_data points at the struct idle_cpu
 * configuration for the model (consumed in intel_idle_init()).
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&idle_cpu_gmt),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};

/* Fallback match: any family 6 Intel CPU with MWAIT, no per-model data. */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/* Return true if @cstate exceeds the intel_idle.max_cstate module limit. */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/*
 * Return true if entering @state requires switching to the one-shot tick
 * broadcast mechanism (no ARAT and the state is deeper than C1).
 */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	/* The MWAIT hint is encoded in the state flags. */
	unsigned long eax = flg2MWAIT(state->flags);

	/* With ARAT the local APIC timer keeps ticking in deep C-states. */
	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* Cached _CST data extracted by intel_idle_acpi_cst_extract(). */
static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
 *
 * Check if all of the C-states listed by _CST in the max_cstate range are
 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
 */
static bool __init intel_idle_cst_usable(void)
{
	int cstate, limit;

	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
		      acpi_state_table.count);

	/* Index 0 is skipped: states[] is used with 1-based indices here. */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];

		if (cx->entry_method != ACPI_CSTATE_FFH)
			return false;
	}

	return true;
}

/*
 * intel_idle_acpi_cst_extract - Evaluate _CST and cache a usable state table.
 *
 * Walk the possible CPUs until one provides a _CST package whose states are
 * all FFH-entered (per intel_idle_cst_usable()) and _CST control can be
 * claimed.  On failure, reset acpi_state_table.count and return false.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/*
		 * NOTE(review): states[] is indexed from 1 above; bumping the
		 * count apparently makes it usable as an exclusive bound —
		 * confirm against acpi_processor_evaluate_cst() semantics.
		 */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		if (!acpi_processor_claim_cst_control())
			break;

		return true;
	}

	acpi_state_table.count = 0;
	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}

/*
 * intel_idle_init_cstates_acpi - Build the cpuidle states list from _CST.
 * @drv: cpuidle driver to populate (state_count is advanced per state).
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
1539 */ 1540 for (cstate = 1; cstate < limit; cstate++) { 1541 struct acpi_processor_cx *cx; 1542 struct cpuidle_state *state; 1543 1544 if (intel_idle_max_cstate_reached(cstate - 1)) 1545 break; 1546 1547 cx = &acpi_state_table.states[cstate]; 1548 1549 state = &drv->states[drv->state_count++]; 1550 1551 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1552 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1553 state->exit_latency = cx->latency; 1554 /* 1555 * For C1-type C-states use the same number for both the exit 1556 * latency and target residency, because that is the case for 1557 * C1 in the majority of the static C-states tables above. 1558 * For the other types of C-states, however, set the target 1559 * residency to 3 times the exit latency which should lead to 1560 * a reasonable balance between energy-efficiency and 1561 * performance in the majority of interesting cases. 1562 */ 1563 state->target_residency = cx->latency; 1564 if (cx->type > ACPI_STATE_C1) 1565 state->target_residency *= 3; 1566 1567 state->flags = MWAIT2flg(cx->address); 1568 if (cx->type > ACPI_STATE_C2) 1569 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1570 1571 if (disabled_states_mask & BIT(cstate)) 1572 state->flags |= CPUIDLE_FLAG_OFF; 1573 1574 if (intel_idle_state_needs_timer_stop(state)) 1575 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1576 1577 if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1578 mark_tsc_unstable("TSC halts in idle"); 1579 1580 state->enter = intel_idle; 1581 state->enter_s2idle = intel_idle_s2idle; 1582 } 1583 } 1584 1585 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1586 { 1587 int cstate, limit; 1588 1589 /* 1590 * If there are no _CST C-states, do not disable any C-states by 1591 * default. 
1592 */ 1593 if (!acpi_state_table.count) 1594 return false; 1595 1596 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1597 /* 1598 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1599 * the interesting states are ACPI_CSTATE_FFH. 1600 */ 1601 for (cstate = 1; cstate < limit; cstate++) { 1602 if (acpi_state_table.states[cstate].address == mwait_hint) 1603 return false; 1604 } 1605 return true; 1606 } 1607 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1608 #define force_use_acpi (false) 1609 1610 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1611 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1612 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1613 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1614 1615 /** 1616 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1617 * 1618 * Tune IVT multi-socket targets. 1619 * Assumption: num_sockets == (max_package_num + 1). 1620 */ 1621 static void __init ivt_idle_state_table_update(void) 1622 { 1623 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1624 int cpu, package_num, num_sockets = 1; 1625 1626 for_each_online_cpu(cpu) { 1627 package_num = topology_physical_package_id(cpu); 1628 if (package_num + 1 > num_sockets) { 1629 num_sockets = package_num + 1; 1630 1631 if (num_sockets > 4) { 1632 cpuidle_state_table = ivt_cstates_8s; 1633 return; 1634 } 1635 } 1636 } 1637 1638 if (num_sockets > 2) 1639 cpuidle_state_table = ivt_cstates_4s; 1640 1641 /* else, 1 and 2 socket systems use default ivt_cstates */ 1642 } 1643 1644 /** 1645 * irtl_2_usec - IRTL to microseconds conversion. 1646 * @irtl: IRTL MSR value. 1647 * 1648 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1649 */ 1650 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1651 { 1652 static const unsigned int irtl_ns_units[] __initconst = { 1653 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1654 }; 1655 unsigned long long ns; 1656 1657 if (!irtl) 1658 return 0; 1659 1660 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1661 1662 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1663 } 1664 1665 /** 1666 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1667 * 1668 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1669 * definitive maximum latency and use the same value for target_residency. 1670 */ 1671 static void __init bxt_idle_state_table_update(void) 1672 { 1673 unsigned long long msr; 1674 unsigned int usec; 1675 1676 rdmsrl(MSR_PKGC6_IRTL, msr); 1677 usec = irtl_2_usec(msr); 1678 if (usec) { 1679 bxt_cstates[2].exit_latency = usec; 1680 bxt_cstates[2].target_residency = usec; 1681 } 1682 1683 rdmsrl(MSR_PKGC7_IRTL, msr); 1684 usec = irtl_2_usec(msr); 1685 if (usec) { 1686 bxt_cstates[3].exit_latency = usec; 1687 bxt_cstates[3].target_residency = usec; 1688 } 1689 1690 rdmsrl(MSR_PKGC8_IRTL, msr); 1691 usec = irtl_2_usec(msr); 1692 if (usec) { 1693 bxt_cstates[4].exit_latency = usec; 1694 bxt_cstates[4].target_residency = usec; 1695 } 1696 1697 rdmsrl(MSR_PKGC9_IRTL, msr); 1698 usec = irtl_2_usec(msr); 1699 if (usec) { 1700 bxt_cstates[5].exit_latency = usec; 1701 bxt_cstates[5].target_residency = usec; 1702 } 1703 1704 rdmsrl(MSR_PKGC10_IRTL, msr); 1705 usec = irtl_2_usec(msr); 1706 if (usec) { 1707 bxt_cstates[6].exit_latency = usec; 1708 bxt_cstates[6].target_residency = usec; 1709 } 1710 1711 } 1712 1713 /** 1714 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1715 * 1716 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1717 */ 1718 static void __init sklh_idle_state_table_update(void) 1719 { 1720 unsigned long long msr; 1721 unsigned int eax, ebx, ecx, edx; 1722 1723 1724 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1725 if (max_cstate <= 7) 1726 return; 1727 1728 /* if PC10 not present in CPUID.MWAIT.EDX */ 1729 if ((mwait_substates & (0xF << 28)) == 0) 1730 return; 1731 1732 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1733 1734 /* PC10 is not enabled in PKG C-state limit */ 1735 if ((msr & 0xF) != 8) 1736 return; 1737 1738 ecx = 0; 1739 cpuid(7, &eax, &ebx, &ecx, &edx); 1740 1741 /* if SGX is present */ 1742 if (ebx & (1 << 2)) { 1743 1744 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1745 1746 /* if SGX is enabled */ 1747 if (msr & (1 << 18)) 1748 return; 1749 } 1750 1751 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1752 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1753 } 1754 1755 /** 1756 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1757 * idle states table. 1758 */ 1759 static void __init skx_idle_state_table_update(void) 1760 { 1761 unsigned long long msr; 1762 1763 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1764 1765 /* 1766 * 000b: C0/C1 (no package C-state support) 1767 * 001b: C2 1768 * 010b: C6 (non-retention) 1769 * 011b: C6 (retention) 1770 * 111b: No Package C state limits. 1771 */ 1772 if ((msr & 0x7) < 2) { 1773 /* 1774 * Uses the CC6 + PC0 latency and 3 times of 1775 * latency for target_residency if the PC6 1776 * is disabled in BIOS. This is consistent 1777 * with how intel_idle driver uses _CST 1778 * to set the target_residency. 1779 */ 1780 skx_cstates[2].exit_latency = 92; 1781 skx_cstates[2].target_residency = 276; 1782 } 1783 } 1784 1785 /** 1786 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1787 */ 1788 static void __init adl_idle_state_table_update(void) 1789 { 1790 /* Check if user prefers C1 over C1E. 
	 */
	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
		/* Table slot 0 is C1, slot 1 is C1E: swap which one is usable. */
		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;

		/* Disable C1E by clearing the "C1E promotion" bit. */
		c1e_promotion = C1E_PROMOTION_DISABLE;
		return;
	}

	/* Make sure C1E is enabled by default */
	c1e_promotion = C1E_PROMOTION_ENABLE;
}

/**
 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
 */
static void __init spr_idle_state_table_update(void)
{
	unsigned long long msr;

	/*
	 * By default, the C6 state assumes the worst-case scenario of package
	 * C6. However, if PC6 is disabled, we update the numbers to match
	 * core C6.
	 */
	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* Limit value 2 and above allow for PC6. */
	if ((msr & 0x7) < 2) {
		spr_cstates[2].exit_latency = 190;
		spr_cstates[2].target_residency = 600;
	}
}

/*
 * intel_idle_verify_cstate - Validate a C-state against CPUID.MWAIT data.
 * @mwait_hint: MWAIT hint of the C-state.
 *
 * Return true if CPUID reports at least one MWAIT sub-state for the C-state.
 * Side effect: mark the TSC unstable for states deeper than C2 on parts
 * without a nonstop TSC.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	/* Each C-state has a 4-bit sub-state count in mwait_substates. */
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
					MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/* Pick the idle-entry callback matching the state flags and module options. */
static void state_update_enter_method(struct cpuidle_state *state, int cstate)
{
	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
		/*
		 * Combining CPUIDLE_FLAG_INIT_XSTATE with the IBRS or
		 * IRQ_ENABLE flags is not currently supported by this driver.
1847 */ 1848 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 1849 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1850 state->enter = intel_idle_xstate; 1851 return; 1852 } 1853 1854 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1855 state->flags & CPUIDLE_FLAG_IBRS) { 1856 /* 1857 * IBRS mitigation requires that C-states are entered 1858 * with interrupts disabled. 1859 */ 1860 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1861 state->enter = intel_idle_ibrs; 1862 return; 1863 } 1864 1865 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 1866 state->enter = intel_idle_irq; 1867 return; 1868 } 1869 1870 if (force_irq_on) { 1871 pr_info("forced intel_idle_irq for state %d\n", cstate); 1872 state->enter = intel_idle_irq; 1873 } 1874 } 1875 1876 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1877 { 1878 int cstate; 1879 1880 switch (boot_cpu_data.x86_model) { 1881 case INTEL_FAM6_IVYBRIDGE_X: 1882 ivt_idle_state_table_update(); 1883 break; 1884 case INTEL_FAM6_ATOM_GOLDMONT: 1885 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1886 bxt_idle_state_table_update(); 1887 break; 1888 case INTEL_FAM6_SKYLAKE: 1889 sklh_idle_state_table_update(); 1890 break; 1891 case INTEL_FAM6_SKYLAKE_X: 1892 skx_idle_state_table_update(); 1893 break; 1894 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1895 case INTEL_FAM6_EMERALDRAPIDS_X: 1896 spr_idle_state_table_update(); 1897 break; 1898 case INTEL_FAM6_ALDERLAKE: 1899 case INTEL_FAM6_ALDERLAKE_L: 1900 case INTEL_FAM6_ATOM_GRACEMONT: 1901 adl_idle_state_table_update(); 1902 break; 1903 } 1904 1905 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1906 struct cpuidle_state *state; 1907 unsigned int mwait_hint; 1908 1909 if (intel_idle_max_cstate_reached(cstate)) 1910 break; 1911 1912 if (!cpuidle_state_table[cstate].enter && 1913 !cpuidle_state_table[cstate].enter_s2idle) 1914 break; 1915 1916 /* If marked as unusable, skip this state. 
		 */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];
		state = &drv->states[drv->state_count];

		state_update_enter_method(state, cstate);


		/*
		 * Register the state disabled if the user masked it off or
		 * _CST does not list it (unless it is always-enabled).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	/* Baytrail/Cherrytrail: zero the CC6/MC6 demotion policy MSRs. */
	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
1955 */ 1956 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1957 { 1958 cpuidle_poll_state_init(drv); 1959 1960 if (disabled_states_mask & BIT(0)) 1961 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1962 1963 drv->state_count = 1; 1964 1965 if (icpu) 1966 intel_idle_init_cstates_icpu(drv); 1967 else 1968 intel_idle_init_cstates_acpi(drv); 1969 } 1970 1971 static void auto_demotion_disable(void) 1972 { 1973 unsigned long long msr_bits; 1974 1975 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1976 msr_bits &= ~auto_demotion_disable_flags; 1977 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1978 } 1979 1980 static void c1e_promotion_enable(void) 1981 { 1982 unsigned long long msr_bits; 1983 1984 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1985 msr_bits |= 0x2; 1986 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1987 } 1988 1989 static void c1e_promotion_disable(void) 1990 { 1991 unsigned long long msr_bits; 1992 1993 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1994 msr_bits &= ~0x2; 1995 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1996 } 1997 1998 /** 1999 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 2000 * @cpu: CPU to initialize. 2001 * 2002 * Register a cpuidle device object for @cpu and update its MSRs in accordance 2003 * with the processor model flags. 
2004 */ 2005 static int intel_idle_cpu_init(unsigned int cpu) 2006 { 2007 struct cpuidle_device *dev; 2008 2009 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2010 dev->cpu = cpu; 2011 2012 if (cpuidle_register_device(dev)) { 2013 pr_debug("cpuidle_register_device %d failed!\n", cpu); 2014 return -EIO; 2015 } 2016 2017 if (auto_demotion_disable_flags) 2018 auto_demotion_disable(); 2019 2020 if (c1e_promotion == C1E_PROMOTION_ENABLE) 2021 c1e_promotion_enable(); 2022 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 2023 c1e_promotion_disable(); 2024 2025 return 0; 2026 } 2027 2028 static int intel_idle_cpu_online(unsigned int cpu) 2029 { 2030 struct cpuidle_device *dev; 2031 2032 if (!boot_cpu_has(X86_FEATURE_ARAT)) 2033 tick_broadcast_enable(); 2034 2035 /* 2036 * Some systems can hotplug a cpu at runtime after 2037 * the kernel has booted, we have to initialize the 2038 * driver in this case 2039 */ 2040 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 2041 if (!dev->registered) 2042 return intel_idle_cpu_init(cpu); 2043 2044 return 0; 2045 } 2046 2047 /** 2048 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
2049 */ 2050 static void __init intel_idle_cpuidle_devices_uninit(void) 2051 { 2052 int i; 2053 2054 for_each_online_cpu(i) 2055 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2056 } 2057 2058 static int __init intel_idle_init(void) 2059 { 2060 const struct x86_cpu_id *id; 2061 unsigned int eax, ebx, ecx; 2062 int retval; 2063 2064 /* Do not load intel_idle at all for now if idle= is passed */ 2065 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2066 return -ENODEV; 2067 2068 if (max_cstate == 0) { 2069 pr_debug("disabled\n"); 2070 return -EPERM; 2071 } 2072 2073 id = x86_match_cpu(intel_idle_ids); 2074 if (id) { 2075 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2076 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2077 return -ENODEV; 2078 } 2079 } else { 2080 id = x86_match_cpu(intel_mwait_ids); 2081 if (!id) 2082 return -ENODEV; 2083 } 2084 2085 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 2086 return -ENODEV; 2087 2088 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 2089 2090 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2091 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2092 !mwait_substates) 2093 return -ENODEV; 2094 2095 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2096 2097 icpu = (const struct idle_cpu *)id->driver_data; 2098 if (icpu) { 2099 cpuidle_state_table = icpu->state_table; 2100 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 2101 if (icpu->disable_promotion_to_c1e) 2102 c1e_promotion = C1E_PROMOTION_DISABLE; 2103 if (icpu->use_acpi || force_use_acpi) 2104 intel_idle_acpi_cst_extract(); 2105 } else if (!intel_idle_acpi_cst_extract()) { 2106 return -ENODEV; 2107 } 2108 2109 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 2110 boot_cpu_data.x86_model); 2111 2112 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2113 if (!intel_idle_cpuidle_devices) 2114 return -ENOMEM; 2115 2116 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2117 2118 retval = 
		 cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * Sapphire Rapids platform.
 * This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
/*
 * Debugging option that forces the driver to enter all C-states with
 * interrupts enabled. Does not apply to C-states with
 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags.
 */
module_param(force_irq_on, bool, 0444);