// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the LAPIC timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
#include <asm/fpu/api.h>

#define INTEL_IDLE_VERSION "0.5.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * Bit masks of idle-state indices to force-disable / prefer; presumably set
 * via module parameters defined elsewhere in this file (not visible here).
 */
static unsigned int disabled_states_mask __read_mostly;
static unsigned int preferred_states_mask __read_mostly;
static bool force_irq_on __read_mostly;

/* Per-CPU cpuidle device objects registered with the cpuidle core. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

static unsigned long auto_demotion_disable_flags;

/*
 * Requested handling of the hardware C1E promotion setting.  The default,
 * C1E_PROMOTION_PRESERVE, leaves the current setting untouched.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;

/* Per-platform description used to pick a state table and related quirks. */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably "also consult ACPI _CST" — confirm against match code. */
	bool use_acpi;
};

static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/* MWAIT sub-state capability word, filled in during init (not visible here). */
static unsigned int mwait_substates __initdata;

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * Initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE	BIT(17)

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)

/*
 * Common MWAIT-based idle entry: the EAX hint is extracted from the top byte
 * of the state's flags (see flg2MWAIT()); ECX bit 0 requests that a pending
 * interrupt breaks MWAIT even with interrupts disabled.
 */
static __always_inline int __intel_idle(struct cpuidle_device *dev,
					struct cpuidle_driver *drv, int index)
{
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	return index;
}

/**
 * intel_idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * If the local APIC timer is not known to be reliable in the target idle state,
 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	return __intel_idle(dev, drv, index);
}

/*
 * MWAIT entry with interrupts enabled — presumably used for states carrying
 * CPUIDLE_FLAG_IRQ_ENABLE (dispatch code not visible here).  Interrupts are
 * re-disabled before returning, as callers expect IRQs off.
 */
static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	int ret;

	raw_local_irq_enable();
	ret = __intel_idle(dev, drv, index);
	raw_local_irq_disable();

	return ret;
}

/*
 * MWAIT entry that clears MSR_IA32_SPEC_CTRL (disabling IBRS) across the idle
 * period when SMT is active, and restores the previous value afterwards.
 * Only done with SMT active: that is the configuration in which the sibling
 * can run while this CPU idles.
 */
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
{
	bool smt_active = sched_smt_active();
	u64 spec_ctrl = spec_ctrl_current();
	int ret;

	if (smt_active)
		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	ret = __intel_idle(dev, drv, index);

	if (smt_active)
		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);

	return ret;
}

/*
 * MWAIT entry that first puts the FPU registers into an idle-friendly state
 * (see CPUIDLE_FLAG_INIT_XSTATE and fpu_idle_fpregs()).
 */
static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	fpu_idle_fpregs();
	return __intel_idle(dev, drv, index);
}

/*
 * HLT-based idle entry.  raw_safe_halt() waits for an interrupt (interrupts
 * are enabled for the wait); they are disabled again before returning because
 * callers expect IRQs off.
 */
static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
					    struct cpuidle_driver *drv, int index)
{
	raw_safe_halt();
	raw_local_irq_disable();
	return index;
}

/**
 * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the HLT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	return __intel_idle_hlt(dev, drv, index);
}

/*
 * HLT entry with interrupts enabled around the halt; IRQs are re-disabled
 * before returning, matching the contract of the other ->enter callbacks.
 */
static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
					   struct cpuidle_driver *drv, int index)
{
	int ret;

	raw_local_irq_enable();
	ret = __intel_idle_hlt(dev, drv, index);
	raw_local_irq_disable();

	return ret;
}

/**
 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);

	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
		fpu_idle_fpregs();

	mwait_idle_with_hints(eax, ecx);

	/* s2idle callbacks do not report a state index. */
	return 0;
}

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/*
 * Hardware-tuned state tables follow.  In every entry the MWAIT hint lives in
 * the top byte of .flags (see MWAIT2flg()); exit_latency and target_residency
 * follow the cpuidle convention (microseconds).
 */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags
= MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* State values in these tables are hardware-tuned; treat as data, not logic. */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) |
CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* MWAIT hint is in the top byte of .flags; latencies/residencies are tuned data. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags =
MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Variant tables differ only in residencies (deeper states get longer
 * residencies as socket count grows) — presumably 4-socket and 8-socket
 * Ivy Town configurations, per the _4s/_8s suffixes.
 */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Haswell state table — hardware-tuned latencies/residencies, treat as data. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle =
intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Broadwell state table — same shape as hsw_cstates with retuned values. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags =
MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Skylake client table.  Deep states carry CPUIDLE_FLAG_IBRS so IBRS is
 * dropped across idle (see intel_idle_ibrs()).
 */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED |
CPUIDLE_FLAG_IBRS,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Skylake server: C1 enters MWAIT with interrupts on (CPUIDLE_FLAG_IRQ_ENABLE). */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
 * But in this case there is effectively no C1, because C1 requests are
 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
 * and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1E and disable C1 by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
static struct cpuidle_state adl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 220,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 280,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 680,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static
struct cpuidle_state adl_l_cstates[] __initdata = { 919 { 920 .name = "C1", 921 .desc = "MWAIT 0x00", 922 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 923 .exit_latency = 1, 924 .target_residency = 1, 925 .enter = &intel_idle, 926 .enter_s2idle = intel_idle_s2idle, }, 927 { 928 .name = "C1E", 929 .desc = "MWAIT 0x01", 930 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 931 .exit_latency = 2, 932 .target_residency = 4, 933 .enter = &intel_idle, 934 .enter_s2idle = intel_idle_s2idle, }, 935 { 936 .name = "C6", 937 .desc = "MWAIT 0x20", 938 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 939 .exit_latency = 170, 940 .target_residency = 500, 941 .enter = &intel_idle, 942 .enter_s2idle = intel_idle_s2idle, }, 943 { 944 .name = "C8", 945 .desc = "MWAIT 0x40", 946 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 947 .exit_latency = 200, 948 .target_residency = 600, 949 .enter = &intel_idle, 950 .enter_s2idle = intel_idle_s2idle, }, 951 { 952 .name = "C10", 953 .desc = "MWAIT 0x60", 954 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 955 .exit_latency = 230, 956 .target_residency = 700, 957 .enter = &intel_idle, 958 .enter_s2idle = intel_idle_s2idle, }, 959 { 960 .enter = NULL } 961 }; 962 963 static struct cpuidle_state adl_n_cstates[] __initdata = { 964 { 965 .name = "C1", 966 .desc = "MWAIT 0x00", 967 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 968 .exit_latency = 1, 969 .target_residency = 1, 970 .enter = &intel_idle, 971 .enter_s2idle = intel_idle_s2idle, }, 972 { 973 .name = "C1E", 974 .desc = "MWAIT 0x01", 975 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 976 .exit_latency = 2, 977 .target_residency = 4, 978 .enter = &intel_idle, 979 .enter_s2idle = intel_idle_s2idle, }, 980 { 981 .name = "C6", 982 .desc = "MWAIT 0x20", 983 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 984 .exit_latency = 195, 985 .target_residency = 585, 986 .enter = &intel_idle, 987 .enter_s2idle = intel_idle_s2idle, }, 988 { 989 .name = "C8", 990 
.desc = "MWAIT 0x40", 991 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 992 .exit_latency = 260, 993 .target_residency = 1040, 994 .enter = &intel_idle, 995 .enter_s2idle = intel_idle_s2idle, }, 996 { 997 .name = "C10", 998 .desc = "MWAIT 0x60", 999 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1000 .exit_latency = 660, 1001 .target_residency = 1980, 1002 .enter = &intel_idle, 1003 .enter_s2idle = intel_idle_s2idle, }, 1004 { 1005 .enter = NULL } 1006 }; 1007 1008 static struct cpuidle_state spr_cstates[] __initdata = { 1009 { 1010 .name = "C1", 1011 .desc = "MWAIT 0x00", 1012 .flags = MWAIT2flg(0x00), 1013 .exit_latency = 1, 1014 .target_residency = 1, 1015 .enter = &intel_idle, 1016 .enter_s2idle = intel_idle_s2idle, }, 1017 { 1018 .name = "C1E", 1019 .desc = "MWAIT 0x01", 1020 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1021 .exit_latency = 2, 1022 .target_residency = 4, 1023 .enter = &intel_idle, 1024 .enter_s2idle = intel_idle_s2idle, }, 1025 { 1026 .name = "C6", 1027 .desc = "MWAIT 0x20", 1028 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | 1029 CPUIDLE_FLAG_INIT_XSTATE, 1030 .exit_latency = 290, 1031 .target_residency = 800, 1032 .enter = &intel_idle, 1033 .enter_s2idle = intel_idle_s2idle, }, 1034 { 1035 .enter = NULL } 1036 }; 1037 1038 static struct cpuidle_state atom_cstates[] __initdata = { 1039 { 1040 .name = "C1E", 1041 .desc = "MWAIT 0x00", 1042 .flags = MWAIT2flg(0x00), 1043 .exit_latency = 10, 1044 .target_residency = 20, 1045 .enter = &intel_idle, 1046 .enter_s2idle = intel_idle_s2idle, }, 1047 { 1048 .name = "C2", 1049 .desc = "MWAIT 0x10", 1050 .flags = MWAIT2flg(0x10), 1051 .exit_latency = 20, 1052 .target_residency = 80, 1053 .enter = &intel_idle, 1054 .enter_s2idle = intel_idle_s2idle, }, 1055 { 1056 .name = "C4", 1057 .desc = "MWAIT 0x30", 1058 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1059 .exit_latency = 100, 1060 .target_residency = 400, 1061 .enter = &intel_idle, 1062 .enter_s2idle = 
intel_idle_s2idle, }, 1063 { 1064 .name = "C6", 1065 .desc = "MWAIT 0x52", 1066 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1067 .exit_latency = 140, 1068 .target_residency = 560, 1069 .enter = &intel_idle, 1070 .enter_s2idle = intel_idle_s2idle, }, 1071 { 1072 .enter = NULL } 1073 }; 1074 static struct cpuidle_state tangier_cstates[] __initdata = { 1075 { 1076 .name = "C1", 1077 .desc = "MWAIT 0x00", 1078 .flags = MWAIT2flg(0x00), 1079 .exit_latency = 1, 1080 .target_residency = 4, 1081 .enter = &intel_idle, 1082 .enter_s2idle = intel_idle_s2idle, }, 1083 { 1084 .name = "C4", 1085 .desc = "MWAIT 0x30", 1086 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 1087 .exit_latency = 100, 1088 .target_residency = 400, 1089 .enter = &intel_idle, 1090 .enter_s2idle = intel_idle_s2idle, }, 1091 { 1092 .name = "C6", 1093 .desc = "MWAIT 0x52", 1094 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 1095 .exit_latency = 140, 1096 .target_residency = 560, 1097 .enter = &intel_idle, 1098 .enter_s2idle = intel_idle_s2idle, }, 1099 { 1100 .name = "C7", 1101 .desc = "MWAIT 0x60", 1102 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1103 .exit_latency = 1200, 1104 .target_residency = 4000, 1105 .enter = &intel_idle, 1106 .enter_s2idle = intel_idle_s2idle, }, 1107 { 1108 .name = "C9", 1109 .desc = "MWAIT 0x64", 1110 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 1111 .exit_latency = 10000, 1112 .target_residency = 20000, 1113 .enter = &intel_idle, 1114 .enter_s2idle = intel_idle_s2idle, }, 1115 { 1116 .enter = NULL } 1117 }; 1118 static struct cpuidle_state avn_cstates[] __initdata = { 1119 { 1120 .name = "C1", 1121 .desc = "MWAIT 0x00", 1122 .flags = MWAIT2flg(0x00), 1123 .exit_latency = 2, 1124 .target_residency = 2, 1125 .enter = &intel_idle, 1126 .enter_s2idle = intel_idle_s2idle, }, 1127 { 1128 .name = "C6", 1129 .desc = "MWAIT 0x51", 1130 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1131 .exit_latency = 15, 1132 .target_residency = 45, 1133 
.enter = &intel_idle, 1134 .enter_s2idle = intel_idle_s2idle, }, 1135 { 1136 .enter = NULL } 1137 }; 1138 static struct cpuidle_state knl_cstates[] __initdata = { 1139 { 1140 .name = "C1", 1141 .desc = "MWAIT 0x00", 1142 .flags = MWAIT2flg(0x00), 1143 .exit_latency = 1, 1144 .target_residency = 2, 1145 .enter = &intel_idle, 1146 .enter_s2idle = intel_idle_s2idle }, 1147 { 1148 .name = "C6", 1149 .desc = "MWAIT 0x10", 1150 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1151 .exit_latency = 120, 1152 .target_residency = 500, 1153 .enter = &intel_idle, 1154 .enter_s2idle = intel_idle_s2idle }, 1155 { 1156 .enter = NULL } 1157 }; 1158 1159 static struct cpuidle_state bxt_cstates[] __initdata = { 1160 { 1161 .name = "C1", 1162 .desc = "MWAIT 0x00", 1163 .flags = MWAIT2flg(0x00), 1164 .exit_latency = 2, 1165 .target_residency = 2, 1166 .enter = &intel_idle, 1167 .enter_s2idle = intel_idle_s2idle, }, 1168 { 1169 .name = "C1E", 1170 .desc = "MWAIT 0x01", 1171 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1172 .exit_latency = 10, 1173 .target_residency = 20, 1174 .enter = &intel_idle, 1175 .enter_s2idle = intel_idle_s2idle, }, 1176 { 1177 .name = "C6", 1178 .desc = "MWAIT 0x20", 1179 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1180 .exit_latency = 133, 1181 .target_residency = 133, 1182 .enter = &intel_idle, 1183 .enter_s2idle = intel_idle_s2idle, }, 1184 { 1185 .name = "C7s", 1186 .desc = "MWAIT 0x31", 1187 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1188 .exit_latency = 155, 1189 .target_residency = 155, 1190 .enter = &intel_idle, 1191 .enter_s2idle = intel_idle_s2idle, }, 1192 { 1193 .name = "C8", 1194 .desc = "MWAIT 0x40", 1195 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1196 .exit_latency = 1000, 1197 .target_residency = 1000, 1198 .enter = &intel_idle, 1199 .enter_s2idle = intel_idle_s2idle, }, 1200 { 1201 .name = "C9", 1202 .desc = "MWAIT 0x50", 1203 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1204 .exit_latency = 
2000, 1205 .target_residency = 2000, 1206 .enter = &intel_idle, 1207 .enter_s2idle = intel_idle_s2idle, }, 1208 { 1209 .name = "C10", 1210 .desc = "MWAIT 0x60", 1211 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1212 .exit_latency = 10000, 1213 .target_residency = 10000, 1214 .enter = &intel_idle, 1215 .enter_s2idle = intel_idle_s2idle, }, 1216 { 1217 .enter = NULL } 1218 }; 1219 1220 static struct cpuidle_state dnv_cstates[] __initdata = { 1221 { 1222 .name = "C1", 1223 .desc = "MWAIT 0x00", 1224 .flags = MWAIT2flg(0x00), 1225 .exit_latency = 2, 1226 .target_residency = 2, 1227 .enter = &intel_idle, 1228 .enter_s2idle = intel_idle_s2idle, }, 1229 { 1230 .name = "C1E", 1231 .desc = "MWAIT 0x01", 1232 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1233 .exit_latency = 10, 1234 .target_residency = 20, 1235 .enter = &intel_idle, 1236 .enter_s2idle = intel_idle_s2idle, }, 1237 { 1238 .name = "C6", 1239 .desc = "MWAIT 0x20", 1240 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1241 .exit_latency = 50, 1242 .target_residency = 500, 1243 .enter = &intel_idle, 1244 .enter_s2idle = intel_idle_s2idle, }, 1245 { 1246 .enter = NULL } 1247 }; 1248 1249 /* 1250 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 1251 * C6, and this is indicated in the CPUID mwait leaf. 
1252 */ 1253 static struct cpuidle_state snr_cstates[] __initdata = { 1254 { 1255 .name = "C1", 1256 .desc = "MWAIT 0x00", 1257 .flags = MWAIT2flg(0x00), 1258 .exit_latency = 2, 1259 .target_residency = 2, 1260 .enter = &intel_idle, 1261 .enter_s2idle = intel_idle_s2idle, }, 1262 { 1263 .name = "C1E", 1264 .desc = "MWAIT 0x01", 1265 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1266 .exit_latency = 15, 1267 .target_residency = 25, 1268 .enter = &intel_idle, 1269 .enter_s2idle = intel_idle_s2idle, }, 1270 { 1271 .name = "C6", 1272 .desc = "MWAIT 0x20", 1273 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1274 .exit_latency = 130, 1275 .target_residency = 500, 1276 .enter = &intel_idle, 1277 .enter_s2idle = intel_idle_s2idle, }, 1278 { 1279 .enter = NULL } 1280 }; 1281 1282 static struct cpuidle_state vmguest_cstates[] __initdata = { 1283 { 1284 .name = "C1", 1285 .desc = "HLT", 1286 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 1287 .exit_latency = 5, 1288 .target_residency = 10, 1289 .enter = &intel_idle_hlt, }, 1290 { 1291 .name = "C1L", 1292 .desc = "Long HLT", 1293 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED, 1294 .exit_latency = 5, 1295 .target_residency = 200, 1296 .enter = &intel_idle_hlt, }, 1297 { 1298 .enter = NULL } 1299 }; 1300 1301 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1302 .state_table = nehalem_cstates, 1303 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1304 .disable_promotion_to_c1e = true, 1305 }; 1306 1307 static const struct idle_cpu idle_cpu_nhx __initconst = { 1308 .state_table = nehalem_cstates, 1309 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1310 .disable_promotion_to_c1e = true, 1311 .use_acpi = true, 1312 }; 1313 1314 static const struct idle_cpu idle_cpu_atom __initconst = { 1315 .state_table = atom_cstates, 1316 }; 1317 1318 static const struct idle_cpu idle_cpu_tangier __initconst = { 1319 .state_table = tangier_cstates, 1320 }; 1321 
1322 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1323 .state_table = atom_cstates, 1324 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1325 }; 1326 1327 static const struct idle_cpu idle_cpu_snb __initconst = { 1328 .state_table = snb_cstates, 1329 .disable_promotion_to_c1e = true, 1330 }; 1331 1332 static const struct idle_cpu idle_cpu_snx __initconst = { 1333 .state_table = snb_cstates, 1334 .disable_promotion_to_c1e = true, 1335 .use_acpi = true, 1336 }; 1337 1338 static const struct idle_cpu idle_cpu_byt __initconst = { 1339 .state_table = byt_cstates, 1340 .disable_promotion_to_c1e = true, 1341 .byt_auto_demotion_disable_flag = true, 1342 }; 1343 1344 static const struct idle_cpu idle_cpu_cht __initconst = { 1345 .state_table = cht_cstates, 1346 .disable_promotion_to_c1e = true, 1347 .byt_auto_demotion_disable_flag = true, 1348 }; 1349 1350 static const struct idle_cpu idle_cpu_ivb __initconst = { 1351 .state_table = ivb_cstates, 1352 .disable_promotion_to_c1e = true, 1353 }; 1354 1355 static const struct idle_cpu idle_cpu_ivt __initconst = { 1356 .state_table = ivt_cstates, 1357 .disable_promotion_to_c1e = true, 1358 .use_acpi = true, 1359 }; 1360 1361 static const struct idle_cpu idle_cpu_hsw __initconst = { 1362 .state_table = hsw_cstates, 1363 .disable_promotion_to_c1e = true, 1364 }; 1365 1366 static const struct idle_cpu idle_cpu_hsx __initconst = { 1367 .state_table = hsw_cstates, 1368 .disable_promotion_to_c1e = true, 1369 .use_acpi = true, 1370 }; 1371 1372 static const struct idle_cpu idle_cpu_bdw __initconst = { 1373 .state_table = bdw_cstates, 1374 .disable_promotion_to_c1e = true, 1375 }; 1376 1377 static const struct idle_cpu idle_cpu_bdx __initconst = { 1378 .state_table = bdw_cstates, 1379 .disable_promotion_to_c1e = true, 1380 .use_acpi = true, 1381 }; 1382 1383 static const struct idle_cpu idle_cpu_skl __initconst = { 1384 .state_table = skl_cstates, 1385 .disable_promotion_to_c1e = true, 1386 }; 1387 1388 static 
const struct idle_cpu idle_cpu_skx __initconst = { 1389 .state_table = skx_cstates, 1390 .disable_promotion_to_c1e = true, 1391 .use_acpi = true, 1392 }; 1393 1394 static const struct idle_cpu idle_cpu_icx __initconst = { 1395 .state_table = icx_cstates, 1396 .disable_promotion_to_c1e = true, 1397 .use_acpi = true, 1398 }; 1399 1400 static const struct idle_cpu idle_cpu_adl __initconst = { 1401 .state_table = adl_cstates, 1402 }; 1403 1404 static const struct idle_cpu idle_cpu_adl_l __initconst = { 1405 .state_table = adl_l_cstates, 1406 }; 1407 1408 static const struct idle_cpu idle_cpu_adl_n __initconst = { 1409 .state_table = adl_n_cstates, 1410 }; 1411 1412 static const struct idle_cpu idle_cpu_spr __initconst = { 1413 .state_table = spr_cstates, 1414 .disable_promotion_to_c1e = true, 1415 .use_acpi = true, 1416 }; 1417 1418 static const struct idle_cpu idle_cpu_avn __initconst = { 1419 .state_table = avn_cstates, 1420 .disable_promotion_to_c1e = true, 1421 .use_acpi = true, 1422 }; 1423 1424 static const struct idle_cpu idle_cpu_knl __initconst = { 1425 .state_table = knl_cstates, 1426 .use_acpi = true, 1427 }; 1428 1429 static const struct idle_cpu idle_cpu_bxt __initconst = { 1430 .state_table = bxt_cstates, 1431 .disable_promotion_to_c1e = true, 1432 }; 1433 1434 static const struct idle_cpu idle_cpu_dnv __initconst = { 1435 .state_table = dnv_cstates, 1436 .disable_promotion_to_c1e = true, 1437 .use_acpi = true, 1438 }; 1439 1440 static const struct idle_cpu idle_cpu_snr __initconst = { 1441 .state_table = snr_cstates, 1442 .disable_promotion_to_c1e = true, 1443 .use_acpi = true, 1444 }; 1445 1446 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1447 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1448 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1449 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1450 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1451 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, 
&idle_cpu_nhx), 1452 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1453 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1454 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1455 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1456 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1457 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1458 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1459 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1460 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1461 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1462 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1463 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1464 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1465 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1466 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1467 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1468 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1469 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1470 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1471 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1472 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1473 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1474 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1475 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1476 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1477 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1478 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1479 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1480 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), 1481 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), 1482 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n), 1483 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), 1484 
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), 1485 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1486 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1487 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1488 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1489 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1490 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1491 {} 1492 }; 1493 1494 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1495 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1496 {} 1497 }; 1498 1499 static bool __init intel_idle_max_cstate_reached(int cstate) 1500 { 1501 if (cstate + 1 > max_cstate) { 1502 pr_info("max_cstate %d reached\n", max_cstate); 1503 return true; 1504 } 1505 return false; 1506 } 1507 1508 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1509 { 1510 unsigned long eax = flg2MWAIT(state->flags); 1511 1512 if (boot_cpu_has(X86_FEATURE_ARAT)) 1513 return false; 1514 1515 /* 1516 * Switch over to one-shot tick broadcast if the target C-state 1517 * is deeper than C1. 1518 */ 1519 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1520 } 1521 1522 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1523 #include <acpi/processor.h> 1524 1525 static bool no_acpi __read_mostly; 1526 module_param(no_acpi, bool, 0444); 1527 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1528 1529 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1530 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1531 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1532 1533 static struct acpi_processor_power acpi_state_table __initdata; 1534 1535 /** 1536 * intel_idle_cst_usable - Check if the _CST information can be used. 
1537 * 1538 * Check if all of the C-states listed by _CST in the max_cstate range are 1539 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1540 */ 1541 static bool __init intel_idle_cst_usable(void) 1542 { 1543 int cstate, limit; 1544 1545 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1546 acpi_state_table.count); 1547 1548 for (cstate = 1; cstate < limit; cstate++) { 1549 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1550 1551 if (cx->entry_method != ACPI_CSTATE_FFH) 1552 return false; 1553 } 1554 1555 return true; 1556 } 1557 1558 static bool __init intel_idle_acpi_cst_extract(void) 1559 { 1560 unsigned int cpu; 1561 1562 if (no_acpi) { 1563 pr_debug("Not allowed to use ACPI _CST\n"); 1564 return false; 1565 } 1566 1567 for_each_possible_cpu(cpu) { 1568 struct acpi_processor *pr = per_cpu(processors, cpu); 1569 1570 if (!pr) 1571 continue; 1572 1573 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1574 continue; 1575 1576 acpi_state_table.count++; 1577 1578 if (!intel_idle_cst_usable()) 1579 continue; 1580 1581 if (!acpi_processor_claim_cst_control()) 1582 break; 1583 1584 return true; 1585 } 1586 1587 acpi_state_table.count = 0; 1588 pr_debug("ACPI _CST not found or not usable\n"); 1589 return false; 1590 } 1591 1592 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1593 { 1594 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1595 1596 /* 1597 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1598 * the interesting states are ACPI_CSTATE_FFH. 
1599 */ 1600 for (cstate = 1; cstate < limit; cstate++) { 1601 struct acpi_processor_cx *cx; 1602 struct cpuidle_state *state; 1603 1604 if (intel_idle_max_cstate_reached(cstate - 1)) 1605 break; 1606 1607 cx = &acpi_state_table.states[cstate]; 1608 1609 state = &drv->states[drv->state_count++]; 1610 1611 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1612 strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1613 state->exit_latency = cx->latency; 1614 /* 1615 * For C1-type C-states use the same number for both the exit 1616 * latency and target residency, because that is the case for 1617 * C1 in the majority of the static C-states tables above. 1618 * For the other types of C-states, however, set the target 1619 * residency to 3 times the exit latency which should lead to 1620 * a reasonable balance between energy-efficiency and 1621 * performance in the majority of interesting cases. 1622 */ 1623 state->target_residency = cx->latency; 1624 if (cx->type > ACPI_STATE_C1) 1625 state->target_residency *= 3; 1626 1627 state->flags = MWAIT2flg(cx->address); 1628 if (cx->type > ACPI_STATE_C2) 1629 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1630 1631 if (disabled_states_mask & BIT(cstate)) 1632 state->flags |= CPUIDLE_FLAG_OFF; 1633 1634 if (intel_idle_state_needs_timer_stop(state)) 1635 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1636 1637 state->enter = intel_idle; 1638 state->enter_s2idle = intel_idle_s2idle; 1639 } 1640 } 1641 1642 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1643 { 1644 int cstate, limit; 1645 1646 /* 1647 * If there are no _CST C-states, do not disable any C-states by 1648 * default. 1649 */ 1650 if (!acpi_state_table.count) 1651 return false; 1652 1653 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1654 /* 1655 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1656 * the interesting states are ACPI_CSTATE_FFH. 
1657 */ 1658 for (cstate = 1; cstate < limit; cstate++) { 1659 if (acpi_state_table.states[cstate].address == mwait_hint) 1660 return false; 1661 } 1662 return true; 1663 } 1664 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1665 #define force_use_acpi (false) 1666 1667 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1668 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1669 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1670 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1671 1672 /** 1673 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1674 * 1675 * Tune IVT multi-socket targets. 1676 * Assumption: num_sockets == (max_package_num + 1). 1677 */ 1678 static void __init ivt_idle_state_table_update(void) 1679 { 1680 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1681 int cpu, package_num, num_sockets = 1; 1682 1683 for_each_online_cpu(cpu) { 1684 package_num = topology_physical_package_id(cpu); 1685 if (package_num + 1 > num_sockets) { 1686 num_sockets = package_num + 1; 1687 1688 if (num_sockets > 4) { 1689 cpuidle_state_table = ivt_cstates_8s; 1690 return; 1691 } 1692 } 1693 } 1694 1695 if (num_sockets > 2) 1696 cpuidle_state_table = ivt_cstates_4s; 1697 1698 /* else, 1 and 2 socket systems use default ivt_cstates */ 1699 } 1700 1701 /** 1702 * irtl_2_usec - IRTL to microseconds conversion. 1703 * @irtl: IRTL MSR value. 1704 * 1705 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1706 */ 1707 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1708 { 1709 static const unsigned int irtl_ns_units[] __initconst = { 1710 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1711 }; 1712 unsigned long long ns; 1713 1714 if (!irtl) 1715 return 0; 1716 1717 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1718 1719 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1720 } 1721 1722 /** 1723 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1724 * 1725 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1726 * definitive maximum latency and use the same value for target_residency. 1727 */ 1728 static void __init bxt_idle_state_table_update(void) 1729 { 1730 unsigned long long msr; 1731 unsigned int usec; 1732 1733 rdmsrl(MSR_PKGC6_IRTL, msr); 1734 usec = irtl_2_usec(msr); 1735 if (usec) { 1736 bxt_cstates[2].exit_latency = usec; 1737 bxt_cstates[2].target_residency = usec; 1738 } 1739 1740 rdmsrl(MSR_PKGC7_IRTL, msr); 1741 usec = irtl_2_usec(msr); 1742 if (usec) { 1743 bxt_cstates[3].exit_latency = usec; 1744 bxt_cstates[3].target_residency = usec; 1745 } 1746 1747 rdmsrl(MSR_PKGC8_IRTL, msr); 1748 usec = irtl_2_usec(msr); 1749 if (usec) { 1750 bxt_cstates[4].exit_latency = usec; 1751 bxt_cstates[4].target_residency = usec; 1752 } 1753 1754 rdmsrl(MSR_PKGC9_IRTL, msr); 1755 usec = irtl_2_usec(msr); 1756 if (usec) { 1757 bxt_cstates[5].exit_latency = usec; 1758 bxt_cstates[5].target_residency = usec; 1759 } 1760 1761 rdmsrl(MSR_PKGC10_IRTL, msr); 1762 usec = irtl_2_usec(msr); 1763 if (usec) { 1764 bxt_cstates[6].exit_latency = usec; 1765 bxt_cstates[6].target_residency = usec; 1766 } 1767 1768 } 1769 1770 /** 1771 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1772 * 1773 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1774 */ 1775 static void __init sklh_idle_state_table_update(void) 1776 { 1777 unsigned long long msr; 1778 unsigned int eax, ebx, ecx, edx; 1779 1780 1781 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1782 if (max_cstate <= 7) 1783 return; 1784 1785 /* if PC10 not present in CPUID.MWAIT.EDX */ 1786 if ((mwait_substates & (0xF << 28)) == 0) 1787 return; 1788 1789 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1790 1791 /* PC10 is not enabled in PKG C-state limit */ 1792 if ((msr & 0xF) != 8) 1793 return; 1794 1795 ecx = 0; 1796 cpuid(7, &eax, &ebx, &ecx, &edx); 1797 1798 /* if SGX is present */ 1799 if (ebx & (1 << 2)) { 1800 1801 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1802 1803 /* if SGX is enabled */ 1804 if (msr & (1 << 18)) 1805 return; 1806 } 1807 1808 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1809 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1810 } 1811 1812 /** 1813 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1814 * idle states table. 1815 */ 1816 static void __init skx_idle_state_table_update(void) 1817 { 1818 unsigned long long msr; 1819 1820 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1821 1822 /* 1823 * 000b: C0/C1 (no package C-state support) 1824 * 001b: C2 1825 * 010b: C6 (non-retention) 1826 * 011b: C6 (retention) 1827 * 111b: No Package C state limits. 1828 */ 1829 if ((msr & 0x7) < 2) { 1830 /* 1831 * Uses the CC6 + PC0 latency and 3 times of 1832 * latency for target_residency if the PC6 1833 * is disabled in BIOS. This is consistent 1834 * with how intel_idle driver uses _CST 1835 * to set the target_residency. 1836 */ 1837 skx_cstates[2].exit_latency = 92; 1838 skx_cstates[2].target_residency = 276; 1839 } 1840 } 1841 1842 /** 1843 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1844 */ 1845 static void __init adl_idle_state_table_update(void) 1846 { 1847 /* Check if user prefers C1 over C1E. 
*/ 1848 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1849 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1850 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1851 1852 /* Disable C1E by clearing the "C1E promotion" bit. */ 1853 c1e_promotion = C1E_PROMOTION_DISABLE; 1854 return; 1855 } 1856 1857 /* Make sure C1E is enabled by default */ 1858 c1e_promotion = C1E_PROMOTION_ENABLE; 1859 } 1860 1861 /** 1862 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1863 */ 1864 static void __init spr_idle_state_table_update(void) 1865 { 1866 unsigned long long msr; 1867 1868 /* 1869 * By default, the C6 state assumes the worst-case scenario of package 1870 * C6. However, if PC6 is disabled, we update the numbers to match 1871 * core C6. 1872 */ 1873 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1874 1875 /* Limit value 2 and above allow for PC6. */ 1876 if ((msr & 0x7) < 2) { 1877 spr_cstates[2].exit_latency = 190; 1878 spr_cstates[2].target_residency = 600; 1879 } 1880 } 1881 1882 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1883 { 1884 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1885 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1886 MWAIT_SUBSTATE_MASK; 1887 1888 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1889 if (num_substates == 0) 1890 return false; 1891 1892 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1893 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1894 1895 return true; 1896 } 1897 1898 static void state_update_enter_method(struct cpuidle_state *state, int cstate) 1899 { 1900 if (state->enter == intel_idle_hlt) { 1901 if (force_irq_on) { 1902 pr_info("forced intel_idle_irq for state %d\n", cstate); 1903 state->enter = intel_idle_hlt_irq_on; 1904 } 1905 return; 1906 } 1907 if (state->enter == intel_idle_hlt_irq_on) 1908 return; /* no update scenarios */ 1909 1910 if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { 1911 /* 1912 * Combining with XSTATE with IBRS or IRQ_ENABLE flags 1913 * is not currently supported but this driver. 1914 */ 1915 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 1916 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1917 state->enter = intel_idle_xstate; 1918 return; 1919 } 1920 1921 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1922 state->flags & CPUIDLE_FLAG_IBRS) { 1923 /* 1924 * IBRS mitigation requires that C-states are entered 1925 * with interrupts disabled. 1926 */ 1927 WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1928 state->enter = intel_idle_ibrs; 1929 return; 1930 } 1931 1932 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 1933 state->enter = intel_idle_irq; 1934 return; 1935 } 1936 1937 if (force_irq_on) { 1938 pr_info("forced intel_idle_irq for state %d\n", cstate); 1939 state->enter = intel_idle_irq; 1940 } 1941 } 1942 1943 /* 1944 * For mwait based states, we want to verify the cpuid data to see if the state 1945 * is actually supported by this specific CPU. 1946 * For non-mwait based states, this check should be skipped. 
1947 */ 1948 static bool should_verify_mwait(struct cpuidle_state *state) 1949 { 1950 if (state->enter == intel_idle_hlt) 1951 return false; 1952 if (state->enter == intel_idle_hlt_irq_on) 1953 return false; 1954 1955 return true; 1956 } 1957 1958 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1959 { 1960 int cstate; 1961 1962 switch (boot_cpu_data.x86_model) { 1963 case INTEL_FAM6_IVYBRIDGE_X: 1964 ivt_idle_state_table_update(); 1965 break; 1966 case INTEL_FAM6_ATOM_GOLDMONT: 1967 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1968 bxt_idle_state_table_update(); 1969 break; 1970 case INTEL_FAM6_SKYLAKE: 1971 sklh_idle_state_table_update(); 1972 break; 1973 case INTEL_FAM6_SKYLAKE_X: 1974 skx_idle_state_table_update(); 1975 break; 1976 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1977 case INTEL_FAM6_EMERALDRAPIDS_X: 1978 spr_idle_state_table_update(); 1979 break; 1980 case INTEL_FAM6_ALDERLAKE: 1981 case INTEL_FAM6_ALDERLAKE_L: 1982 case INTEL_FAM6_ALDERLAKE_N: 1983 adl_idle_state_table_update(); 1984 break; 1985 } 1986 1987 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1988 struct cpuidle_state *state; 1989 unsigned int mwait_hint; 1990 1991 if (intel_idle_max_cstate_reached(cstate)) 1992 break; 1993 1994 if (!cpuidle_state_table[cstate].enter && 1995 !cpuidle_state_table[cstate].enter_s2idle) 1996 break; 1997 1998 /* If marked as unusable, skip this state. */ 1999 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 2000 pr_debug("state %s is disabled\n", 2001 cpuidle_state_table[cstate].name); 2002 continue; 2003 } 2004 2005 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 2006 if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint)) 2007 continue; 2008 2009 /* Structure copy. 
 */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];
		state = &drv->states[drv->state_count];

		state_update_enter_method(state, cstate);

		/*
		 * Mark the state disabled-by-default if the user masked it
		 * out, or if ACPI _CST data says it is off by default (and
		 * the state is not flagged as always enabled).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(state->flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	/* Bay Trail quirk: clear the C6 core/module demotion policies. */
	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
 */
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
	cpuidle_poll_state_init(drv);

	/* Bit 0 of the user mask disables the polling state (state0). */
	if (disabled_states_mask & BIT(0))
		drv->states[0].flags |= CPUIDLE_FLAG_OFF;

	drv->state_count = 1;

	/* Prefer the model-specific table; fall back to ACPI _CST data. */
	if (icpu)
		intel_idle_init_cstates_icpu(drv);
	else
		intel_idle_init_cstates_acpi(drv);
}

/* Clear the auto-demotion enable bits selected by the model flags. */
static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~auto_demotion_disable_flags;
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

/* Set the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_enable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits |= 0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/* Clear the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle
 * core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (c1e_promotion == C1E_PROMOTION_ENABLE)
		c1e_promotion_enable();
	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
		c1e_promotion_disable();

	return 0;
}

/*
 * CPU-online callback: enable the broadcast timer if the local APIC timer is
 * unreliable in deep C-states, and register the device if not done already.
 */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!boot_cpu_has(X86_FEATURE_ARAT))
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

/**
 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/*
 * Match up the latency and break even point of the bare metal (cpu based)
 * states with the deepest VM available state.
 *
 * We only want to do this for the deepest state, the ones that have
 * the TLB_FLUSHED flag set.
 *
 * All our short idle states are dominated by vmexit/vmenter latencies,
 * not the underlying hardware latencies so we keep our values for these.
 */
static void __init matchup_vm_state_with_baremetal(void)
{
	int cstate;

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		int matching_cstate;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		if (!cpuidle_state_table[cstate].enter)
			break;

		/* Only the deep, TLB-flushing guest states are adjusted. */
		if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
			continue;

		/*
		 * Raise the guest state's latency/residency to the largest
		 * values found in the host (bare metal) state table.
		 */
		for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
			if (!icpu->state_table[matching_cstate].enter)
				break;
			if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
				cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
				cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
			}
		}

	}
}

/*
 * intel_idle_vminit - Driver initialization path for VM guests (no MWAIT):
 * use the HLT-based vmguest state table, adjusted against the host CPU's
 * state table, and register the driver and the hotplug callback.
 * @id: matched CPU id entry carrying the host model's idle_cpu data.
 */
static int __init intel_idle_vminit(const struct x86_cpu_id *id)
{
	int retval;

	cpuidle_state_table = vmguest_cstates;

	icpu = (const struct idle_cpu *)id->driver_data;

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	/*
	 * We don't know exactly what the host will do when we go idle, but as a worst estimate
	 * we can assume that the exit latency of the deepest host state will be hit for our
	 * deep (long duration) guest idle state.
	 * The same logic applies to the break even point for the long duration guest idle state.
	 * So lets copy these two properties from the table we found for the host CPU type.
2200 */ 2201 matchup_vm_state_with_baremetal(); 2202 2203 intel_idle_cpuidle_driver_init(&intel_idle_driver); 2204 2205 retval = cpuidle_register_driver(&intel_idle_driver); 2206 if (retval) { 2207 struct cpuidle_driver *drv = cpuidle_get_driver(); 2208 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2209 drv ? drv->name : "none"); 2210 goto init_driver_fail; 2211 } 2212 2213 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2214 intel_idle_cpu_online, NULL); 2215 if (retval < 0) 2216 goto hp_setup_fail; 2217 2218 return 0; 2219 hp_setup_fail: 2220 intel_idle_cpuidle_devices_uninit(); 2221 cpuidle_unregister_driver(&intel_idle_driver); 2222 init_driver_fail: 2223 free_percpu(intel_idle_cpuidle_devices); 2224 return retval; 2225 } 2226 2227 static int __init intel_idle_init(void) 2228 { 2229 const struct x86_cpu_id *id; 2230 unsigned int eax, ebx, ecx; 2231 int retval; 2232 2233 /* Do not load intel_idle at all for now if idle= is passed */ 2234 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 2235 return -ENODEV; 2236 2237 if (max_cstate == 0) { 2238 pr_debug("disabled\n"); 2239 return -EPERM; 2240 } 2241 2242 id = x86_match_cpu(intel_idle_ids); 2243 if (id) { 2244 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2245 if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 2246 return intel_idle_vminit(id); 2247 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2248 return -ENODEV; 2249 } 2250 } else { 2251 id = x86_match_cpu(intel_mwait_ids); 2252 if (!id) 2253 return -ENODEV; 2254 } 2255 2256 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 2257 return -ENODEV; 2258 2259 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 2260 2261 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 2262 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 2263 !mwait_substates) 2264 return -ENODEV; 2265 2266 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 2267 2268 icpu = (const struct idle_cpu *)id->driver_data; 2269 if (icpu) { 2270 cpuidle_state_table = icpu->state_table; 
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		if (icpu->disable_promotion_to_c1e)
			c1e_promotion = C1E_PROMOTION_DISABLE;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* No model table and no usable ACPI _CST data: bail out. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		/* Another cpuidle driver won the registration race. */
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that. Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
2322 */ 2323 module_param(max_cstate, int, 0444); 2324 /* 2325 * The positions of the bits that are set in this number are the indices of the 2326 * idle states to be disabled by default (as reflected by the names of the 2327 * corresponding idle state directories in sysfs, "state0", "state1" ... 2328 * "state<i>" ..., where <i> is the index of the given state). 2329 */ 2330 module_param_named(states_off, disabled_states_mask, uint, 0444); 2331 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2332 /* 2333 * Some platforms come with mutually exclusive C-states, so that if one is 2334 * enabled, the other C-states must not be used. Example: C1 and C1E on 2335 * Sapphire Rapids platform. This parameter allows for selecting the 2336 * preferred C-states among the groups of mutually exclusive C-states - the 2337 * selected C-states will be registered, the other C-states from the mutually 2338 * exclusive group won't be registered. If the platform has no mutually 2339 * exclusive C-states, this parameter has no effect. 2340 */ 2341 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); 2342 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); 2343 /* 2344 * Debugging option that forces the driver to enter all C-states with 2345 * interrupts enabled. Does not apply to C-states with 2346 * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags. 2347 */ 2348 module_param(force_irq_on, bool, 0444); 2349