// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.5.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * Bitmasks of idle states to force-disable and to prefer, respectively.
 * NOTE(review): presumably set via module parameters - the moduleparam
 * registrations are not visible in this chunk; confirm against the rest
 * of the file.
 */
static unsigned int disabled_states_mask;
static unsigned int preferred_states_mask;

/* Per-CPU cpuidle device objects registered by this driver. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * C-state auto-demotion enable bits to clear for the running system
 * (same meaning as the identically named struct idle_cpu field below).
 */
static unsigned long auto_demotion_disable_flags;

/*
 * What to do about hardware "C1E promotion" (see the AlderLake and
 * Sapphire Rapids table comments further down, which describe the
 * "C1E promotion" bit in MSR_IA32_POWER_CTL): leave the firmware
 * setting alone, enable it, or disable it.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;

/*
 * Per-CPU-model driver configuration:
 * @state_table: C-state table to register for this CPU model.
 * @auto_demotion_disable_flags: auto-demotion enable bits to clear.
 * @byt_auto_demotion_disable_flag: BayTrail/CherryTrail demotion quirk
 *	(set only in the byt/cht configurations below).
 * @disable_promotion_to_c1e: prevent HW promotion of C1 requests to C1E.
 * @use_acpi: also consult ACPI when setting up the states
 *	(NOTE(review): presumably the _CST tables; confirm).
 */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	bool use_acpi;
};

/* Configuration matched for the boot CPU; __initdata, freed after init. */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/*
 * MWAIT sub-state information cached at init time.
 * NOTE(review): presumably from the CPUID MWAIT leaf (the SnowRidge table
 * comment below refers to "the CPUID mwait leaf"); confirm where this is
 * populated.
 */
static unsigned int mwait_substates __initdata;

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)

/*
 * Common MWAIT-based idle entry: extract the MWAIT hint from the target
 * state's flags and execute MWAIT with the "break on interrupt" bit set
 * in ECX.  Returns @index to report the state that was asked for.
 */
static __always_inline int __intel_idle(struct cpuidle_device *dev,
					struct cpuidle_driver *drv, int index)
{
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	return index;
}

/**
 * intel_idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * If the local APIC timer is not known to be reliable in the target idle state,
 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	return __intel_idle(dev, drv, index);
}

/*
 * Idle entry for states carrying CPUIDLE_FLAG_IRQ_ENABLE: re-enable
 * interrupts around the MWAIT and disable them again before returning
 * to the cpuidle core.  NOTE(review): the raw_ variants are presumably
 * used to avoid irq-state tracing in the idle path - confirm.
 */
static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	int ret;

	raw_local_irq_enable();
	ret = __intel_idle(dev, drv, index);
	raw_local_irq_disable();

	return ret;
}

/*
 * Idle entry for states carrying CPUIDLE_FLAG_IBRS: when SMT is active,
 * write 0 to MSR_IA32_SPEC_CTRL before idling and restore the previously
 * read value after wakeup.  When SMT is inactive the MSR is left alone.
 */
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
{
	bool smt_active = sched_smt_active();
	u64 spec_ctrl = spec_ctrl_current();
	int ret;

	if (smt_active)
		wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	ret = __intel_idle(dev, drv, index);

	if (smt_active)
		wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);

	return ret;
}

/**
 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 *
 * Always returns 0 (unlike intel_idle(), which returns @index).
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long eax = flg2MWAIT(drv->states[index].flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	return 0;
}

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
215 */ 216 static struct cpuidle_state nehalem_cstates[] __initdata = { 217 { 218 .name = "C1", 219 .desc = "MWAIT 0x00", 220 .flags = MWAIT2flg(0x00), 221 .exit_latency = 3, 222 .target_residency = 6, 223 .enter = &intel_idle, 224 .enter_s2idle = intel_idle_s2idle, }, 225 { 226 .name = "C1E", 227 .desc = "MWAIT 0x01", 228 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 229 .exit_latency = 10, 230 .target_residency = 20, 231 .enter = &intel_idle, 232 .enter_s2idle = intel_idle_s2idle, }, 233 { 234 .name = "C3", 235 .desc = "MWAIT 0x10", 236 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 237 .exit_latency = 20, 238 .target_residency = 80, 239 .enter = &intel_idle, 240 .enter_s2idle = intel_idle_s2idle, }, 241 { 242 .name = "C6", 243 .desc = "MWAIT 0x20", 244 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 245 .exit_latency = 200, 246 .target_residency = 800, 247 .enter = &intel_idle, 248 .enter_s2idle = intel_idle_s2idle, }, 249 { 250 .enter = NULL } 251 }; 252 253 static struct cpuidle_state snb_cstates[] __initdata = { 254 { 255 .name = "C1", 256 .desc = "MWAIT 0x00", 257 .flags = MWAIT2flg(0x00), 258 .exit_latency = 2, 259 .target_residency = 2, 260 .enter = &intel_idle, 261 .enter_s2idle = intel_idle_s2idle, }, 262 { 263 .name = "C1E", 264 .desc = "MWAIT 0x01", 265 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 266 .exit_latency = 10, 267 .target_residency = 20, 268 .enter = &intel_idle, 269 .enter_s2idle = intel_idle_s2idle, }, 270 { 271 .name = "C3", 272 .desc = "MWAIT 0x10", 273 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 274 .exit_latency = 80, 275 .target_residency = 211, 276 .enter = &intel_idle, 277 .enter_s2idle = intel_idle_s2idle, }, 278 { 279 .name = "C6", 280 .desc = "MWAIT 0x20", 281 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 282 .exit_latency = 104, 283 .target_residency = 345, 284 .enter = &intel_idle, 285 .enter_s2idle = intel_idle_s2idle, }, 286 { 287 .name = "C7", 288 .desc = "MWAIT 0x30", 289 .flags 
= MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 290 .exit_latency = 109, 291 .target_residency = 345, 292 .enter = &intel_idle, 293 .enter_s2idle = intel_idle_s2idle, }, 294 { 295 .enter = NULL } 296 }; 297 298 static struct cpuidle_state byt_cstates[] __initdata = { 299 { 300 .name = "C1", 301 .desc = "MWAIT 0x00", 302 .flags = MWAIT2flg(0x00), 303 .exit_latency = 1, 304 .target_residency = 1, 305 .enter = &intel_idle, 306 .enter_s2idle = intel_idle_s2idle, }, 307 { 308 .name = "C6N", 309 .desc = "MWAIT 0x58", 310 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 311 .exit_latency = 300, 312 .target_residency = 275, 313 .enter = &intel_idle, 314 .enter_s2idle = intel_idle_s2idle, }, 315 { 316 .name = "C6S", 317 .desc = "MWAIT 0x52", 318 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 319 .exit_latency = 500, 320 .target_residency = 560, 321 .enter = &intel_idle, 322 .enter_s2idle = intel_idle_s2idle, }, 323 { 324 .name = "C7", 325 .desc = "MWAIT 0x60", 326 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 327 .exit_latency = 1200, 328 .target_residency = 4000, 329 .enter = &intel_idle, 330 .enter_s2idle = intel_idle_s2idle, }, 331 { 332 .name = "C7S", 333 .desc = "MWAIT 0x64", 334 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 335 .exit_latency = 10000, 336 .target_residency = 20000, 337 .enter = &intel_idle, 338 .enter_s2idle = intel_idle_s2idle, }, 339 { 340 .enter = NULL } 341 }; 342 343 static struct cpuidle_state cht_cstates[] __initdata = { 344 { 345 .name = "C1", 346 .desc = "MWAIT 0x00", 347 .flags = MWAIT2flg(0x00), 348 .exit_latency = 1, 349 .target_residency = 1, 350 .enter = &intel_idle, 351 .enter_s2idle = intel_idle_s2idle, }, 352 { 353 .name = "C6N", 354 .desc = "MWAIT 0x58", 355 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 356 .exit_latency = 80, 357 .target_residency = 275, 358 .enter = &intel_idle, 359 .enter_s2idle = intel_idle_s2idle, }, 360 { 361 .name = "C6S", 362 .desc = "MWAIT 0x52", 363 .flags = MWAIT2flg(0x52) | 
CPUIDLE_FLAG_TLB_FLUSHED, 364 .exit_latency = 200, 365 .target_residency = 560, 366 .enter = &intel_idle, 367 .enter_s2idle = intel_idle_s2idle, }, 368 { 369 .name = "C7", 370 .desc = "MWAIT 0x60", 371 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 372 .exit_latency = 1200, 373 .target_residency = 4000, 374 .enter = &intel_idle, 375 .enter_s2idle = intel_idle_s2idle, }, 376 { 377 .name = "C7S", 378 .desc = "MWAIT 0x64", 379 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 380 .exit_latency = 10000, 381 .target_residency = 20000, 382 .enter = &intel_idle, 383 .enter_s2idle = intel_idle_s2idle, }, 384 { 385 .enter = NULL } 386 }; 387 388 static struct cpuidle_state ivb_cstates[] __initdata = { 389 { 390 .name = "C1", 391 .desc = "MWAIT 0x00", 392 .flags = MWAIT2flg(0x00), 393 .exit_latency = 1, 394 .target_residency = 1, 395 .enter = &intel_idle, 396 .enter_s2idle = intel_idle_s2idle, }, 397 { 398 .name = "C1E", 399 .desc = "MWAIT 0x01", 400 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 401 .exit_latency = 10, 402 .target_residency = 20, 403 .enter = &intel_idle, 404 .enter_s2idle = intel_idle_s2idle, }, 405 { 406 .name = "C3", 407 .desc = "MWAIT 0x10", 408 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 409 .exit_latency = 59, 410 .target_residency = 156, 411 .enter = &intel_idle, 412 .enter_s2idle = intel_idle_s2idle, }, 413 { 414 .name = "C6", 415 .desc = "MWAIT 0x20", 416 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 417 .exit_latency = 80, 418 .target_residency = 300, 419 .enter = &intel_idle, 420 .enter_s2idle = intel_idle_s2idle, }, 421 { 422 .name = "C7", 423 .desc = "MWAIT 0x30", 424 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 425 .exit_latency = 87, 426 .target_residency = 300, 427 .enter = &intel_idle, 428 .enter_s2idle = intel_idle_s2idle, }, 429 { 430 .enter = NULL } 431 }; 432 433 static struct cpuidle_state ivt_cstates[] __initdata = { 434 { 435 .name = "C1", 436 .desc = "MWAIT 0x00", 437 .flags = 
MWAIT2flg(0x00), 438 .exit_latency = 1, 439 .target_residency = 1, 440 .enter = &intel_idle, 441 .enter_s2idle = intel_idle_s2idle, }, 442 { 443 .name = "C1E", 444 .desc = "MWAIT 0x01", 445 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 446 .exit_latency = 10, 447 .target_residency = 80, 448 .enter = &intel_idle, 449 .enter_s2idle = intel_idle_s2idle, }, 450 { 451 .name = "C3", 452 .desc = "MWAIT 0x10", 453 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 454 .exit_latency = 59, 455 .target_residency = 156, 456 .enter = &intel_idle, 457 .enter_s2idle = intel_idle_s2idle, }, 458 { 459 .name = "C6", 460 .desc = "MWAIT 0x20", 461 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 462 .exit_latency = 82, 463 .target_residency = 300, 464 .enter = &intel_idle, 465 .enter_s2idle = intel_idle_s2idle, }, 466 { 467 .enter = NULL } 468 }; 469 470 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 471 { 472 .name = "C1", 473 .desc = "MWAIT 0x00", 474 .flags = MWAIT2flg(0x00), 475 .exit_latency = 1, 476 .target_residency = 1, 477 .enter = &intel_idle, 478 .enter_s2idle = intel_idle_s2idle, }, 479 { 480 .name = "C1E", 481 .desc = "MWAIT 0x01", 482 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 483 .exit_latency = 10, 484 .target_residency = 250, 485 .enter = &intel_idle, 486 .enter_s2idle = intel_idle_s2idle, }, 487 { 488 .name = "C3", 489 .desc = "MWAIT 0x10", 490 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 491 .exit_latency = 59, 492 .target_residency = 300, 493 .enter = &intel_idle, 494 .enter_s2idle = intel_idle_s2idle, }, 495 { 496 .name = "C6", 497 .desc = "MWAIT 0x20", 498 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 499 .exit_latency = 84, 500 .target_residency = 400, 501 .enter = &intel_idle, 502 .enter_s2idle = intel_idle_s2idle, }, 503 { 504 .enter = NULL } 505 }; 506 507 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 508 { 509 .name = "C1", 510 .desc = "MWAIT 0x00", 511 .flags = MWAIT2flg(0x00), 512 
.exit_latency = 1, 513 .target_residency = 1, 514 .enter = &intel_idle, 515 .enter_s2idle = intel_idle_s2idle, }, 516 { 517 .name = "C1E", 518 .desc = "MWAIT 0x01", 519 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 520 .exit_latency = 10, 521 .target_residency = 500, 522 .enter = &intel_idle, 523 .enter_s2idle = intel_idle_s2idle, }, 524 { 525 .name = "C3", 526 .desc = "MWAIT 0x10", 527 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 528 .exit_latency = 59, 529 .target_residency = 600, 530 .enter = &intel_idle, 531 .enter_s2idle = intel_idle_s2idle, }, 532 { 533 .name = "C6", 534 .desc = "MWAIT 0x20", 535 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 536 .exit_latency = 88, 537 .target_residency = 700, 538 .enter = &intel_idle, 539 .enter_s2idle = intel_idle_s2idle, }, 540 { 541 .enter = NULL } 542 }; 543 544 static struct cpuidle_state hsw_cstates[] __initdata = { 545 { 546 .name = "C1", 547 .desc = "MWAIT 0x00", 548 .flags = MWAIT2flg(0x00), 549 .exit_latency = 2, 550 .target_residency = 2, 551 .enter = &intel_idle, 552 .enter_s2idle = intel_idle_s2idle, }, 553 { 554 .name = "C1E", 555 .desc = "MWAIT 0x01", 556 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 557 .exit_latency = 10, 558 .target_residency = 20, 559 .enter = &intel_idle, 560 .enter_s2idle = intel_idle_s2idle, }, 561 { 562 .name = "C3", 563 .desc = "MWAIT 0x10", 564 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 565 .exit_latency = 33, 566 .target_residency = 100, 567 .enter = &intel_idle, 568 .enter_s2idle = intel_idle_s2idle, }, 569 { 570 .name = "C6", 571 .desc = "MWAIT 0x20", 572 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 573 .exit_latency = 133, 574 .target_residency = 400, 575 .enter = &intel_idle, 576 .enter_s2idle = intel_idle_s2idle, }, 577 { 578 .name = "C7s", 579 .desc = "MWAIT 0x32", 580 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 581 .exit_latency = 166, 582 .target_residency = 500, 583 .enter = &intel_idle, 584 .enter_s2idle = 
intel_idle_s2idle, }, 585 { 586 .name = "C8", 587 .desc = "MWAIT 0x40", 588 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 589 .exit_latency = 300, 590 .target_residency = 900, 591 .enter = &intel_idle, 592 .enter_s2idle = intel_idle_s2idle, }, 593 { 594 .name = "C9", 595 .desc = "MWAIT 0x50", 596 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 597 .exit_latency = 600, 598 .target_residency = 1800, 599 .enter = &intel_idle, 600 .enter_s2idle = intel_idle_s2idle, }, 601 { 602 .name = "C10", 603 .desc = "MWAIT 0x60", 604 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 605 .exit_latency = 2600, 606 .target_residency = 7700, 607 .enter = &intel_idle, 608 .enter_s2idle = intel_idle_s2idle, }, 609 { 610 .enter = NULL } 611 }; 612 static struct cpuidle_state bdw_cstates[] __initdata = { 613 { 614 .name = "C1", 615 .desc = "MWAIT 0x00", 616 .flags = MWAIT2flg(0x00), 617 .exit_latency = 2, 618 .target_residency = 2, 619 .enter = &intel_idle, 620 .enter_s2idle = intel_idle_s2idle, }, 621 { 622 .name = "C1E", 623 .desc = "MWAIT 0x01", 624 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 625 .exit_latency = 10, 626 .target_residency = 20, 627 .enter = &intel_idle, 628 .enter_s2idle = intel_idle_s2idle, }, 629 { 630 .name = "C3", 631 .desc = "MWAIT 0x10", 632 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 633 .exit_latency = 40, 634 .target_residency = 100, 635 .enter = &intel_idle, 636 .enter_s2idle = intel_idle_s2idle, }, 637 { 638 .name = "C6", 639 .desc = "MWAIT 0x20", 640 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 641 .exit_latency = 133, 642 .target_residency = 400, 643 .enter = &intel_idle, 644 .enter_s2idle = intel_idle_s2idle, }, 645 { 646 .name = "C7s", 647 .desc = "MWAIT 0x32", 648 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 649 .exit_latency = 166, 650 .target_residency = 500, 651 .enter = &intel_idle, 652 .enter_s2idle = intel_idle_s2idle, }, 653 { 654 .name = "C8", 655 .desc = "MWAIT 0x40", 656 .flags = 
MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 657 .exit_latency = 300, 658 .target_residency = 900, 659 .enter = &intel_idle, 660 .enter_s2idle = intel_idle_s2idle, }, 661 { 662 .name = "C9", 663 .desc = "MWAIT 0x50", 664 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 665 .exit_latency = 600, 666 .target_residency = 1800, 667 .enter = &intel_idle, 668 .enter_s2idle = intel_idle_s2idle, }, 669 { 670 .name = "C10", 671 .desc = "MWAIT 0x60", 672 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 673 .exit_latency = 2600, 674 .target_residency = 7700, 675 .enter = &intel_idle, 676 .enter_s2idle = intel_idle_s2idle, }, 677 { 678 .enter = NULL } 679 }; 680 681 static struct cpuidle_state skl_cstates[] __initdata = { 682 { 683 .name = "C1", 684 .desc = "MWAIT 0x00", 685 .flags = MWAIT2flg(0x00), 686 .exit_latency = 2, 687 .target_residency = 2, 688 .enter = &intel_idle, 689 .enter_s2idle = intel_idle_s2idle, }, 690 { 691 .name = "C1E", 692 .desc = "MWAIT 0x01", 693 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 694 .exit_latency = 10, 695 .target_residency = 20, 696 .enter = &intel_idle, 697 .enter_s2idle = intel_idle_s2idle, }, 698 { 699 .name = "C3", 700 .desc = "MWAIT 0x10", 701 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 702 .exit_latency = 70, 703 .target_residency = 100, 704 .enter = &intel_idle, 705 .enter_s2idle = intel_idle_s2idle, }, 706 { 707 .name = "C6", 708 .desc = "MWAIT 0x20", 709 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 710 .exit_latency = 85, 711 .target_residency = 200, 712 .enter = &intel_idle, 713 .enter_s2idle = intel_idle_s2idle, }, 714 { 715 .name = "C7s", 716 .desc = "MWAIT 0x33", 717 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 718 .exit_latency = 124, 719 .target_residency = 800, 720 .enter = &intel_idle, 721 .enter_s2idle = intel_idle_s2idle, }, 722 { 723 .name = "C8", 724 .desc = "MWAIT 0x40", 725 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | 
CPUIDLE_FLAG_IBRS, 726 .exit_latency = 200, 727 .target_residency = 800, 728 .enter = &intel_idle, 729 .enter_s2idle = intel_idle_s2idle, }, 730 { 731 .name = "C9", 732 .desc = "MWAIT 0x50", 733 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 734 .exit_latency = 480, 735 .target_residency = 5000, 736 .enter = &intel_idle, 737 .enter_s2idle = intel_idle_s2idle, }, 738 { 739 .name = "C10", 740 .desc = "MWAIT 0x60", 741 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 742 .exit_latency = 890, 743 .target_residency = 5000, 744 .enter = &intel_idle, 745 .enter_s2idle = intel_idle_s2idle, }, 746 { 747 .enter = NULL } 748 }; 749 750 static struct cpuidle_state skx_cstates[] __initdata = { 751 { 752 .name = "C1", 753 .desc = "MWAIT 0x00", 754 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 755 .exit_latency = 2, 756 .target_residency = 2, 757 .enter = &intel_idle, 758 .enter_s2idle = intel_idle_s2idle, }, 759 { 760 .name = "C1E", 761 .desc = "MWAIT 0x01", 762 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 763 .exit_latency = 10, 764 .target_residency = 20, 765 .enter = &intel_idle, 766 .enter_s2idle = intel_idle_s2idle, }, 767 { 768 .name = "C6", 769 .desc = "MWAIT 0x20", 770 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 771 .exit_latency = 133, 772 .target_residency = 600, 773 .enter = &intel_idle, 774 .enter_s2idle = intel_idle_s2idle, }, 775 { 776 .enter = NULL } 777 }; 778 779 static struct cpuidle_state icx_cstates[] __initdata = { 780 { 781 .name = "C1", 782 .desc = "MWAIT 0x00", 783 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 784 .exit_latency = 1, 785 .target_residency = 1, 786 .enter = &intel_idle, 787 .enter_s2idle = intel_idle_s2idle, }, 788 { 789 .name = "C1E", 790 .desc = "MWAIT 0x01", 791 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 792 .exit_latency = 4, 793 .target_residency = 4, 794 .enter = &intel_idle, 795 .enter_s2idle = intel_idle_s2idle, }, 796 { 
797 .name = "C6", 798 .desc = "MWAIT 0x20", 799 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 800 .exit_latency = 170, 801 .target_residency = 600, 802 .enter = &intel_idle, 803 .enter_s2idle = intel_idle_s2idle, }, 804 { 805 .enter = NULL } 806 }; 807 808 /* 809 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 810 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 811 * But in this case there is effectively no C1, because C1 requests are 812 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 813 * and C1E requests end up with C1, so there is effectively no C1E. 814 * 815 * By default we enable C1E and disable C1 by marking it with 816 * 'CPUIDLE_FLAG_UNUSABLE'. 817 */ 818 static struct cpuidle_state adl_cstates[] __initdata = { 819 { 820 .name = "C1", 821 .desc = "MWAIT 0x00", 822 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 823 .exit_latency = 1, 824 .target_residency = 1, 825 .enter = &intel_idle, 826 .enter_s2idle = intel_idle_s2idle, }, 827 { 828 .name = "C1E", 829 .desc = "MWAIT 0x01", 830 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 831 .exit_latency = 2, 832 .target_residency = 4, 833 .enter = &intel_idle, 834 .enter_s2idle = intel_idle_s2idle, }, 835 { 836 .name = "C6", 837 .desc = "MWAIT 0x20", 838 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 839 .exit_latency = 220, 840 .target_residency = 600, 841 .enter = &intel_idle, 842 .enter_s2idle = intel_idle_s2idle, }, 843 { 844 .name = "C8", 845 .desc = "MWAIT 0x40", 846 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 847 .exit_latency = 280, 848 .target_residency = 800, 849 .enter = &intel_idle, 850 .enter_s2idle = intel_idle_s2idle, }, 851 { 852 .name = "C10", 853 .desc = "MWAIT 0x60", 854 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 855 .exit_latency = 680, 856 .target_residency = 2000, 857 .enter = &intel_idle, 858 .enter_s2idle = intel_idle_s2idle, }, 859 { 860 .enter = NULL } 861 }; 862 863 static 
struct cpuidle_state adl_l_cstates[] __initdata = { 864 { 865 .name = "C1", 866 .desc = "MWAIT 0x00", 867 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 868 .exit_latency = 1, 869 .target_residency = 1, 870 .enter = &intel_idle, 871 .enter_s2idle = intel_idle_s2idle, }, 872 { 873 .name = "C1E", 874 .desc = "MWAIT 0x01", 875 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 876 .exit_latency = 2, 877 .target_residency = 4, 878 .enter = &intel_idle, 879 .enter_s2idle = intel_idle_s2idle, }, 880 { 881 .name = "C6", 882 .desc = "MWAIT 0x20", 883 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 884 .exit_latency = 170, 885 .target_residency = 500, 886 .enter = &intel_idle, 887 .enter_s2idle = intel_idle_s2idle, }, 888 { 889 .name = "C8", 890 .desc = "MWAIT 0x40", 891 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 892 .exit_latency = 200, 893 .target_residency = 600, 894 .enter = &intel_idle, 895 .enter_s2idle = intel_idle_s2idle, }, 896 { 897 .name = "C10", 898 .desc = "MWAIT 0x60", 899 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 900 .exit_latency = 230, 901 .target_residency = 700, 902 .enter = &intel_idle, 903 .enter_s2idle = intel_idle_s2idle, }, 904 { 905 .enter = NULL } 906 }; 907 908 /* 909 * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice 910 * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in 911 * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 912 * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then 913 * both C1 and C1E requests end up with C1, so there is effectively no C1E. 914 * 915 * By default we enable C1 and disable C1E by marking it with 916 * 'CPUIDLE_FLAG_UNUSABLE'. 
917 */ 918 static struct cpuidle_state spr_cstates[] __initdata = { 919 { 920 .name = "C1", 921 .desc = "MWAIT 0x00", 922 .flags = MWAIT2flg(0x00), 923 .exit_latency = 1, 924 .target_residency = 1, 925 .enter = &intel_idle, 926 .enter_s2idle = intel_idle_s2idle, }, 927 { 928 .name = "C1E", 929 .desc = "MWAIT 0x01", 930 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | 931 CPUIDLE_FLAG_UNUSABLE, 932 .exit_latency = 2, 933 .target_residency = 4, 934 .enter = &intel_idle, 935 .enter_s2idle = intel_idle_s2idle, }, 936 { 937 .name = "C6", 938 .desc = "MWAIT 0x20", 939 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 940 .exit_latency = 290, 941 .target_residency = 800, 942 .enter = &intel_idle, 943 .enter_s2idle = intel_idle_s2idle, }, 944 { 945 .enter = NULL } 946 }; 947 948 static struct cpuidle_state atom_cstates[] __initdata = { 949 { 950 .name = "C1E", 951 .desc = "MWAIT 0x00", 952 .flags = MWAIT2flg(0x00), 953 .exit_latency = 10, 954 .target_residency = 20, 955 .enter = &intel_idle, 956 .enter_s2idle = intel_idle_s2idle, }, 957 { 958 .name = "C2", 959 .desc = "MWAIT 0x10", 960 .flags = MWAIT2flg(0x10), 961 .exit_latency = 20, 962 .target_residency = 80, 963 .enter = &intel_idle, 964 .enter_s2idle = intel_idle_s2idle, }, 965 { 966 .name = "C4", 967 .desc = "MWAIT 0x30", 968 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 969 .exit_latency = 100, 970 .target_residency = 400, 971 .enter = &intel_idle, 972 .enter_s2idle = intel_idle_s2idle, }, 973 { 974 .name = "C6", 975 .desc = "MWAIT 0x52", 976 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 977 .exit_latency = 140, 978 .target_residency = 560, 979 .enter = &intel_idle, 980 .enter_s2idle = intel_idle_s2idle, }, 981 { 982 .enter = NULL } 983 }; 984 static struct cpuidle_state tangier_cstates[] __initdata = { 985 { 986 .name = "C1", 987 .desc = "MWAIT 0x00", 988 .flags = MWAIT2flg(0x00), 989 .exit_latency = 1, 990 .target_residency = 4, 991 .enter = &intel_idle, 992 .enter_s2idle = 
intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel: NULL .enter terminates the table */
};

/* C-states used by idle_cpu_avn (matched to ATOM_SILVERMONT_D below). */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* C-states used by idle_cpu_knl (XEON_PHI_KNL / XEON_PHI_KNM below). */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

/*
 * C-states used by idle_cpu_bxt (ATOM_GOLDMONT / ATOM_GOLDMONT_PLUS below).
 * Latency/residency numbers may be overridden from the IRTL MSRs by
 * bxt_idle_state_table_update().
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* C-states used by idle_cpu_dnv (ATOM_GOLDMONT_D below). */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Per-model driver data: the C-state table to register plus the MSR tweaks
 * (auto-demotion disabling, C1E promotion control, ACPI _CST use) applied
 * when the driver initializes.  The *_nhx variant additionally uses ACPI.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/* CPU model -> per-model driver data; consulted once at init time. */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};

/* Fallback match: any family-6 Intel CPU with MWAIT (no per-model data). */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/**
 * intel_idle_max_cstate_reached - Check an index against the max_cstate limit.
 * @cstate: Zero-based index of a candidate idle state.
 *
 * Return: true if registering this state would exceed the
 * intel_idle.max_cstate module parameter limit.
 */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/**
 * intel_idle_state_needs_timer_stop - Check if a C-state stops the lapic timer.
 * @state: Idle state to check (its MWAIT hint is decoded from ->flags).
 *
 * Without ARAT the local APIC timer is only reliable in C1, so the tick must
 * be handed to the broadcast mechanism for deeper states.
 */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	unsigned long eax = flg2MWAIT(state->flags);

	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* _CST data extracted at init; shared by the helpers below. */
static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
1422 * 1423 * Check if all of the C-states listed by _CST in the max_cstate range are 1424 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1425 */ 1426 static bool __init intel_idle_cst_usable(void) 1427 { 1428 int cstate, limit; 1429 1430 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1431 acpi_state_table.count); 1432 1433 for (cstate = 1; cstate < limit; cstate++) { 1434 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1435 1436 if (cx->entry_method != ACPI_CSTATE_FFH) 1437 return false; 1438 } 1439 1440 return true; 1441 } 1442 1443 static bool __init intel_idle_acpi_cst_extract(void) 1444 { 1445 unsigned int cpu; 1446 1447 if (no_acpi) { 1448 pr_debug("Not allowed to use ACPI _CST\n"); 1449 return false; 1450 } 1451 1452 for_each_possible_cpu(cpu) { 1453 struct acpi_processor *pr = per_cpu(processors, cpu); 1454 1455 if (!pr) 1456 continue; 1457 1458 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1459 continue; 1460 1461 acpi_state_table.count++; 1462 1463 if (!intel_idle_cst_usable()) 1464 continue; 1465 1466 if (!acpi_processor_claim_cst_control()) 1467 break; 1468 1469 return true; 1470 } 1471 1472 acpi_state_table.count = 0; 1473 pr_debug("ACPI _CST not found or not usable\n"); 1474 return false; 1475 } 1476 1477 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1478 { 1479 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1480 1481 /* 1482 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1483 * the interesting states are ACPI_CSTATE_FFH. 
1484 */ 1485 for (cstate = 1; cstate < limit; cstate++) { 1486 struct acpi_processor_cx *cx; 1487 struct cpuidle_state *state; 1488 1489 if (intel_idle_max_cstate_reached(cstate - 1)) 1490 break; 1491 1492 cx = &acpi_state_table.states[cstate]; 1493 1494 state = &drv->states[drv->state_count++]; 1495 1496 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1497 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1498 state->exit_latency = cx->latency; 1499 /* 1500 * For C1-type C-states use the same number for both the exit 1501 * latency and target residency, because that is the case for 1502 * C1 in the majority of the static C-states tables above. 1503 * For the other types of C-states, however, set the target 1504 * residency to 3 times the exit latency which should lead to 1505 * a reasonable balance between energy-efficiency and 1506 * performance in the majority of interesting cases. 1507 */ 1508 state->target_residency = cx->latency; 1509 if (cx->type > ACPI_STATE_C1) 1510 state->target_residency *= 3; 1511 1512 state->flags = MWAIT2flg(cx->address); 1513 if (cx->type > ACPI_STATE_C2) 1514 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1515 1516 if (disabled_states_mask & BIT(cstate)) 1517 state->flags |= CPUIDLE_FLAG_OFF; 1518 1519 if (intel_idle_state_needs_timer_stop(state)) 1520 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1521 1522 state->enter = intel_idle; 1523 state->enter_s2idle = intel_idle_s2idle; 1524 } 1525 } 1526 1527 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1528 { 1529 int cstate, limit; 1530 1531 /* 1532 * If there are no _CST C-states, do not disable any C-states by 1533 * default. 1534 */ 1535 if (!acpi_state_table.count) 1536 return false; 1537 1538 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1539 /* 1540 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1541 * the interesting states are ACPI_CSTATE_FFH. 
1542 */ 1543 for (cstate = 1; cstate < limit; cstate++) { 1544 if (acpi_state_table.states[cstate].address == mwait_hint) 1545 return false; 1546 } 1547 return true; 1548 } 1549 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1550 #define force_use_acpi (false) 1551 1552 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1553 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1554 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1555 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1556 1557 /** 1558 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1559 * 1560 * Tune IVT multi-socket targets. 1561 * Assumption: num_sockets == (max_package_num + 1). 1562 */ 1563 static void __init ivt_idle_state_table_update(void) 1564 { 1565 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1566 int cpu, package_num, num_sockets = 1; 1567 1568 for_each_online_cpu(cpu) { 1569 package_num = topology_physical_package_id(cpu); 1570 if (package_num + 1 > num_sockets) { 1571 num_sockets = package_num + 1; 1572 1573 if (num_sockets > 4) { 1574 cpuidle_state_table = ivt_cstates_8s; 1575 return; 1576 } 1577 } 1578 } 1579 1580 if (num_sockets > 2) 1581 cpuidle_state_table = ivt_cstates_4s; 1582 1583 /* else, 1 and 2 socket systems use default ivt_cstates */ 1584 } 1585 1586 /** 1587 * irtl_2_usec - IRTL to microseconds conversion. 1588 * @irtl: IRTL MSR value. 1589 * 1590 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
 */
static unsigned long long __init irtl_2_usec(unsigned long long irtl)
{
	/* Time-unit multipliers (ns) selected by IRTL bits 12:10. */
	static const unsigned int irtl_ns_units[] __initconst = {
		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
	};
	unsigned long long ns;

	if (!irtl)
		return 0;

	/* Bits 12:10 select the time unit; bits 9:0 are the count. */
	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
}

/**
 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
 *
 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
 * definitive maximum latency and use the same value for target_residency.
 */
static void __init bxt_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int usec;

	/* A zero IRTL value (usec == 0) leaves the table entry untouched. */
	rdmsrl(MSR_PKGC6_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[2].exit_latency = usec;
		bxt_cstates[2].target_residency = usec;
	}

	rdmsrl(MSR_PKGC7_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[3].exit_latency = usec;
		bxt_cstates[3].target_residency = usec;
	}

	rdmsrl(MSR_PKGC8_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[4].exit_latency = usec;
		bxt_cstates[4].target_residency = usec;
	}

	rdmsrl(MSR_PKGC9_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[5].exit_latency = usec;
		bxt_cstates[5].target_residency = usec;
	}

	rdmsrl(MSR_PKGC10_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[6].exit_latency = usec;
		bxt_cstates[6].target_residency = usec;
	}

}

/**
 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
 *
 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}

/**
 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
 * idle states table.
 */
static void __init skx_idle_state_table_update(void)
{
	unsigned long long msr;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/*
	 * 000b: C0/C1 (no package C-state support)
	 * 001b: C2
	 * 010b: C6 (non-retention)
	 * 011b: C6 (retention)
	 * 111b: No Package C state limits.
	 */
	if ((msr & 0x7) < 2) {
		/*
		 * Uses the CC6 + PC0 latency and 3 times of
		 * latency for target_residency if the PC6
		 * is disabled in BIOS. This is consistent
		 * with how intel_idle driver uses _CST
		 * to set the target_residency.
		 */
		skx_cstates[2].exit_latency = 92;
		skx_cstates[2].target_residency = 276;
	}
}

/**
 * adl_idle_state_table_update - Adjust AlderLake idle states table.
 */
static void __init adl_idle_state_table_update(void)
{
	/* Check if user prefers C1 over C1E. */
	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
		/* C1 and C1E are mutually exclusive: enable C1, drop C1E. */
		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;

		/* Disable C1E by clearing the "C1E promotion" bit. */
		c1e_promotion = C1E_PROMOTION_DISABLE;
		return;
	}

	/* Make sure C1E is enabled by default */
	c1e_promotion = C1E_PROMOTION_ENABLE;
}

/**
 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
 */
static void __init spr_idle_state_table_update(void)
{
	unsigned long long msr;

	/* Check if user prefers C1E over C1. */
	if ((preferred_states_mask & BIT(2)) &&
	    !(preferred_states_mask & BIT(1))) {
		/* Disable C1 and enable C1E. */
		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;

		/* Enable C1E using the "C1E promotion" bit. */
		c1e_promotion = C1E_PROMOTION_ENABLE;
	}

	/*
	 * By default, the C6 state assumes the worst-case scenario of package
	 * C6. However, if PC6 is disabled, we update the numbers to match
	 * core C6.
	 */
	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* Limit value 2 and above allow for PC6. */
	if ((msr & 0x7) < 2) {
		spr_cstates[2].exit_latency = 190;
		spr_cstates[2].target_residency = 600;
	}
}

/**
 * intel_idle_verify_cstate - Check a C-state against CPUID MWAIT data.
 * @mwait_hint: MWAIT hint of the candidate C-state.
 *
 * Return: true if CPUID reports at least one MWAIT sub-state for the
 * corresponding C-state; also marks the TSC unstable for states deeper
 * than C2 when the CPU lacks a nonstop TSC.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
					MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it.
 */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/**
 * intel_idle_init_cstates_icpu - Build the idle states list from a static table.
 * @drv: cpuidle driver to fill in (states are appended after the poll state).
 *
 * Applies the model-specific table fixups first, then copies each usable
 * table entry into @drv, adjusting enter callbacks and flags per entry.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific table tweaks before the table is copied out. */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_X:
		skx_idle_state_table_update();
		break;
	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		spr_idle_state_table_update();
		break;
	case INTEL_FAM6_ALDERLAKE:
	case INTEL_FAM6_ALDERLAKE_L:
		adl_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* A NULL .enter/.enter_s2idle pair terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
			drv->states[drv->state_count].enter = intel_idle_irq;

		/* IBRS and IRQ_ENABLE are mutually exclusive entry variants. */
		if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
		    cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
			WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
			drv->states[drv->state_count].enter = intel_idle_ibrs;
		}

		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
 */
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
	cpuidle_poll_state_init(drv);

	if (disabled_states_mask & BIT(0))
		drv->states[0].flags |= CPUIDLE_FLAG_OFF;

	drv->state_count = 1;

	/* icpu is set only when a static per-model table is in use. */
	if (icpu)
		intel_idle_init_cstates_icpu(drv);
	else
		intel_idle_init_cstates_acpi(drv);
}

/* Clear the auto-demotion enable bits selected by the per-model flags. */
static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~auto_demotion_disable_flags;
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

/* Set the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_enable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits |= 0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/* Clear the C1E promotion bit (bit 1) in MSR_IA32_POWER_CTL. */
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (c1e_promotion == C1E_PROMOTION_ENABLE)
		c1e_promotion_enable();
	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
		c1e_promotion_disable();

	return 0;
}

/* CPU hotplug "online" callback: set up broadcast and register if needed. */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!boot_cpu_has(X86_FEATURE_ARAT))
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

/**
 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/**
 * intel_idle_init - Probe the CPU and register the intel_idle driver.
 *
 * Match the boot CPU against the model table (or the generic MWAIT fallback),
 * validate CPUID MWAIT support, build the idle states list, and register the
 * cpuidle driver plus per-CPU devices via a hotplug "online" callback.
 *
 * Return: 0 on success or a negative error code.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* EDX of the MWAIT leaf holds the per-C-state sub-state counts. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		/* Known model: use the static table plus model MSR tweaks. */
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		if (icpu->disable_promotion_to_c1e)
			c1e_promotion = C1E_PROMOTION_DISABLE;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* Unknown model and no usable _CST data: nothing to register. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");