// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 /* #define DEBUG */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/notifier.h> 51 #include <linux/cpu.h> 52 #include <linux/moduleparam.h> 53 #include <asm/cpu_device_id.h> 54 #include <asm/intel-family.h> 55 #include <asm/mwait.h> 56 #include <asm/msr.h> 57 58 #define INTEL_IDLE_VERSION "0.5.1" 59 60 static struct cpuidle_driver intel_idle_driver = { 61 .name = "intel_idle", 62 .owner = THIS_MODULE, 63 }; 64 /* intel_idle.max_cstate=0 disables driver */ 65 static int max_cstate = CPUIDLE_STATE_MAX - 1; 66 static unsigned int disabled_states_mask; 67 static unsigned int preferred_states_mask; 68 69 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 70 71 static unsigned long auto_demotion_disable_flags; 72 73 static enum { 74 C1E_PROMOTION_PRESERVE, 75 C1E_PROMOTION_ENABLE, 76 C1E_PROMOTION_DISABLE 77 } c1e_promotion = C1E_PROMOTION_PRESERVE; 78 79 struct idle_cpu { 80 struct cpuidle_state *state_table; 81 82 /* 83 * Hardware C-state auto-demotion may not always be optimal. 84 * Indicate which enable bits to clear here. 85 */ 86 unsigned long auto_demotion_disable_flags; 87 bool byt_auto_demotion_disable_flag; 88 bool disable_promotion_to_c1e; 89 bool use_acpi; 90 }; 91 92 static const struct idle_cpu *icpu __initdata; 93 static struct cpuidle_state *cpuidle_state_table __initdata; 94 95 static unsigned int mwait_substates __initdata; 96 97 /* 98 * Enable interrupts before entering the C-state. On some platforms and for 99 * some C-states, this may measurably decrease interrupt latency. 100 */ 101 #define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) 102 103 /* 104 * Enable this state by default even if the ACPI _CST does not list it. 
105 */ 106 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 107 108 /* 109 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 110 * the C-state (top nibble) and sub-state (bottom nibble) 111 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 112 * 113 * We store the hint at the top of our "flags" for each state. 114 */ 115 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 116 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 117 118 /** 119 * intel_idle - Ask the processor to enter the given idle state. 120 * @dev: cpuidle device of the target CPU. 121 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 122 * @index: Target idle state index. 123 * 124 * Use the MWAIT instruction to notify the processor that the CPU represented by 125 * @dev is idle and it can try to enter the idle state corresponding to @index. 126 * 127 * If the local APIC timer is not known to be reliable in the target idle state, 128 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 129 * 130 * Must be called under local_irq_disable(). 131 */ 132 static __cpuidle int intel_idle(struct cpuidle_device *dev, 133 struct cpuidle_driver *drv, int index) 134 { 135 struct cpuidle_state *state = &drv->states[index]; 136 unsigned long eax = flg2MWAIT(state->flags); 137 unsigned long ecx = 1; /* break on interrupt flag */ 138 139 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) 140 local_irq_enable(); 141 142 mwait_idle_with_hints(eax, ecx); 143 144 return index; 145 } 146 147 /** 148 * intel_idle_s2idle - Ask the processor to enter the given idle state. 149 * @dev: cpuidle device of the target CPU. 150 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 151 * @index: Target idle state index. 152 * 153 * Use the MWAIT instruction to notify the processor that the CPU represented by 154 * @dev is idle and it can try to enter the idle state corresponding to @index. 
155 * 156 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 157 * scheduler tick and suspended scheduler clock on the target CPU. 158 */ 159 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 160 struct cpuidle_driver *drv, int index) 161 { 162 unsigned long eax = flg2MWAIT(drv->states[index].flags); 163 unsigned long ecx = 1; /* break on interrupt flag */ 164 165 mwait_idle_with_hints(eax, ecx); 166 167 return 0; 168 } 169 170 /* 171 * States are indexed by the cstate number, 172 * which is also the index into the MWAIT hint array. 173 * Thus C0 is a dummy. 174 */ 175 static struct cpuidle_state nehalem_cstates[] __initdata = { 176 { 177 .name = "C1", 178 .desc = "MWAIT 0x00", 179 .flags = MWAIT2flg(0x00), 180 .exit_latency = 3, 181 .target_residency = 6, 182 .enter = &intel_idle, 183 .enter_s2idle = intel_idle_s2idle, }, 184 { 185 .name = "C1E", 186 .desc = "MWAIT 0x01", 187 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 188 .exit_latency = 10, 189 .target_residency = 20, 190 .enter = &intel_idle, 191 .enter_s2idle = intel_idle_s2idle, }, 192 { 193 .name = "C3", 194 .desc = "MWAIT 0x10", 195 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 196 .exit_latency = 20, 197 .target_residency = 80, 198 .enter = &intel_idle, 199 .enter_s2idle = intel_idle_s2idle, }, 200 { 201 .name = "C6", 202 .desc = "MWAIT 0x20", 203 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 204 .exit_latency = 200, 205 .target_residency = 800, 206 .enter = &intel_idle, 207 .enter_s2idle = intel_idle_s2idle, }, 208 { 209 .enter = NULL } 210 }; 211 212 static struct cpuidle_state snb_cstates[] __initdata = { 213 { 214 .name = "C1", 215 .desc = "MWAIT 0x00", 216 .flags = MWAIT2flg(0x00), 217 .exit_latency = 2, 218 .target_residency = 2, 219 .enter = &intel_idle, 220 .enter_s2idle = intel_idle_s2idle, }, 221 { 222 .name = "C1E", 223 .desc = "MWAIT 0x01", 224 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 225 .exit_latency = 10, 226 
.target_residency = 20, 227 .enter = &intel_idle, 228 .enter_s2idle = intel_idle_s2idle, }, 229 { 230 .name = "C3", 231 .desc = "MWAIT 0x10", 232 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 233 .exit_latency = 80, 234 .target_residency = 211, 235 .enter = &intel_idle, 236 .enter_s2idle = intel_idle_s2idle, }, 237 { 238 .name = "C6", 239 .desc = "MWAIT 0x20", 240 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 241 .exit_latency = 104, 242 .target_residency = 345, 243 .enter = &intel_idle, 244 .enter_s2idle = intel_idle_s2idle, }, 245 { 246 .name = "C7", 247 .desc = "MWAIT 0x30", 248 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 249 .exit_latency = 109, 250 .target_residency = 345, 251 .enter = &intel_idle, 252 .enter_s2idle = intel_idle_s2idle, }, 253 { 254 .enter = NULL } 255 }; 256 257 static struct cpuidle_state byt_cstates[] __initdata = { 258 { 259 .name = "C1", 260 .desc = "MWAIT 0x00", 261 .flags = MWAIT2flg(0x00), 262 .exit_latency = 1, 263 .target_residency = 1, 264 .enter = &intel_idle, 265 .enter_s2idle = intel_idle_s2idle, }, 266 { 267 .name = "C6N", 268 .desc = "MWAIT 0x58", 269 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 270 .exit_latency = 300, 271 .target_residency = 275, 272 .enter = &intel_idle, 273 .enter_s2idle = intel_idle_s2idle, }, 274 { 275 .name = "C6S", 276 .desc = "MWAIT 0x52", 277 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 278 .exit_latency = 500, 279 .target_residency = 560, 280 .enter = &intel_idle, 281 .enter_s2idle = intel_idle_s2idle, }, 282 { 283 .name = "C7", 284 .desc = "MWAIT 0x60", 285 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 286 .exit_latency = 1200, 287 .target_residency = 4000, 288 .enter = &intel_idle, 289 .enter_s2idle = intel_idle_s2idle, }, 290 { 291 .name = "C7S", 292 .desc = "MWAIT 0x64", 293 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 294 .exit_latency = 10000, 295 .target_residency = 20000, 296 .enter = &intel_idle, 297 .enter_s2idle = intel_idle_s2idle, }, 
298 { 299 .enter = NULL } 300 }; 301 302 static struct cpuidle_state cht_cstates[] __initdata = { 303 { 304 .name = "C1", 305 .desc = "MWAIT 0x00", 306 .flags = MWAIT2flg(0x00), 307 .exit_latency = 1, 308 .target_residency = 1, 309 .enter = &intel_idle, 310 .enter_s2idle = intel_idle_s2idle, }, 311 { 312 .name = "C6N", 313 .desc = "MWAIT 0x58", 314 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 315 .exit_latency = 80, 316 .target_residency = 275, 317 .enter = &intel_idle, 318 .enter_s2idle = intel_idle_s2idle, }, 319 { 320 .name = "C6S", 321 .desc = "MWAIT 0x52", 322 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 323 .exit_latency = 200, 324 .target_residency = 560, 325 .enter = &intel_idle, 326 .enter_s2idle = intel_idle_s2idle, }, 327 { 328 .name = "C7", 329 .desc = "MWAIT 0x60", 330 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 331 .exit_latency = 1200, 332 .target_residency = 4000, 333 .enter = &intel_idle, 334 .enter_s2idle = intel_idle_s2idle, }, 335 { 336 .name = "C7S", 337 .desc = "MWAIT 0x64", 338 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 339 .exit_latency = 10000, 340 .target_residency = 20000, 341 .enter = &intel_idle, 342 .enter_s2idle = intel_idle_s2idle, }, 343 { 344 .enter = NULL } 345 }; 346 347 static struct cpuidle_state ivb_cstates[] __initdata = { 348 { 349 .name = "C1", 350 .desc = "MWAIT 0x00", 351 .flags = MWAIT2flg(0x00), 352 .exit_latency = 1, 353 .target_residency = 1, 354 .enter = &intel_idle, 355 .enter_s2idle = intel_idle_s2idle, }, 356 { 357 .name = "C1E", 358 .desc = "MWAIT 0x01", 359 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 360 .exit_latency = 10, 361 .target_residency = 20, 362 .enter = &intel_idle, 363 .enter_s2idle = intel_idle_s2idle, }, 364 { 365 .name = "C3", 366 .desc = "MWAIT 0x10", 367 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 368 .exit_latency = 59, 369 .target_residency = 156, 370 .enter = &intel_idle, 371 .enter_s2idle = intel_idle_s2idle, }, 372 { 373 .name = "C6", 
374 .desc = "MWAIT 0x20", 375 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 376 .exit_latency = 80, 377 .target_residency = 300, 378 .enter = &intel_idle, 379 .enter_s2idle = intel_idle_s2idle, }, 380 { 381 .name = "C7", 382 .desc = "MWAIT 0x30", 383 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 384 .exit_latency = 87, 385 .target_residency = 300, 386 .enter = &intel_idle, 387 .enter_s2idle = intel_idle_s2idle, }, 388 { 389 .enter = NULL } 390 }; 391 392 static struct cpuidle_state ivt_cstates[] __initdata = { 393 { 394 .name = "C1", 395 .desc = "MWAIT 0x00", 396 .flags = MWAIT2flg(0x00), 397 .exit_latency = 1, 398 .target_residency = 1, 399 .enter = &intel_idle, 400 .enter_s2idle = intel_idle_s2idle, }, 401 { 402 .name = "C1E", 403 .desc = "MWAIT 0x01", 404 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 405 .exit_latency = 10, 406 .target_residency = 80, 407 .enter = &intel_idle, 408 .enter_s2idle = intel_idle_s2idle, }, 409 { 410 .name = "C3", 411 .desc = "MWAIT 0x10", 412 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 413 .exit_latency = 59, 414 .target_residency = 156, 415 .enter = &intel_idle, 416 .enter_s2idle = intel_idle_s2idle, }, 417 { 418 .name = "C6", 419 .desc = "MWAIT 0x20", 420 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 421 .exit_latency = 82, 422 .target_residency = 300, 423 .enter = &intel_idle, 424 .enter_s2idle = intel_idle_s2idle, }, 425 { 426 .enter = NULL } 427 }; 428 429 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 430 { 431 .name = "C1", 432 .desc = "MWAIT 0x00", 433 .flags = MWAIT2flg(0x00), 434 .exit_latency = 1, 435 .target_residency = 1, 436 .enter = &intel_idle, 437 .enter_s2idle = intel_idle_s2idle, }, 438 { 439 .name = "C1E", 440 .desc = "MWAIT 0x01", 441 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 442 .exit_latency = 10, 443 .target_residency = 250, 444 .enter = &intel_idle, 445 .enter_s2idle = intel_idle_s2idle, }, 446 { 447 .name = "C3", 448 .desc = "MWAIT 0x10", 449 
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 450 .exit_latency = 59, 451 .target_residency = 300, 452 .enter = &intel_idle, 453 .enter_s2idle = intel_idle_s2idle, }, 454 { 455 .name = "C6", 456 .desc = "MWAIT 0x20", 457 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 458 .exit_latency = 84, 459 .target_residency = 400, 460 .enter = &intel_idle, 461 .enter_s2idle = intel_idle_s2idle, }, 462 { 463 .enter = NULL } 464 }; 465 466 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 467 { 468 .name = "C1", 469 .desc = "MWAIT 0x00", 470 .flags = MWAIT2flg(0x00), 471 .exit_latency = 1, 472 .target_residency = 1, 473 .enter = &intel_idle, 474 .enter_s2idle = intel_idle_s2idle, }, 475 { 476 .name = "C1E", 477 .desc = "MWAIT 0x01", 478 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 479 .exit_latency = 10, 480 .target_residency = 500, 481 .enter = &intel_idle, 482 .enter_s2idle = intel_idle_s2idle, }, 483 { 484 .name = "C3", 485 .desc = "MWAIT 0x10", 486 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 487 .exit_latency = 59, 488 .target_residency = 600, 489 .enter = &intel_idle, 490 .enter_s2idle = intel_idle_s2idle, }, 491 { 492 .name = "C6", 493 .desc = "MWAIT 0x20", 494 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 495 .exit_latency = 88, 496 .target_residency = 700, 497 .enter = &intel_idle, 498 .enter_s2idle = intel_idle_s2idle, }, 499 { 500 .enter = NULL } 501 }; 502 503 static struct cpuidle_state hsw_cstates[] __initdata = { 504 { 505 .name = "C1", 506 .desc = "MWAIT 0x00", 507 .flags = MWAIT2flg(0x00), 508 .exit_latency = 2, 509 .target_residency = 2, 510 .enter = &intel_idle, 511 .enter_s2idle = intel_idle_s2idle, }, 512 { 513 .name = "C1E", 514 .desc = "MWAIT 0x01", 515 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 516 .exit_latency = 10, 517 .target_residency = 20, 518 .enter = &intel_idle, 519 .enter_s2idle = intel_idle_s2idle, }, 520 { 521 .name = "C3", 522 .desc = "MWAIT 0x10", 523 .flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TLB_FLUSHED, 524 .exit_latency = 33, 525 .target_residency = 100, 526 .enter = &intel_idle, 527 .enter_s2idle = intel_idle_s2idle, }, 528 { 529 .name = "C6", 530 .desc = "MWAIT 0x20", 531 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 532 .exit_latency = 133, 533 .target_residency = 400, 534 .enter = &intel_idle, 535 .enter_s2idle = intel_idle_s2idle, }, 536 { 537 .name = "C7s", 538 .desc = "MWAIT 0x32", 539 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 540 .exit_latency = 166, 541 .target_residency = 500, 542 .enter = &intel_idle, 543 .enter_s2idle = intel_idle_s2idle, }, 544 { 545 .name = "C8", 546 .desc = "MWAIT 0x40", 547 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 548 .exit_latency = 300, 549 .target_residency = 900, 550 .enter = &intel_idle, 551 .enter_s2idle = intel_idle_s2idle, }, 552 { 553 .name = "C9", 554 .desc = "MWAIT 0x50", 555 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 556 .exit_latency = 600, 557 .target_residency = 1800, 558 .enter = &intel_idle, 559 .enter_s2idle = intel_idle_s2idle, }, 560 { 561 .name = "C10", 562 .desc = "MWAIT 0x60", 563 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 564 .exit_latency = 2600, 565 .target_residency = 7700, 566 .enter = &intel_idle, 567 .enter_s2idle = intel_idle_s2idle, }, 568 { 569 .enter = NULL } 570 }; 571 static struct cpuidle_state bdw_cstates[] __initdata = { 572 { 573 .name = "C1", 574 .desc = "MWAIT 0x00", 575 .flags = MWAIT2flg(0x00), 576 .exit_latency = 2, 577 .target_residency = 2, 578 .enter = &intel_idle, 579 .enter_s2idle = intel_idle_s2idle, }, 580 { 581 .name = "C1E", 582 .desc = "MWAIT 0x01", 583 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 584 .exit_latency = 10, 585 .target_residency = 20, 586 .enter = &intel_idle, 587 .enter_s2idle = intel_idle_s2idle, }, 588 { 589 .name = "C3", 590 .desc = "MWAIT 0x10", 591 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 592 .exit_latency = 40, 593 .target_residency = 100, 594 .enter = 
&intel_idle, 595 .enter_s2idle = intel_idle_s2idle, }, 596 { 597 .name = "C6", 598 .desc = "MWAIT 0x20", 599 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 600 .exit_latency = 133, 601 .target_residency = 400, 602 .enter = &intel_idle, 603 .enter_s2idle = intel_idle_s2idle, }, 604 { 605 .name = "C7s", 606 .desc = "MWAIT 0x32", 607 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 608 .exit_latency = 166, 609 .target_residency = 500, 610 .enter = &intel_idle, 611 .enter_s2idle = intel_idle_s2idle, }, 612 { 613 .name = "C8", 614 .desc = "MWAIT 0x40", 615 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 616 .exit_latency = 300, 617 .target_residency = 900, 618 .enter = &intel_idle, 619 .enter_s2idle = intel_idle_s2idle, }, 620 { 621 .name = "C9", 622 .desc = "MWAIT 0x50", 623 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 624 .exit_latency = 600, 625 .target_residency = 1800, 626 .enter = &intel_idle, 627 .enter_s2idle = intel_idle_s2idle, }, 628 { 629 .name = "C10", 630 .desc = "MWAIT 0x60", 631 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 632 .exit_latency = 2600, 633 .target_residency = 7700, 634 .enter = &intel_idle, 635 .enter_s2idle = intel_idle_s2idle, }, 636 { 637 .enter = NULL } 638 }; 639 640 static struct cpuidle_state skl_cstates[] __initdata = { 641 { 642 .name = "C1", 643 .desc = "MWAIT 0x00", 644 .flags = MWAIT2flg(0x00), 645 .exit_latency = 2, 646 .target_residency = 2, 647 .enter = &intel_idle, 648 .enter_s2idle = intel_idle_s2idle, }, 649 { 650 .name = "C1E", 651 .desc = "MWAIT 0x01", 652 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 653 .exit_latency = 10, 654 .target_residency = 20, 655 .enter = &intel_idle, 656 .enter_s2idle = intel_idle_s2idle, }, 657 { 658 .name = "C3", 659 .desc = "MWAIT 0x10", 660 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 661 .exit_latency = 70, 662 .target_residency = 100, 663 .enter = &intel_idle, 664 .enter_s2idle = intel_idle_s2idle, }, 665 { 666 .name = "C6", 667 .desc = 
"MWAIT 0x20", 668 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 669 .exit_latency = 85, 670 .target_residency = 200, 671 .enter = &intel_idle, 672 .enter_s2idle = intel_idle_s2idle, }, 673 { 674 .name = "C7s", 675 .desc = "MWAIT 0x33", 676 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 677 .exit_latency = 124, 678 .target_residency = 800, 679 .enter = &intel_idle, 680 .enter_s2idle = intel_idle_s2idle, }, 681 { 682 .name = "C8", 683 .desc = "MWAIT 0x40", 684 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 685 .exit_latency = 200, 686 .target_residency = 800, 687 .enter = &intel_idle, 688 .enter_s2idle = intel_idle_s2idle, }, 689 { 690 .name = "C9", 691 .desc = "MWAIT 0x50", 692 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 693 .exit_latency = 480, 694 .target_residency = 5000, 695 .enter = &intel_idle, 696 .enter_s2idle = intel_idle_s2idle, }, 697 { 698 .name = "C10", 699 .desc = "MWAIT 0x60", 700 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 701 .exit_latency = 890, 702 .target_residency = 5000, 703 .enter = &intel_idle, 704 .enter_s2idle = intel_idle_s2idle, }, 705 { 706 .enter = NULL } 707 }; 708 709 static struct cpuidle_state skx_cstates[] __initdata = { 710 { 711 .name = "C1", 712 .desc = "MWAIT 0x00", 713 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 714 .exit_latency = 2, 715 .target_residency = 2, 716 .enter = &intel_idle, 717 .enter_s2idle = intel_idle_s2idle, }, 718 { 719 .name = "C1E", 720 .desc = "MWAIT 0x01", 721 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 722 .exit_latency = 10, 723 .target_residency = 20, 724 .enter = &intel_idle, 725 .enter_s2idle = intel_idle_s2idle, }, 726 { 727 .name = "C6", 728 .desc = "MWAIT 0x20", 729 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 730 .exit_latency = 133, 731 .target_residency = 600, 732 .enter = &intel_idle, 733 .enter_s2idle = intel_idle_s2idle, }, 734 { 735 .enter = NULL } 736 }; 737 738 static struct cpuidle_state icx_cstates[] __initdata = { 739 { 
740 .name = "C1", 741 .desc = "MWAIT 0x00", 742 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 743 .exit_latency = 1, 744 .target_residency = 1, 745 .enter = &intel_idle, 746 .enter_s2idle = intel_idle_s2idle, }, 747 { 748 .name = "C1E", 749 .desc = "MWAIT 0x01", 750 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 751 .exit_latency = 4, 752 .target_residency = 4, 753 .enter = &intel_idle, 754 .enter_s2idle = intel_idle_s2idle, }, 755 { 756 .name = "C6", 757 .desc = "MWAIT 0x20", 758 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 759 .exit_latency = 170, 760 .target_residency = 600, 761 .enter = &intel_idle, 762 .enter_s2idle = intel_idle_s2idle, }, 763 { 764 .enter = NULL } 765 }; 766 767 /* 768 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. 769 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. 770 * But in this case there is effectively no C1, because C1 requests are 771 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 772 * and C1E requests end up with C1, so there is effectively no C1E. 773 * 774 * By default we enable C1E and disable C1 by marking it with 775 * 'CPUIDLE_FLAG_UNUSABLE'. 
776 */ 777 static struct cpuidle_state adl_cstates[] __initdata = { 778 { 779 .name = "C1", 780 .desc = "MWAIT 0x00", 781 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 782 .exit_latency = 1, 783 .target_residency = 1, 784 .enter = &intel_idle, 785 .enter_s2idle = intel_idle_s2idle, }, 786 { 787 .name = "C1E", 788 .desc = "MWAIT 0x01", 789 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 790 .exit_latency = 2, 791 .target_residency = 4, 792 .enter = &intel_idle, 793 .enter_s2idle = intel_idle_s2idle, }, 794 { 795 .name = "C6", 796 .desc = "MWAIT 0x20", 797 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 798 .exit_latency = 220, 799 .target_residency = 600, 800 .enter = &intel_idle, 801 .enter_s2idle = intel_idle_s2idle, }, 802 { 803 .name = "C8", 804 .desc = "MWAIT 0x40", 805 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 806 .exit_latency = 280, 807 .target_residency = 800, 808 .enter = &intel_idle, 809 .enter_s2idle = intel_idle_s2idle, }, 810 { 811 .name = "C10", 812 .desc = "MWAIT 0x60", 813 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 814 .exit_latency = 680, 815 .target_residency = 2000, 816 .enter = &intel_idle, 817 .enter_s2idle = intel_idle_s2idle, }, 818 { 819 .enter = NULL } 820 }; 821 822 static struct cpuidle_state adl_l_cstates[] __initdata = { 823 { 824 .name = "C1", 825 .desc = "MWAIT 0x00", 826 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, 827 .exit_latency = 1, 828 .target_residency = 1, 829 .enter = &intel_idle, 830 .enter_s2idle = intel_idle_s2idle, }, 831 { 832 .name = "C1E", 833 .desc = "MWAIT 0x01", 834 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 835 .exit_latency = 2, 836 .target_residency = 4, 837 .enter = &intel_idle, 838 .enter_s2idle = intel_idle_s2idle, }, 839 { 840 .name = "C6", 841 .desc = "MWAIT 0x20", 842 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 843 .exit_latency = 170, 844 .target_residency = 500, 845 .enter = &intel_idle, 846 .enter_s2idle = intel_idle_s2idle, }, 847 { 848 
.name = "C8", 849 .desc = "MWAIT 0x40", 850 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 851 .exit_latency = 200, 852 .target_residency = 600, 853 .enter = &intel_idle, 854 .enter_s2idle = intel_idle_s2idle, }, 855 { 856 .name = "C10", 857 .desc = "MWAIT 0x60", 858 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 859 .exit_latency = 230, 860 .target_residency = 700, 861 .enter = &intel_idle, 862 .enter_s2idle = intel_idle_s2idle, }, 863 { 864 .enter = NULL } 865 }; 866 867 /* 868 * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice 869 * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in 870 * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 871 * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then 872 * both C1 and C1E requests end up with C1, so there is effectively no C1E. 873 * 874 * By default we enable C1 and disable C1E by marking it with 875 * 'CPUIDLE_FLAG_UNUSABLE'. 876 */ 877 static struct cpuidle_state spr_cstates[] __initdata = { 878 { 879 .name = "C1", 880 .desc = "MWAIT 0x00", 881 .flags = MWAIT2flg(0x00), 882 .exit_latency = 1, 883 .target_residency = 1, 884 .enter = &intel_idle, 885 .enter_s2idle = intel_idle_s2idle, }, 886 { 887 .name = "C1E", 888 .desc = "MWAIT 0x01", 889 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | 890 CPUIDLE_FLAG_UNUSABLE, 891 .exit_latency = 2, 892 .target_residency = 4, 893 .enter = &intel_idle, 894 .enter_s2idle = intel_idle_s2idle, }, 895 { 896 .name = "C6", 897 .desc = "MWAIT 0x20", 898 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 899 .exit_latency = 290, 900 .target_residency = 800, 901 .enter = &intel_idle, 902 .enter_s2idle = intel_idle_s2idle, }, 903 { 904 .enter = NULL } 905 }; 906 907 static struct cpuidle_state atom_cstates[] __initdata = { 908 { 909 .name = "C1E", 910 .desc = "MWAIT 0x00", 911 .flags = MWAIT2flg(0x00), 912 .exit_latency = 10, 913 .target_residency = 20, 914 .enter = &intel_idle, 915 
.enter_s2idle = intel_idle_s2idle, }, 916 { 917 .name = "C2", 918 .desc = "MWAIT 0x10", 919 .flags = MWAIT2flg(0x10), 920 .exit_latency = 20, 921 .target_residency = 80, 922 .enter = &intel_idle, 923 .enter_s2idle = intel_idle_s2idle, }, 924 { 925 .name = "C4", 926 .desc = "MWAIT 0x30", 927 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 928 .exit_latency = 100, 929 .target_residency = 400, 930 .enter = &intel_idle, 931 .enter_s2idle = intel_idle_s2idle, }, 932 { 933 .name = "C6", 934 .desc = "MWAIT 0x52", 935 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 936 .exit_latency = 140, 937 .target_residency = 560, 938 .enter = &intel_idle, 939 .enter_s2idle = intel_idle_s2idle, }, 940 { 941 .enter = NULL } 942 }; 943 static struct cpuidle_state tangier_cstates[] __initdata = { 944 { 945 .name = "C1", 946 .desc = "MWAIT 0x00", 947 .flags = MWAIT2flg(0x00), 948 .exit_latency = 1, 949 .target_residency = 4, 950 .enter = &intel_idle, 951 .enter_s2idle = intel_idle_s2idle, }, 952 { 953 .name = "C4", 954 .desc = "MWAIT 0x30", 955 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 956 .exit_latency = 100, 957 .target_residency = 400, 958 .enter = &intel_idle, 959 .enter_s2idle = intel_idle_s2idle, }, 960 { 961 .name = "C6", 962 .desc = "MWAIT 0x52", 963 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 964 .exit_latency = 140, 965 .target_residency = 560, 966 .enter = &intel_idle, 967 .enter_s2idle = intel_idle_s2idle, }, 968 { 969 .name = "C7", 970 .desc = "MWAIT 0x60", 971 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 972 .exit_latency = 1200, 973 .target_residency = 4000, 974 .enter = &intel_idle, 975 .enter_s2idle = intel_idle_s2idle, }, 976 { 977 .name = "C9", 978 .desc = "MWAIT 0x64", 979 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 980 .exit_latency = 10000, 981 .target_residency = 20000, 982 .enter = &intel_idle, 983 .enter_s2idle = intel_idle_s2idle, }, 984 { 985 .enter = NULL } 986 }; 987 static struct cpuidle_state avn_cstates[] 
__initdata = { 988 { 989 .name = "C1", 990 .desc = "MWAIT 0x00", 991 .flags = MWAIT2flg(0x00), 992 .exit_latency = 2, 993 .target_residency = 2, 994 .enter = &intel_idle, 995 .enter_s2idle = intel_idle_s2idle, }, 996 { 997 .name = "C6", 998 .desc = "MWAIT 0x51", 999 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 1000 .exit_latency = 15, 1001 .target_residency = 45, 1002 .enter = &intel_idle, 1003 .enter_s2idle = intel_idle_s2idle, }, 1004 { 1005 .enter = NULL } 1006 }; 1007 static struct cpuidle_state knl_cstates[] __initdata = { 1008 { 1009 .name = "C1", 1010 .desc = "MWAIT 0x00", 1011 .flags = MWAIT2flg(0x00), 1012 .exit_latency = 1, 1013 .target_residency = 2, 1014 .enter = &intel_idle, 1015 .enter_s2idle = intel_idle_s2idle }, 1016 { 1017 .name = "C6", 1018 .desc = "MWAIT 0x10", 1019 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 1020 .exit_latency = 120, 1021 .target_residency = 500, 1022 .enter = &intel_idle, 1023 .enter_s2idle = intel_idle_s2idle }, 1024 { 1025 .enter = NULL } 1026 }; 1027 1028 static struct cpuidle_state bxt_cstates[] __initdata = { 1029 { 1030 .name = "C1", 1031 .desc = "MWAIT 0x00", 1032 .flags = MWAIT2flg(0x00), 1033 .exit_latency = 2, 1034 .target_residency = 2, 1035 .enter = &intel_idle, 1036 .enter_s2idle = intel_idle_s2idle, }, 1037 { 1038 .name = "C1E", 1039 .desc = "MWAIT 0x01", 1040 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1041 .exit_latency = 10, 1042 .target_residency = 20, 1043 .enter = &intel_idle, 1044 .enter_s2idle = intel_idle_s2idle, }, 1045 { 1046 .name = "C6", 1047 .desc = "MWAIT 0x20", 1048 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1049 .exit_latency = 133, 1050 .target_residency = 133, 1051 .enter = &intel_idle, 1052 .enter_s2idle = intel_idle_s2idle, }, 1053 { 1054 .name = "C7s", 1055 .desc = "MWAIT 0x31", 1056 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 1057 .exit_latency = 155, 1058 .target_residency = 155, 1059 .enter = &intel_idle, 1060 .enter_s2idle = 
intel_idle_s2idle, }, 1061 { 1062 .name = "C8", 1063 .desc = "MWAIT 0x40", 1064 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 1065 .exit_latency = 1000, 1066 .target_residency = 1000, 1067 .enter = &intel_idle, 1068 .enter_s2idle = intel_idle_s2idle, }, 1069 { 1070 .name = "C9", 1071 .desc = "MWAIT 0x50", 1072 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 1073 .exit_latency = 2000, 1074 .target_residency = 2000, 1075 .enter = &intel_idle, 1076 .enter_s2idle = intel_idle_s2idle, }, 1077 { 1078 .name = "C10", 1079 .desc = "MWAIT 0x60", 1080 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 1081 .exit_latency = 10000, 1082 .target_residency = 10000, 1083 .enter = &intel_idle, 1084 .enter_s2idle = intel_idle_s2idle, }, 1085 { 1086 .enter = NULL } 1087 }; 1088 1089 static struct cpuidle_state dnv_cstates[] __initdata = { 1090 { 1091 .name = "C1", 1092 .desc = "MWAIT 0x00", 1093 .flags = MWAIT2flg(0x00), 1094 .exit_latency = 2, 1095 .target_residency = 2, 1096 .enter = &intel_idle, 1097 .enter_s2idle = intel_idle_s2idle, }, 1098 { 1099 .name = "C1E", 1100 .desc = "MWAIT 0x01", 1101 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1102 .exit_latency = 10, 1103 .target_residency = 20, 1104 .enter = &intel_idle, 1105 .enter_s2idle = intel_idle_s2idle, }, 1106 { 1107 .name = "C6", 1108 .desc = "MWAIT 0x20", 1109 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1110 .exit_latency = 50, 1111 .target_residency = 500, 1112 .enter = &intel_idle, 1113 .enter_s2idle = intel_idle_s2idle, }, 1114 { 1115 .enter = NULL } 1116 }; 1117 1118 /* 1119 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support 1120 * C6, and this is indicated in the CPUID mwait leaf. 
1121 */ 1122 static struct cpuidle_state snr_cstates[] __initdata = { 1123 { 1124 .name = "C1", 1125 .desc = "MWAIT 0x00", 1126 .flags = MWAIT2flg(0x00), 1127 .exit_latency = 2, 1128 .target_residency = 2, 1129 .enter = &intel_idle, 1130 .enter_s2idle = intel_idle_s2idle, }, 1131 { 1132 .name = "C1E", 1133 .desc = "MWAIT 0x01", 1134 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 1135 .exit_latency = 15, 1136 .target_residency = 25, 1137 .enter = &intel_idle, 1138 .enter_s2idle = intel_idle_s2idle, }, 1139 { 1140 .name = "C6", 1141 .desc = "MWAIT 0x20", 1142 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 1143 .exit_latency = 130, 1144 .target_residency = 500, 1145 .enter = &intel_idle, 1146 .enter_s2idle = intel_idle_s2idle, }, 1147 { 1148 .enter = NULL } 1149 }; 1150 1151 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1152 .state_table = nehalem_cstates, 1153 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1154 .disable_promotion_to_c1e = true, 1155 }; 1156 1157 static const struct idle_cpu idle_cpu_nhx __initconst = { 1158 .state_table = nehalem_cstates, 1159 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1160 .disable_promotion_to_c1e = true, 1161 .use_acpi = true, 1162 }; 1163 1164 static const struct idle_cpu idle_cpu_atom __initconst = { 1165 .state_table = atom_cstates, 1166 }; 1167 1168 static const struct idle_cpu idle_cpu_tangier __initconst = { 1169 .state_table = tangier_cstates, 1170 }; 1171 1172 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1173 .state_table = atom_cstates, 1174 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1175 }; 1176 1177 static const struct idle_cpu idle_cpu_snb __initconst = { 1178 .state_table = snb_cstates, 1179 .disable_promotion_to_c1e = true, 1180 }; 1181 1182 static const struct idle_cpu idle_cpu_snx __initconst = { 1183 .state_table = snb_cstates, 1184 .disable_promotion_to_c1e = true, 1185 .use_acpi = true, 1186 }; 1187 
1188 static const struct idle_cpu idle_cpu_byt __initconst = { 1189 .state_table = byt_cstates, 1190 .disable_promotion_to_c1e = true, 1191 .byt_auto_demotion_disable_flag = true, 1192 }; 1193 1194 static const struct idle_cpu idle_cpu_cht __initconst = { 1195 .state_table = cht_cstates, 1196 .disable_promotion_to_c1e = true, 1197 .byt_auto_demotion_disable_flag = true, 1198 }; 1199 1200 static const struct idle_cpu idle_cpu_ivb __initconst = { 1201 .state_table = ivb_cstates, 1202 .disable_promotion_to_c1e = true, 1203 }; 1204 1205 static const struct idle_cpu idle_cpu_ivt __initconst = { 1206 .state_table = ivt_cstates, 1207 .disable_promotion_to_c1e = true, 1208 .use_acpi = true, 1209 }; 1210 1211 static const struct idle_cpu idle_cpu_hsw __initconst = { 1212 .state_table = hsw_cstates, 1213 .disable_promotion_to_c1e = true, 1214 }; 1215 1216 static const struct idle_cpu idle_cpu_hsx __initconst = { 1217 .state_table = hsw_cstates, 1218 .disable_promotion_to_c1e = true, 1219 .use_acpi = true, 1220 }; 1221 1222 static const struct idle_cpu idle_cpu_bdw __initconst = { 1223 .state_table = bdw_cstates, 1224 .disable_promotion_to_c1e = true, 1225 }; 1226 1227 static const struct idle_cpu idle_cpu_bdx __initconst = { 1228 .state_table = bdw_cstates, 1229 .disable_promotion_to_c1e = true, 1230 .use_acpi = true, 1231 }; 1232 1233 static const struct idle_cpu idle_cpu_skl __initconst = { 1234 .state_table = skl_cstates, 1235 .disable_promotion_to_c1e = true, 1236 }; 1237 1238 static const struct idle_cpu idle_cpu_skx __initconst = { 1239 .state_table = skx_cstates, 1240 .disable_promotion_to_c1e = true, 1241 .use_acpi = true, 1242 }; 1243 1244 static const struct idle_cpu idle_cpu_icx __initconst = { 1245 .state_table = icx_cstates, 1246 .disable_promotion_to_c1e = true, 1247 .use_acpi = true, 1248 }; 1249 1250 static const struct idle_cpu idle_cpu_adl __initconst = { 1251 .state_table = adl_cstates, 1252 }; 1253 1254 static const struct idle_cpu idle_cpu_adl_l 
__initconst = { 1255 .state_table = adl_l_cstates, 1256 }; 1257 1258 static const struct idle_cpu idle_cpu_spr __initconst = { 1259 .state_table = spr_cstates, 1260 .disable_promotion_to_c1e = true, 1261 .use_acpi = true, 1262 }; 1263 1264 static const struct idle_cpu idle_cpu_avn __initconst = { 1265 .state_table = avn_cstates, 1266 .disable_promotion_to_c1e = true, 1267 .use_acpi = true, 1268 }; 1269 1270 static const struct idle_cpu idle_cpu_knl __initconst = { 1271 .state_table = knl_cstates, 1272 .use_acpi = true, 1273 }; 1274 1275 static const struct idle_cpu idle_cpu_bxt __initconst = { 1276 .state_table = bxt_cstates, 1277 .disable_promotion_to_c1e = true, 1278 }; 1279 1280 static const struct idle_cpu idle_cpu_dnv __initconst = { 1281 .state_table = dnv_cstates, 1282 .disable_promotion_to_c1e = true, 1283 .use_acpi = true, 1284 }; 1285 1286 static const struct idle_cpu idle_cpu_snr __initconst = { 1287 .state_table = snr_cstates, 1288 .disable_promotion_to_c1e = true, 1289 .use_acpi = true, 1290 }; 1291 1292 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1293 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1294 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1295 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1296 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1297 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1298 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1299 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1300 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1301 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1302 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1303 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1304 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1305 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1306 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1307 
X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1308 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1309 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1310 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1311 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1312 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1313 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1314 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1315 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1316 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1317 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1318 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1319 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1320 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1321 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1322 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1323 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1324 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1325 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), 1326 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), 1327 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), 1328 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), 1329 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1330 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1331 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1332 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1333 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1334 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), 1335 {} 1336 }; 1337 1338 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1339 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1340 {} 1341 }; 1342 1343 static bool __init intel_idle_max_cstate_reached(int cstate) 1344 { 1345 if (cstate + 1 > max_cstate) { 1346 
pr_info("max_cstate %d reached\n", max_cstate); 1347 return true; 1348 } 1349 return false; 1350 } 1351 1352 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state) 1353 { 1354 unsigned long eax = flg2MWAIT(state->flags); 1355 1356 if (boot_cpu_has(X86_FEATURE_ARAT)) 1357 return false; 1358 1359 /* 1360 * Switch over to one-shot tick broadcast if the target C-state 1361 * is deeper than C1. 1362 */ 1363 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK); 1364 } 1365 1366 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1367 #include <acpi/processor.h> 1368 1369 static bool no_acpi __read_mostly; 1370 module_param(no_acpi, bool, 0444); 1371 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1372 1373 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1374 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1375 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1376 1377 static struct acpi_processor_power acpi_state_table __initdata; 1378 1379 /** 1380 * intel_idle_cst_usable - Check if the _CST information can be used. 1381 * 1382 * Check if all of the C-states listed by _CST in the max_cstate range are 1383 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
1384 */ 1385 static bool __init intel_idle_cst_usable(void) 1386 { 1387 int cstate, limit; 1388 1389 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1390 acpi_state_table.count); 1391 1392 for (cstate = 1; cstate < limit; cstate++) { 1393 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1394 1395 if (cx->entry_method != ACPI_CSTATE_FFH) 1396 return false; 1397 } 1398 1399 return true; 1400 } 1401 1402 static bool __init intel_idle_acpi_cst_extract(void) 1403 { 1404 unsigned int cpu; 1405 1406 if (no_acpi) { 1407 pr_debug("Not allowed to use ACPI _CST\n"); 1408 return false; 1409 } 1410 1411 for_each_possible_cpu(cpu) { 1412 struct acpi_processor *pr = per_cpu(processors, cpu); 1413 1414 if (!pr) 1415 continue; 1416 1417 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1418 continue; 1419 1420 acpi_state_table.count++; 1421 1422 if (!intel_idle_cst_usable()) 1423 continue; 1424 1425 if (!acpi_processor_claim_cst_control()) 1426 break; 1427 1428 return true; 1429 } 1430 1431 acpi_state_table.count = 0; 1432 pr_debug("ACPI _CST not found or not usable\n"); 1433 return false; 1434 } 1435 1436 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1437 { 1438 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1439 1440 /* 1441 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1442 * the interesting states are ACPI_CSTATE_FFH. 
1443 */ 1444 for (cstate = 1; cstate < limit; cstate++) { 1445 struct acpi_processor_cx *cx; 1446 struct cpuidle_state *state; 1447 1448 if (intel_idle_max_cstate_reached(cstate - 1)) 1449 break; 1450 1451 cx = &acpi_state_table.states[cstate]; 1452 1453 state = &drv->states[drv->state_count++]; 1454 1455 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1456 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1457 state->exit_latency = cx->latency; 1458 /* 1459 * For C1-type C-states use the same number for both the exit 1460 * latency and target residency, because that is the case for 1461 * C1 in the majority of the static C-states tables above. 1462 * For the other types of C-states, however, set the target 1463 * residency to 3 times the exit latency which should lead to 1464 * a reasonable balance between energy-efficiency and 1465 * performance in the majority of interesting cases. 1466 */ 1467 state->target_residency = cx->latency; 1468 if (cx->type > ACPI_STATE_C1) 1469 state->target_residency *= 3; 1470 1471 state->flags = MWAIT2flg(cx->address); 1472 if (cx->type > ACPI_STATE_C2) 1473 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1474 1475 if (disabled_states_mask & BIT(cstate)) 1476 state->flags |= CPUIDLE_FLAG_OFF; 1477 1478 if (intel_idle_state_needs_timer_stop(state)) 1479 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1480 1481 state->enter = intel_idle; 1482 state->enter_s2idle = intel_idle_s2idle; 1483 } 1484 } 1485 1486 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1487 { 1488 int cstate, limit; 1489 1490 /* 1491 * If there are no _CST C-states, do not disable any C-states by 1492 * default. 1493 */ 1494 if (!acpi_state_table.count) 1495 return false; 1496 1497 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1498 /* 1499 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1500 * the interesting states are ACPI_CSTATE_FFH. 
1501 */ 1502 for (cstate = 1; cstate < limit; cstate++) { 1503 if (acpi_state_table.states[cstate].address == mwait_hint) 1504 return false; 1505 } 1506 return true; 1507 } 1508 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1509 #define force_use_acpi (false) 1510 1511 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1512 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1513 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1514 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1515 1516 /** 1517 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1518 * 1519 * Tune IVT multi-socket targets. 1520 * Assumption: num_sockets == (max_package_num + 1). 1521 */ 1522 static void __init ivt_idle_state_table_update(void) 1523 { 1524 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1525 int cpu, package_num, num_sockets = 1; 1526 1527 for_each_online_cpu(cpu) { 1528 package_num = topology_physical_package_id(cpu); 1529 if (package_num + 1 > num_sockets) { 1530 num_sockets = package_num + 1; 1531 1532 if (num_sockets > 4) { 1533 cpuidle_state_table = ivt_cstates_8s; 1534 return; 1535 } 1536 } 1537 } 1538 1539 if (num_sockets > 2) 1540 cpuidle_state_table = ivt_cstates_4s; 1541 1542 /* else, 1 and 2 socket systems use default ivt_cstates */ 1543 } 1544 1545 /** 1546 * irtl_2_usec - IRTL to microseconds conversion. 1547 * @irtl: IRTL MSR value. 1548 * 1549 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1550 */ 1551 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1552 { 1553 static const unsigned int irtl_ns_units[] __initconst = { 1554 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1555 }; 1556 unsigned long long ns; 1557 1558 if (!irtl) 1559 return 0; 1560 1561 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1562 1563 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1564 } 1565 1566 /** 1567 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1568 * 1569 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1570 * definitive maximum latency and use the same value for target_residency. 1571 */ 1572 static void __init bxt_idle_state_table_update(void) 1573 { 1574 unsigned long long msr; 1575 unsigned int usec; 1576 1577 rdmsrl(MSR_PKGC6_IRTL, msr); 1578 usec = irtl_2_usec(msr); 1579 if (usec) { 1580 bxt_cstates[2].exit_latency = usec; 1581 bxt_cstates[2].target_residency = usec; 1582 } 1583 1584 rdmsrl(MSR_PKGC7_IRTL, msr); 1585 usec = irtl_2_usec(msr); 1586 if (usec) { 1587 bxt_cstates[3].exit_latency = usec; 1588 bxt_cstates[3].target_residency = usec; 1589 } 1590 1591 rdmsrl(MSR_PKGC8_IRTL, msr); 1592 usec = irtl_2_usec(msr); 1593 if (usec) { 1594 bxt_cstates[4].exit_latency = usec; 1595 bxt_cstates[4].target_residency = usec; 1596 } 1597 1598 rdmsrl(MSR_PKGC9_IRTL, msr); 1599 usec = irtl_2_usec(msr); 1600 if (usec) { 1601 bxt_cstates[5].exit_latency = usec; 1602 bxt_cstates[5].target_residency = usec; 1603 } 1604 1605 rdmsrl(MSR_PKGC10_IRTL, msr); 1606 usec = irtl_2_usec(msr); 1607 if (usec) { 1608 bxt_cstates[6].exit_latency = usec; 1609 bxt_cstates[6].target_residency = usec; 1610 } 1611 1612 } 1613 1614 /** 1615 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1616 * 1617 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1618 */ 1619 static void __init sklh_idle_state_table_update(void) 1620 { 1621 unsigned long long msr; 1622 unsigned int eax, ebx, ecx, edx; 1623 1624 1625 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1626 if (max_cstate <= 7) 1627 return; 1628 1629 /* if PC10 not present in CPUID.MWAIT.EDX */ 1630 if ((mwait_substates & (0xF << 28)) == 0) 1631 return; 1632 1633 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1634 1635 /* PC10 is not enabled in PKG C-state limit */ 1636 if ((msr & 0xF) != 8) 1637 return; 1638 1639 ecx = 0; 1640 cpuid(7, &eax, &ebx, &ecx, &edx); 1641 1642 /* if SGX is present */ 1643 if (ebx & (1 << 2)) { 1644 1645 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1646 1647 /* if SGX is enabled */ 1648 if (msr & (1 << 18)) 1649 return; 1650 } 1651 1652 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1653 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1654 } 1655 1656 /** 1657 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1658 * idle states table. 1659 */ 1660 static void __init skx_idle_state_table_update(void) 1661 { 1662 unsigned long long msr; 1663 1664 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1665 1666 /* 1667 * 000b: C0/C1 (no package C-state support) 1668 * 001b: C2 1669 * 010b: C6 (non-retention) 1670 * 011b: C6 (retention) 1671 * 111b: No Package C state limits. 1672 */ 1673 if ((msr & 0x7) < 2) { 1674 /* 1675 * Uses the CC6 + PC0 latency and 3 times of 1676 * latency for target_residency if the PC6 1677 * is disabled in BIOS. This is consistent 1678 * with how intel_idle driver uses _CST 1679 * to set the target_residency. 1680 */ 1681 skx_cstates[2].exit_latency = 92; 1682 skx_cstates[2].target_residency = 276; 1683 } 1684 } 1685 1686 /** 1687 * adl_idle_state_table_update - Adjust AlderLake idle states table. 1688 */ 1689 static void __init adl_idle_state_table_update(void) 1690 { 1691 /* Check if user prefers C1 over C1E. 
*/ 1692 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { 1693 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1694 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; 1695 1696 /* Disable C1E by clearing the "C1E promotion" bit. */ 1697 c1e_promotion = C1E_PROMOTION_DISABLE; 1698 return; 1699 } 1700 1701 /* Make sure C1E is enabled by default */ 1702 c1e_promotion = C1E_PROMOTION_ENABLE; 1703 } 1704 1705 /** 1706 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. 1707 */ 1708 static void __init spr_idle_state_table_update(void) 1709 { 1710 unsigned long long msr; 1711 1712 /* Check if user prefers C1E over C1. */ 1713 if ((preferred_states_mask & BIT(2)) && 1714 !(preferred_states_mask & BIT(1))) { 1715 /* Disable C1 and enable C1E. */ 1716 spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; 1717 spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; 1718 1719 /* Enable C1E using the "C1E promotion" bit. */ 1720 c1e_promotion = C1E_PROMOTION_ENABLE; 1721 } 1722 1723 /* 1724 * By default, the C6 state assumes the worst-case scenario of package 1725 * C6. However, if PC6 is disabled, we update the numbers to match 1726 * core C6. 1727 */ 1728 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1729 1730 /* Limit value 2 and above allow for PC6. */ 1731 if ((msr & 0x7) < 2) { 1732 spr_cstates[2].exit_latency = 190; 1733 spr_cstates[2].target_residency = 600; 1734 } 1735 } 1736 1737 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1738 { 1739 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1740 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1741 MWAIT_SUBSTATE_MASK; 1742 1743 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1744 if (num_substates == 0) 1745 return false; 1746 1747 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1748 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1749 1750 return true; 1751 } 1752 1753 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1754 { 1755 int cstate; 1756 1757 switch (boot_cpu_data.x86_model) { 1758 case INTEL_FAM6_IVYBRIDGE_X: 1759 ivt_idle_state_table_update(); 1760 break; 1761 case INTEL_FAM6_ATOM_GOLDMONT: 1762 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1763 bxt_idle_state_table_update(); 1764 break; 1765 case INTEL_FAM6_SKYLAKE: 1766 sklh_idle_state_table_update(); 1767 break; 1768 case INTEL_FAM6_SKYLAKE_X: 1769 skx_idle_state_table_update(); 1770 break; 1771 case INTEL_FAM6_SAPPHIRERAPIDS_X: 1772 spr_idle_state_table_update(); 1773 break; 1774 case INTEL_FAM6_ALDERLAKE: 1775 case INTEL_FAM6_ALDERLAKE_L: 1776 adl_idle_state_table_update(); 1777 break; 1778 } 1779 1780 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1781 unsigned int mwait_hint; 1782 1783 if (intel_idle_max_cstate_reached(cstate)) 1784 break; 1785 1786 if (!cpuidle_state_table[cstate].enter && 1787 !cpuidle_state_table[cstate].enter_s2idle) 1788 break; 1789 1790 /* If marked as unusable, skip this state. */ 1791 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1792 pr_debug("state %s is disabled\n", 1793 cpuidle_state_table[cstate].name); 1794 continue; 1795 } 1796 1797 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1798 if (!intel_idle_verify_cstate(mwait_hint)) 1799 continue; 1800 1801 /* Structure copy. 
*/ 1802 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1803 1804 if ((disabled_states_mask & BIT(drv->state_count)) || 1805 ((icpu->use_acpi || force_use_acpi) && 1806 intel_idle_off_by_default(mwait_hint) && 1807 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1808 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1809 1810 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1811 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1812 1813 drv->state_count++; 1814 } 1815 1816 if (icpu->byt_auto_demotion_disable_flag) { 1817 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1818 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1819 } 1820 } 1821 1822 /** 1823 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1824 * @drv: cpuidle driver structure to initialize. 1825 */ 1826 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1827 { 1828 cpuidle_poll_state_init(drv); 1829 1830 if (disabled_states_mask & BIT(0)) 1831 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1832 1833 drv->state_count = 1; 1834 1835 if (icpu) 1836 intel_idle_init_cstates_icpu(drv); 1837 else 1838 intel_idle_init_cstates_acpi(drv); 1839 } 1840 1841 static void auto_demotion_disable(void) 1842 { 1843 unsigned long long msr_bits; 1844 1845 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1846 msr_bits &= ~auto_demotion_disable_flags; 1847 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1848 } 1849 1850 static void c1e_promotion_enable(void) 1851 { 1852 unsigned long long msr_bits; 1853 1854 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1855 msr_bits |= 0x2; 1856 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1857 } 1858 1859 static void c1e_promotion_disable(void) 1860 { 1861 unsigned long long msr_bits; 1862 1863 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1864 msr_bits &= ~0x2; 1865 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1866 } 1867 1868 /** 1869 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 
1870 * @cpu: CPU to initialize. 1871 * 1872 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1873 * with the processor model flags. 1874 */ 1875 static int intel_idle_cpu_init(unsigned int cpu) 1876 { 1877 struct cpuidle_device *dev; 1878 1879 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1880 dev->cpu = cpu; 1881 1882 if (cpuidle_register_device(dev)) { 1883 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1884 return -EIO; 1885 } 1886 1887 if (auto_demotion_disable_flags) 1888 auto_demotion_disable(); 1889 1890 if (c1e_promotion == C1E_PROMOTION_ENABLE) 1891 c1e_promotion_enable(); 1892 else if (c1e_promotion == C1E_PROMOTION_DISABLE) 1893 c1e_promotion_disable(); 1894 1895 return 0; 1896 } 1897 1898 static int intel_idle_cpu_online(unsigned int cpu) 1899 { 1900 struct cpuidle_device *dev; 1901 1902 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1903 tick_broadcast_enable(); 1904 1905 /* 1906 * Some systems can hotplug a cpu at runtime after 1907 * the kernel has booted, we have to initialize the 1908 * driver in this case 1909 */ 1910 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1911 if (!dev->registered) 1912 return intel_idle_cpu_init(cpu); 1913 1914 return 0; 1915 } 1916 1917 /** 1918 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
1919 */ 1920 static void __init intel_idle_cpuidle_devices_uninit(void) 1921 { 1922 int i; 1923 1924 for_each_online_cpu(i) 1925 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1926 } 1927 1928 static int __init intel_idle_init(void) 1929 { 1930 const struct x86_cpu_id *id; 1931 unsigned int eax, ebx, ecx; 1932 int retval; 1933 1934 /* Do not load intel_idle at all for now if idle= is passed */ 1935 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1936 return -ENODEV; 1937 1938 if (max_cstate == 0) { 1939 pr_debug("disabled\n"); 1940 return -EPERM; 1941 } 1942 1943 id = x86_match_cpu(intel_idle_ids); 1944 if (id) { 1945 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1946 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1947 return -ENODEV; 1948 } 1949 } else { 1950 id = x86_match_cpu(intel_mwait_ids); 1951 if (!id) 1952 return -ENODEV; 1953 } 1954 1955 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1956 return -ENODEV; 1957 1958 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1959 1960 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1961 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1962 !mwait_substates) 1963 return -ENODEV; 1964 1965 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1966 1967 icpu = (const struct idle_cpu *)id->driver_data; 1968 if (icpu) { 1969 cpuidle_state_table = icpu->state_table; 1970 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 1971 if (icpu->disable_promotion_to_c1e) 1972 c1e_promotion = C1E_PROMOTION_DISABLE; 1973 if (icpu->use_acpi || force_use_acpi) 1974 intel_idle_acpi_cst_extract(); 1975 } else if (!intel_idle_acpi_cst_extract()) { 1976 return -ENODEV; 1977 } 1978 1979 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1980 boot_cpu_data.x86_model); 1981 1982 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1983 if (!intel_idle_cpuidle_devices) 1984 return -ENOMEM; 1985 1986 intel_idle_cpuidle_driver_init(&intel_idle_driver); 1987 1988 retval = 
cpuidle_register_driver(&intel_idle_driver); 1989 if (retval) { 1990 struct cpuidle_driver *drv = cpuidle_get_driver(); 1991 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1992 drv ? drv->name : "none"); 1993 goto init_driver_fail; 1994 } 1995 1996 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1997 intel_idle_cpu_online, NULL); 1998 if (retval < 0) 1999 goto hp_setup_fail; 2000 2001 pr_debug("Local APIC timer is reliable in %s\n", 2002 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 2003 2004 return 0; 2005 2006 hp_setup_fail: 2007 intel_idle_cpuidle_devices_uninit(); 2008 cpuidle_unregister_driver(&intel_idle_driver); 2009 init_driver_fail: 2010 free_percpu(intel_idle_cpuidle_devices); 2011 return retval; 2012 2013 } 2014 device_initcall(intel_idle_init); 2015 2016 /* 2017 * We are not really modular, but we used to support that. Meaning we also 2018 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 2019 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 2020 * is the easiest way (currently) to continue doing that. 2021 */ 2022 module_param(max_cstate, int, 0444); 2023 /* 2024 * The positions of the bits that are set in this number are the indices of the 2025 * idle states to be disabled by default (as reflected by the names of the 2026 * corresponding idle state directories in sysfs, "state0", "state1" ... 2027 * "state<i>" ..., where <i> is the index of the given state). 2028 */ 2029 module_param_named(states_off, disabled_states_mask, uint, 0444); 2030 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 2031 /* 2032 * Some platforms come with mutually exclusive C-states, so that if one is 2033 * enabled, the other C-states must not be used. Example: C1 and C1E on 2034 * Sapphire Rapids platform. 
This parameter allows for selecting the 2035 * preferred C-states among the groups of mutually exclusive C-states - the 2036 * selected C-states will be registered, the other C-states from the mutually 2037 * exclusive group won't be registered. If the platform has no mutually 2038 * exclusive C-states, this parameter has no effect. 2039 */ 2040 module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); 2041 MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); 2042