// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 * for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* NOTE(review): DEBUG is currently defined, contrary to the comment above —
 * confirm whether pr_debug() output is intended to be on by default. */
#define DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.5.1"

/* The driver object registered with the cpuidle core. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bitmask of state indices to disable; presumably set via a module
 * parameter elsewhere in this file — TODO confirm against full source. */
static unsigned int disabled_states_mask;

/* Per-CPU cpuidle device objects registered for each online CPU. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/* Copied at init time from the matched struct idle_cpu (below). */
static unsigned long auto_demotion_disable_flags;
static bool disable_promotion_to_c1e;

/*
 * Per-processor-model description selected by CPU-ID match at init time.
 */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	/* Combine the C-state tables below with ACPI _CST information. */
	bool use_acpi;
};

/* Platform description matched for the boot CPU (init time only). */
static const struct idle_cpu *icpu __initdata;
/* C-state table being installed into the driver (init time only). */
static struct cpuidle_state *cpuidle_state_table __initdata;

/* MWAIT sub-state availability; presumably CPUID leaf 5 EDX — TODO confirm. */
static unsigned int mwait_substates __initdata;

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	BIT(16)

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags)	(((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax)		((eax & 0xFF) << 24)

/**
 * intel_idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * If the local APIC timer is not known to be reliable in the target idle state,
 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 *
 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
 * flushing user TLBs.
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	struct cpuidle_state *state = &drv->states[index];
	/* The MWAIT hint for this state is stored in the top flag byte. */
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned long ecx = 1; /* break on interrupt flag */
	/*
	 * Only assigned and read on CPUs without ARAT (always-running APIC
	 * timer); the short-circuit below never reads it otherwise.
	 */
	bool tick;
	int cpu = smp_processor_id();

	/*
	 * leave_mm() to avoid costly and often unnecessary wakeups
	 * for flushing the user TLB's associated with the active mm.
	 */
	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(cpu);

	if (!static_cpu_has(X86_FEATURE_ARAT)) {
		/*
		 * Switch over to one-shot tick broadcast if the target C-state
		 * is deeper than C1.
		 */
		if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
			tick = true;
			tick_broadcast_enter();
		} else {
			tick = false;
		}
	}

	mwait_idle_with_hints(eax, ecx);

	/* Undo the broadcast switch-over done before entering the state. */
	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
		tick_broadcast_exit();

	return index;
}

/**
 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long eax = flg2MWAIT(drv->states[index].flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	return 0;
}

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/* Nehalem / Westmere client processors. */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 3, .target_residency = 6,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 20, .target_residency = 80,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 200, .target_residency = 800,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Sandy Bridge. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 80, .target_residency = 211,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 104, .target_residency = 345,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7", .desc = "MWAIT 0x30",
	  .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 109, .target_residency = 345,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Silvermont (matched as ATOM_SILVERMONT via idle_cpu_byt below). */
static struct cpuidle_state byt_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6N", .desc = "MWAIT 0x58",
	  .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 300, .target_residency = 275,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6S", .desc = "MWAIT 0x52",
	  .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 500, .target_residency = 560,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 1200, .target_residency = 4000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7S", .desc = "MWAIT 0x64",
	  .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 10000, .target_residency = 20000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Airmont — same states as byt_cstates but lower C6 exit latencies. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6N", .desc = "MWAIT 0x58",
	  .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 80, .target_residency = 275,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6S", .desc = "MWAIT 0x52",
	  .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 200, .target_residency = 560,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 1200, .target_residency = 4000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7S", .desc = "MWAIT 0x64",
	  .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 10000, .target_residency = 20000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Ivy Bridge client. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 59, .target_residency = 156,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 80, .target_residency = 300,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7", .desc = "MWAIT 0x30",
	  .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 87, .target_residency = 300,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Ivy Bridge Xeon (IVYBRIDGE_X). */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 80,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 59, .target_residency = 156,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 82, .target_residency = 300,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* ivt variant with larger residencies; "4s" presumably 4-socket — confirm. */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 250,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 59, .target_residency = 300,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 84, .target_residency = 400,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* ivt variant with the largest residencies; "8s" presumably 8-socket. */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 500,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 59, .target_residency = 600,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 88, .target_residency = 700,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Haswell. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 33, .target_residency = 100,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 133, .target_residency = 400,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7s", .desc = "MWAIT 0x32",
	  .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 166, .target_residency = 500,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C8", .desc = "MWAIT 0x40",
	  .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 300, .target_residency = 900,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C9", .desc = "MWAIT 0x50",
	  .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 600, .target_residency = 1800,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C10", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 2600, .target_residency = 7700,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Broadwell — same layout as Haswell, different C3 exit latency. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 40, .target_residency = 100,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 133, .target_residency = 400,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7s", .desc = "MWAIT 0x32",
	  .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 166, .target_residency = 500,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C8", .desc = "MWAIT 0x40",
	  .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 300, .target_residency = 900,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C9", .desc = "MWAIT 0x50",
	  .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 600, .target_residency = 1800,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C10", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 2600, .target_residency = 7700,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Skylake / Kaby Lake client. */
static struct cpuidle_state skl_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C3", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 70, .target_residency = 100,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 85, .target_residency = 200,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7s", .desc = "MWAIT 0x33",
	  .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 124, .target_residency = 800,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C8", .desc = "MWAIT 0x40",
	  .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 200, .target_residency = 800,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C9", .desc = "MWAIT 0x50",
	  .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 480, .target_residency = 5000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C10", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 890, .target_residency = 5000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Skylake Xeon. */
static struct cpuidle_state skx_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 133, .target_residency = 600,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Ice Lake Xeon. */
static struct cpuidle_state icx_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 1,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 4, .target_residency = 4,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 128, .target_residency = 384,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Bonnell / Saltwell; note hint 0x00 behaves as C1E on these parts. */
static struct cpuidle_state atom_cstates[] __initdata = {
	{ .name = "C1E", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C2", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10),
	  .exit_latency = 20, .target_residency = 80,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C4", .desc = "MWAIT 0x30",
	  .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 100, .target_residency = 400,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x52",
	  .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 140, .target_residency = 560,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Silvermont MID (Tangier/Merrifield per ATOM_SILVERMONT_MID match). */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 4,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C4", .desc = "MWAIT 0x30",
	  .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 100, .target_residency = 400,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x52",
	  .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 140, .target_residency = 560,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 1200, .target_residency = 4000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C9", .desc = "MWAIT 0x64",
	  .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 10000, .target_residency = 20000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Silvermont server (ATOM_SILVERMONT_D, a.k.a. Avoton). */
static struct cpuidle_state avn_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x51",
	  .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 15, .target_residency = 45,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Xeon Phi (Knights Landing / Knights Mill). */
static struct cpuidle_state knl_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 1, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle },
	{ .name = "C6", .desc = "MWAIT 0x10",
	  .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 120, .target_residency = 500,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle },
	{ .enter = NULL }
};

/* Atom Goldmont (ATOM_GOLDMONT / ATOM_GOLDMONT_PLUS, a.k.a. Broxton). */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 133, .target_residency = 133,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C7s", .desc = "MWAIT 0x31",
	  .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 155, .target_residency = 155,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C8", .desc = "MWAIT 0x40",
	  .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 1000, .target_residency = 1000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C9", .desc = "MWAIT 0x50",
	  .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 2000, .target_residency = 2000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C10", .desc = "MWAIT 0x60",
	  .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 10000, .target_residency = 10000,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Atom Goldmont-D / Tremont-D servers (a.k.a. Denverton and successors). */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{ .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00),
	  .exit_latency = 2, .target_residency = 2,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C1E", .desc = "MWAIT 0x01",
	  .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
	  .exit_latency = 10, .target_residency = 20,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .name = "C6", .desc = "MWAIT 0x20",
	  .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
	  .exit_latency = 50, .target_residency = 500,
	  .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, },
	{ .enter = NULL }
};

/* Nehalem client: disable C1/C3 auto-demotion and promotion to C1E. */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct
idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	/* Xeon variant: also consult ACPI _CST. */
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

/* Lincroft (Bonnell MID): same states as Atom, but disable C6 auto-demotion. */
static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * Map of supported processor models (family 6) to their idle_cpu
 * descriptions; the first matching entry wins at init time.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
	{}
};

/* Generic fallback match: any Intel family 6 CPU that advertises MWAIT. */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT,
				     NULL),
	{}
};

/* Return true if @cstate exceeds the intel_idle.max_cstate module limit. */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* _CST data extracted at init time; shared by all helpers below. */
static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
 *
 * Check if all of the C-states listed by _CST in the max_cstate range are
 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
 */
static bool __init intel_idle_cst_usable(void)
{
	int cstate, limit;

	/* Bound the check by both the table capacity and max_cstate. */
	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
		      acpi_state_table.count);

	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];

		if (cx->entry_method != ACPI_CSTATE_FFH)
			return false;
	}

	return true;
}

/*
 * Evaluate _CST on the first CPU that has it; if the resulting table is
 * usable, claim C-state control from the platform.  Returns true on success.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		/* Drop the table again if C-state control cannot be claimed. */
		if (!acpi_processor_claim_cst_control()) {
			acpi_state_table.count = 0;
			return false;
		}

		return true;
	}

	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}

/* Populate @drv with idle states built from the extracted _CST data. */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* Encode the MWAIT hint from _CST into the state flags. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		state->enter = intel_idle;
		state->enter_s2idle = intel_idle_s2idle;
	}
}

/*
 * Return true if no _CST entry matches @mwait_hint, i.e. the corresponding
 * static-table state should be disabled by default.
 */
static bool __init intel_idle_off_by_default(u32 mwait_hint)
{
	int cstate, limit;

	/*
	 * If there are no _CST C-states, do not disable any C-states by
	 * default.
	 */
	if (!acpi_state_table.count)
		return false;

	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		if (acpi_state_table.states[cstate].address == mwait_hint)
			return false;
	}
	return true;
}
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* Stubs: without ACPI C-state support there is no _CST to consult. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */

/**
 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
 *
 * Tune IVT multi-socket targets.
 * Assumption: num_sockets == (max_package_num + 1).
 */
static void __init ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			/* More than 4 sockets: switch tables and stop early. */
			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/**
 * irtl_2_usec - IRTL to microseconds conversion.
 * @irtl: IRTL MSR value.
 *
 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1354 */ 1355 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1356 { 1357 static const unsigned int irtl_ns_units[] __initconst = { 1358 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1359 }; 1360 unsigned long long ns; 1361 1362 if (!irtl) 1363 return 0; 1364 1365 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1366 1367 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1368 } 1369 1370 /** 1371 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1372 * 1373 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1374 * definitive maximum latency and use the same value for target_residency. 1375 */ 1376 static void __init bxt_idle_state_table_update(void) 1377 { 1378 unsigned long long msr; 1379 unsigned int usec; 1380 1381 rdmsrl(MSR_PKGC6_IRTL, msr); 1382 usec = irtl_2_usec(msr); 1383 if (usec) { 1384 bxt_cstates[2].exit_latency = usec; 1385 bxt_cstates[2].target_residency = usec; 1386 } 1387 1388 rdmsrl(MSR_PKGC7_IRTL, msr); 1389 usec = irtl_2_usec(msr); 1390 if (usec) { 1391 bxt_cstates[3].exit_latency = usec; 1392 bxt_cstates[3].target_residency = usec; 1393 } 1394 1395 rdmsrl(MSR_PKGC8_IRTL, msr); 1396 usec = irtl_2_usec(msr); 1397 if (usec) { 1398 bxt_cstates[4].exit_latency = usec; 1399 bxt_cstates[4].target_residency = usec; 1400 } 1401 1402 rdmsrl(MSR_PKGC9_IRTL, msr); 1403 usec = irtl_2_usec(msr); 1404 if (usec) { 1405 bxt_cstates[5].exit_latency = usec; 1406 bxt_cstates[5].target_residency = usec; 1407 } 1408 1409 rdmsrl(MSR_PKGC10_IRTL, msr); 1410 usec = irtl_2_usec(msr); 1411 if (usec) { 1412 bxt_cstates[6].exit_latency = usec; 1413 bxt_cstates[6].target_residency = usec; 1414 } 1415 1416 } 1417 1418 /** 1419 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1420 * 1421 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;

	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	/* PC10 enabled and SGX off: hide the C8/C9 entries of skl_cstates. */
	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}

/*
 * Check a state's MWAIT hint against CPUID.MWAIT sub-state counts; also
 * marks the TSC unstable for deep states on CPUs without NONSTOP_TSC.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
				     MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it.
	 */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/*
 * Populate @drv from the static cpuidle_state_table, after applying the
 * model-specific table fixups (IVT sockets, BXT IRTL, SKL-H PC10/SGX).
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* A table entry with neither callback terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/*
		 * Start the state disabled if the user masked it out, or if
		 * ACPI _CST does not list it (unless it is always-enable).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		drv->state_count++;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1534 * @drv: cpuidle driver structure to initialize. 1535 */ 1536 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1537 { 1538 cpuidle_poll_state_init(drv); 1539 1540 if (disabled_states_mask & BIT(0)) 1541 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1542 1543 drv->state_count = 1; 1544 1545 if (icpu) 1546 intel_idle_init_cstates_icpu(drv); 1547 else 1548 intel_idle_init_cstates_acpi(drv); 1549 } 1550 1551 static void auto_demotion_disable(void) 1552 { 1553 unsigned long long msr_bits; 1554 1555 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1556 msr_bits &= ~auto_demotion_disable_flags; 1557 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1558 } 1559 1560 static void c1e_promotion_disable(void) 1561 { 1562 unsigned long long msr_bits; 1563 1564 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1565 msr_bits &= ~0x2; 1566 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1567 } 1568 1569 /** 1570 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1571 * @cpu: CPU to initialize. 1572 * 1573 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1574 * with the processor model flags. 
1575 */ 1576 static int intel_idle_cpu_init(unsigned int cpu) 1577 { 1578 struct cpuidle_device *dev; 1579 1580 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1581 dev->cpu = cpu; 1582 1583 if (cpuidle_register_device(dev)) { 1584 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1585 return -EIO; 1586 } 1587 1588 if (auto_demotion_disable_flags) 1589 auto_demotion_disable(); 1590 1591 if (disable_promotion_to_c1e) 1592 c1e_promotion_disable(); 1593 1594 return 0; 1595 } 1596 1597 static int intel_idle_cpu_online(unsigned int cpu) 1598 { 1599 struct cpuidle_device *dev; 1600 1601 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1602 tick_broadcast_enable(); 1603 1604 /* 1605 * Some systems can hotplug a cpu at runtime after 1606 * the kernel has booted, we have to initialize the 1607 * driver in this case 1608 */ 1609 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1610 if (!dev->registered) 1611 return intel_idle_cpu_init(cpu); 1612 1613 return 0; 1614 } 1615 1616 /** 1617 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/*
 * Driver entry point: match the CPU, validate MWAIT via CPUID, build the
 * idle-state list and register the cpuidle driver plus per-CPU devices.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: fall back to the generic MWAIT match. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	/* Unwind in reverse order: devices, driver, then the per-CPU memory. */
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");