1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * intel_idle.c - native hardware idle loop for modern Intel processors 4 * 5 * Copyright (c) 2013 - 2020, Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com> 8 */ 9 10 /* 11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT 12 * in lieu of the legacy ACPI processor_idle driver. The intent is to 13 * make Linux more efficient on these processors, as intel_idle knows 14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. 15 */ 16 17 /* 18 * Design Assumptions 19 * 20 * All CPUs have same idle states as boot CPU 21 * 22 * Chipset BM_STS (bus master status) bit is a NOP 23 * for preventing entry into deep C-states 24 * 25 * CPU will flush caches as needed when entering a C-state via MWAIT 26 * (in contrast to entering ACPI C3, in which case the WBINVD 27 * instruction needs to be executed to flush the caches) 28 */ 29 30 /* 31 * Known limitations 32 * 33 * ACPI has a .suspend hack to turn off deep c-statees during suspend 34 * to avoid complications with the lapic timer workaround. 35 * Have not seen issues with suspend, but may need same workaround here. 
36 * 37 */ 38 39 /* un-comment DEBUG to enable pr_debug() statements */ 40 #define DEBUG 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/acpi.h> 45 #include <linux/kernel.h> 46 #include <linux/cpuidle.h> 47 #include <linux/tick.h> 48 #include <trace/events/power.h> 49 #include <linux/sched.h> 50 #include <linux/notifier.h> 51 #include <linux/cpu.h> 52 #include <linux/moduleparam.h> 53 #include <asm/cpu_device_id.h> 54 #include <asm/intel-family.h> 55 #include <asm/mwait.h> 56 #include <asm/msr.h> 57 58 #define INTEL_IDLE_VERSION "0.5.1" 59 60 static struct cpuidle_driver intel_idle_driver = { 61 .name = "intel_idle", 62 .owner = THIS_MODULE, 63 }; 64 /* intel_idle.max_cstate=0 disables driver */ 65 static int max_cstate = CPUIDLE_STATE_MAX - 1; 66 static unsigned int disabled_states_mask; 67 68 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 69 70 static unsigned long auto_demotion_disable_flags; 71 static bool disable_promotion_to_c1e; 72 73 struct idle_cpu { 74 struct cpuidle_state *state_table; 75 76 /* 77 * Hardware C-state auto-demotion may not always be optimal. 78 * Indicate which enable bits to clear here. 79 */ 80 unsigned long auto_demotion_disable_flags; 81 bool byt_auto_demotion_disable_flag; 82 bool disable_promotion_to_c1e; 83 bool use_acpi; 84 }; 85 86 static const struct idle_cpu *icpu __initdata; 87 static struct cpuidle_state *cpuidle_state_table __initdata; 88 89 static unsigned int mwait_substates __initdata; 90 91 /* 92 * Enable this state by default even if the ACPI _CST does not list it. 93 */ 94 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 95 96 /* 97 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 98 * the C-state (top nibble) and sub-state (bottom nibble) 99 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 100 * 101 * We store the hint at the top of our "flags" for each state. 
102 */ 103 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 104 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 105 106 /** 107 * intel_idle - Ask the processor to enter the given idle state. 108 * @dev: cpuidle device of the target CPU. 109 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 110 * @index: Target idle state index. 111 * 112 * Use the MWAIT instruction to notify the processor that the CPU represented by 113 * @dev is idle and it can try to enter the idle state corresponding to @index. 114 * 115 * If the local APIC timer is not known to be reliable in the target idle state, 116 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 117 * 118 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to 119 * flushing user TLBs. 120 * 121 * Must be called under local_irq_disable(). 122 */ 123 static __cpuidle int intel_idle(struct cpuidle_device *dev, 124 struct cpuidle_driver *drv, int index) 125 { 126 struct cpuidle_state *state = &drv->states[index]; 127 unsigned long eax = flg2MWAIT(state->flags); 128 unsigned long ecx = 1; /* break on interrupt flag */ 129 bool tick; 130 131 if (!static_cpu_has(X86_FEATURE_ARAT)) { 132 /* 133 * Switch over to one-shot tick broadcast if the target C-state 134 * is deeper than C1. 135 */ 136 if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) { 137 tick = true; 138 tick_broadcast_enter(); 139 } else { 140 tick = false; 141 } 142 } 143 144 mwait_idle_with_hints(eax, ecx); 145 146 if (!static_cpu_has(X86_FEATURE_ARAT) && tick) 147 tick_broadcast_exit(); 148 149 return index; 150 } 151 152 /** 153 * intel_idle_s2idle - Ask the processor to enter the given idle state. 154 * @dev: cpuidle device of the target CPU. 155 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 156 * @index: Target idle state index. 
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long eax = flg2MWAIT(drv->states[index].flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	/* s2idle callbacks return 0 on success rather than the state index. */
	return 0;
}

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 *
 * Each table below is terminated by an entry with .enter == NULL.
 * exit_latency/target_residency are per-state tuning values — presumably in
 * microseconds per the cpuidle convention; confirm against cpuidle docs.
 */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for SANDYBRIDGE{,_X} via idle_cpu_snb/idle_cpu_snx (see intel_idle_ids). */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for ATOM_SILVERMONT via idle_cpu_byt (see intel_idle_ids). */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Used for ATOM_AIRMONT via idle_cpu_cht (see intel_idle_ids).
 * Same hints as byt_cstates but with different latency tuning.
 */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for IVYBRIDGE via idle_cpu_ivb (see intel_idle_ids). */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags =
			 MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for IVYBRIDGE_X via idle_cpu_ivt (see intel_idle_ids). */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Ivy Town variant with longer target residencies — the _4s suffix suggests a
 * 4-socket tuning; confirm against how the driver selects between the ivt
 * tables (selection code is outside this chunk).
 */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) |
			 CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Ivy Town variant with the longest target residencies — the _8s suffix
 * suggests an 8-socket tuning; confirm against the ivt table selection code.
 */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for HASWELL{,_L,_G,_X} via idle_cpu_hsw/idle_cpu_hsx (see intel_idle_ids). */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for BROADWELL{,_G,_X,_D} via idle_cpu_bdw/idle_cpu_bdx (see intel_idle_ids). */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle =
				intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for SKYLAKE{,_L} and KABYLAKE{,_L} via idle_cpu_skl (see intel_idle_ids). */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags =
			 MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for SKYLAKE_X via idle_cpu_skx (see intel_idle_ids). */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for ICELAKE_X via idle_cpu_icx (see intel_idle_ids). */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 128,
		.target_residency = 384,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Used for ATOM_BONNELL/ATOM_SALTWELL via idle_cpu_atom/idle_cpu_lincroft
 * (see intel_idle_ids).  Note: the first state is named "C1E" but uses the
 * MWAIT 0x00 hint.
 */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for ATOM_SILVERMONT_MID via idle_cpu_tangier (see intel_idle_ids). */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for ATOM_SILVERMONT_D (Avoton) via idle_cpu_avn (see intel_idle_ids). */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for XEON_PHI_KNL/KNM via idle_cpu_knl (see intel_idle_ids). */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter =
			 &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

/* Used for ATOM_GOLDMONT{,_PLUS} via idle_cpu_bxt (see intel_idle_ids). */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used for ATOM_GOLDMONT_D/ATOM_TREMONT_D via idle_cpu_dnv (see intel_idle_ids). */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Per-CPU-model driver configuration records, bound to CPU models by the
 * intel_idle_ids match table below.  The *_x/*_nhx variants additionally set
 * use_acpi so the ACPI _CST data can refine the state list.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table =
		       cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
}; 1097 1098 static const struct idle_cpu idle_cpu_dnv __initconst = { 1099 .state_table = dnv_cstates, 1100 .disable_promotion_to_c1e = true, 1101 .use_acpi = true, 1102 }; 1103 1104 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1105 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1106 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1107 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1108 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1109 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1110 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1111 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1112 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1113 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1114 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1115 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1116 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1117 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1118 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1119 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1120 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1121 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1122 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1123 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1124 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1125 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1126 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1127 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1128 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1129 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1130 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1131 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1132 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1133 
X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1134 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1135 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1136 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1137 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1138 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1139 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1140 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1141 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1142 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), 1143 {} 1144 }; 1145 1146 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1147 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1148 {} 1149 }; 1150 1151 static bool __init intel_idle_max_cstate_reached(int cstate) 1152 { 1153 if (cstate + 1 > max_cstate) { 1154 pr_info("max_cstate %d reached\n", max_cstate); 1155 return true; 1156 } 1157 return false; 1158 } 1159 1160 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1161 #include <acpi/processor.h> 1162 1163 static bool no_acpi __read_mostly; 1164 module_param(no_acpi, bool, 0444); 1165 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1166 1167 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1168 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1169 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1170 1171 static struct acpi_processor_power acpi_state_table __initdata; 1172 1173 /** 1174 * intel_idle_cst_usable - Check if the _CST information can be used. 1175 * 1176 * Check if all of the C-states listed by _CST in the max_cstate range are 1177 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
 */
static bool __init intel_idle_cst_usable(void)
{
	int cstate, limit;

	/* Only inspect states up to both CPUIDLE_STATE_MAX and max_cstate. */
	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
		      acpi_state_table.count);

	/* State 0 is the polling state, so start checking from state 1. */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];

		if (cx->entry_method != ACPI_CSTATE_FFH)
			return false;
	}

	return true;
}

/*
 * intel_idle_acpi_cst_extract - Evaluate ACPI _CST and cache it.
 *
 * Walk the possible CPUs until one yields a usable _CST table (all states
 * enterable via MWAIT) and the _CST control can be claimed from the ACPI
 * core.  On success acpi_state_table holds the extracted states and true is
 * returned; otherwise the table count is reset to 0 and false is returned.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/* Account for the polling state at index 0. */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		if (!acpi_processor_claim_cst_control())
			break;

		return true;
	}

	acpi_state_table.count = 0;
	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}

/*
 * intel_idle_init_cstates_acpi - Build drv->states from the cached _CST data.
 * @drv: cpuidle driver whose states table is populated (after the polling
 *	 state already installed at index 0).
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* cx->address carries the MWAIT hint for FFH states. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		state->enter = intel_idle;
		state->enter_s2idle = intel_idle_s2idle;
	}
}

/*
 * intel_idle_off_by_default - Should a static-table state start disabled?
 * @mwait_hint: MWAIT hint of the state being considered.
 *
 * Returns true when _CST data exists but does not list a state with this
 * MWAIT hint, i.e. the firmware does not advertise the state.
 */
static bool __init intel_idle_off_by_default(u32 mwait_hint)
{
	int cstate, limit;

	/*
	 * If there are no _CST C-states, do not disable any C-states by
	 * default.
	 */
	if (!acpi_state_table.count)
		return false;

	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		if (acpi_state_table.states[cstate].address == mwait_hint)
			return false;
	}
	return true;
}
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* No ACPI _CST support: stubs keep the callers unconditional. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */

/**
 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
 *
 * Tune IVT multi-socket targets.
 * Assumption: num_sockets == (max_package_num + 1).
 */
static void __init ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/**
 * irtl_2_usec - IRTL to microseconds conversion.
 * @irtl: IRTL MSR value.
 *
 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1341 */ 1342 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1343 { 1344 static const unsigned int irtl_ns_units[] __initconst = { 1345 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1346 }; 1347 unsigned long long ns; 1348 1349 if (!irtl) 1350 return 0; 1351 1352 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1353 1354 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1355 } 1356 1357 /** 1358 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1359 * 1360 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1361 * definitive maximum latency and use the same value for target_residency. 1362 */ 1363 static void __init bxt_idle_state_table_update(void) 1364 { 1365 unsigned long long msr; 1366 unsigned int usec; 1367 1368 rdmsrl(MSR_PKGC6_IRTL, msr); 1369 usec = irtl_2_usec(msr); 1370 if (usec) { 1371 bxt_cstates[2].exit_latency = usec; 1372 bxt_cstates[2].target_residency = usec; 1373 } 1374 1375 rdmsrl(MSR_PKGC7_IRTL, msr); 1376 usec = irtl_2_usec(msr); 1377 if (usec) { 1378 bxt_cstates[3].exit_latency = usec; 1379 bxt_cstates[3].target_residency = usec; 1380 } 1381 1382 rdmsrl(MSR_PKGC8_IRTL, msr); 1383 usec = irtl_2_usec(msr); 1384 if (usec) { 1385 bxt_cstates[4].exit_latency = usec; 1386 bxt_cstates[4].target_residency = usec; 1387 } 1388 1389 rdmsrl(MSR_PKGC9_IRTL, msr); 1390 usec = irtl_2_usec(msr); 1391 if (usec) { 1392 bxt_cstates[5].exit_latency = usec; 1393 bxt_cstates[5].target_residency = usec; 1394 } 1395 1396 rdmsrl(MSR_PKGC10_IRTL, msr); 1397 usec = irtl_2_usec(msr); 1398 if (usec) { 1399 bxt_cstates[6].exit_latency = usec; 1400 bxt_cstates[6].target_residency = usec; 1401 } 1402 1403 } 1404 1405 /** 1406 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1407 * 1408 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present (CPUID.(EAX=7,ECX=0):EBX bit 2) */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled (FEAT_CTL bit 18) */
		if (msr & (1 << 18))
			return;
	}

	/* PC10 usable and SGX off: hide the redundant shallower states. */
	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}

/*
 * intel_idle_verify_cstate - Validate an MWAIT hint against CPUID data.
 * @mwait_hint: MWAIT hint encoding the C-state and sub-state.
 *
 * Returns true if CPUID reports at least one MWAIT sub-state for the
 * C-state; also marks the TSC unstable for deep states when the CPU lacks
 * a nonstop TSC.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
				     MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it.
	 */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/*
 * intel_idle_init_cstates_icpu - Populate drv->states from the static table.
 * @drv: cpuidle driver to populate (polling state already at index 0).
 *
 * Applies model-specific table fixups first, then copies each usable entry
 * of cpuidle_state_table into the driver, honoring the states_off mask and
 * the ACPI _CST "off by default" policy.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific tweaks to the static tables before copying them. */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* A table entry with no enter callbacks terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/*
		 * Start the state disabled if the user masked it out or if
		 * ACPI _CST is in use and does not list it (unless the table
		 * insists on it via CPUIDLE_FLAG_ALWAYS_ENABLE).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		drv->state_count++;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1521 * @drv: cpuidle driver structure to initialize. 1522 */ 1523 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1524 { 1525 cpuidle_poll_state_init(drv); 1526 1527 if (disabled_states_mask & BIT(0)) 1528 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1529 1530 drv->state_count = 1; 1531 1532 if (icpu) 1533 intel_idle_init_cstates_icpu(drv); 1534 else 1535 intel_idle_init_cstates_acpi(drv); 1536 } 1537 1538 static void auto_demotion_disable(void) 1539 { 1540 unsigned long long msr_bits; 1541 1542 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1543 msr_bits &= ~auto_demotion_disable_flags; 1544 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1545 } 1546 1547 static void c1e_promotion_disable(void) 1548 { 1549 unsigned long long msr_bits; 1550 1551 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1552 msr_bits &= ~0x2; 1553 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1554 } 1555 1556 /** 1557 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1558 * @cpu: CPU to initialize. 1559 * 1560 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1561 * with the processor model flags. 
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	/* Apply the MSR tweaks requested by the matched idle_cpu descriptor. */
	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}

/*
 * intel_idle_cpu_online - CPU hotplug "online" callback.
 * @cpu: CPU that came online.
 *
 * Enables broadcast timekeeping when the local APIC timer stops in deep
 * C-states (no ARAT) and registers the cpuidle device if not done yet.
 */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!boot_cpu_has(X86_FEATURE_ARAT))
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

/**
 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/*
 * intel_idle_init - Probe the CPU, build the idle states list and register
 * the cpuidle driver plus a hotplug callback that brings CPUs online.
 *
 * Returns 0 on success or a negative errno when the driver does not apply
 * to this system or registration fails.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: still usable if MWAIT and ACPI _CST exist. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* EDX (sub-state counts) lands directly in mwait_substates. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		/* Another cpuidle driver got there first. */
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");