// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have the same idle states as the boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
32 * 33 */ 34 35 /* un-comment DEBUG to enable pr_debug() statements */ 36 #define DEBUG 37 38 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 39 40 #include <linux/acpi.h> 41 #include <linux/kernel.h> 42 #include <linux/cpuidle.h> 43 #include <linux/tick.h> 44 #include <trace/events/power.h> 45 #include <linux/sched.h> 46 #include <linux/notifier.h> 47 #include <linux/cpu.h> 48 #include <linux/moduleparam.h> 49 #include <asm/cpu_device_id.h> 50 #include <asm/intel-family.h> 51 #include <asm/mwait.h> 52 #include <asm/msr.h> 53 54 #define INTEL_IDLE_VERSION "0.5.1" 55 56 static struct cpuidle_driver intel_idle_driver = { 57 .name = "intel_idle", 58 .owner = THIS_MODULE, 59 }; 60 /* intel_idle.max_cstate=0 disables driver */ 61 static int max_cstate = CPUIDLE_STATE_MAX - 1; 62 static unsigned int disabled_states_mask; 63 64 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 65 66 static unsigned long auto_demotion_disable_flags; 67 static bool disable_promotion_to_c1e; 68 69 struct idle_cpu { 70 struct cpuidle_state *state_table; 71 72 /* 73 * Hardware C-state auto-demotion may not always be optimal. 74 * Indicate which enable bits to clear here. 75 */ 76 unsigned long auto_demotion_disable_flags; 77 bool byt_auto_demotion_disable_flag; 78 bool disable_promotion_to_c1e; 79 bool use_acpi; 80 }; 81 82 static const struct idle_cpu *icpu __initdata; 83 static struct cpuidle_state *cpuidle_state_table __initdata; 84 85 static unsigned int mwait_substates __initdata; 86 87 /* 88 * Enable this state by default even if the ACPI _CST does not list it. 89 */ 90 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 91 92 /* 93 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 94 * the C-state (top nibble) and sub-state (bottom nibble) 95 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 96 * 97 * We store the hint at the top of our "flags" for each state. 
98 */ 99 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 100 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 101 102 /** 103 * intel_idle - Ask the processor to enter the given idle state. 104 * @dev: cpuidle device of the target CPU. 105 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 106 * @index: Target idle state index. 107 * 108 * Use the MWAIT instruction to notify the processor that the CPU represented by 109 * @dev is idle and it can try to enter the idle state corresponding to @index. 110 * 111 * If the local APIC timer is not known to be reliable in the target idle state, 112 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 113 * 114 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to 115 * flushing user TLBs. 116 * 117 * Must be called under local_irq_disable(). 118 */ 119 static __cpuidle int intel_idle(struct cpuidle_device *dev, 120 struct cpuidle_driver *drv, int index) 121 { 122 struct cpuidle_state *state = &drv->states[index]; 123 unsigned long eax = flg2MWAIT(state->flags); 124 unsigned long ecx = 1; /* break on interrupt flag */ 125 bool tick; 126 127 if (!static_cpu_has(X86_FEATURE_ARAT)) { 128 /* 129 * Switch over to one-shot tick broadcast if the target C-state 130 * is deeper than C1. 131 */ 132 if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) { 133 tick = true; 134 tick_broadcast_enter(); 135 } else { 136 tick = false; 137 } 138 } 139 140 mwait_idle_with_hints(eax, ecx); 141 142 if (!static_cpu_has(X86_FEATURE_ARAT) && tick) 143 tick_broadcast_exit(); 144 145 return index; 146 } 147 148 /** 149 * intel_idle_s2idle - Ask the processor to enter the given idle state. 150 * @dev: cpuidle device of the target CPU. 151 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 152 * @index: Target idle state index. 
153 * 154 * Use the MWAIT instruction to notify the processor that the CPU represented by 155 * @dev is idle and it can try to enter the idle state corresponding to @index. 156 * 157 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 158 * scheduler tick and suspended scheduler clock on the target CPU. 159 */ 160 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 161 struct cpuidle_driver *drv, int index) 162 { 163 unsigned long eax = flg2MWAIT(drv->states[index].flags); 164 unsigned long ecx = 1; /* break on interrupt flag */ 165 166 mwait_idle_with_hints(eax, ecx); 167 168 return 0; 169 } 170 171 /* 172 * States are indexed by the cstate number, 173 * which is also the index into the MWAIT hint array. 174 * Thus C0 is a dummy. 175 */ 176 static struct cpuidle_state nehalem_cstates[] __initdata = { 177 { 178 .name = "C1", 179 .desc = "MWAIT 0x00", 180 .flags = MWAIT2flg(0x00), 181 .exit_latency = 3, 182 .target_residency = 6, 183 .enter = &intel_idle, 184 .enter_s2idle = intel_idle_s2idle, }, 185 { 186 .name = "C1E", 187 .desc = "MWAIT 0x01", 188 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 189 .exit_latency = 10, 190 .target_residency = 20, 191 .enter = &intel_idle, 192 .enter_s2idle = intel_idle_s2idle, }, 193 { 194 .name = "C3", 195 .desc = "MWAIT 0x10", 196 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 197 .exit_latency = 20, 198 .target_residency = 80, 199 .enter = &intel_idle, 200 .enter_s2idle = intel_idle_s2idle, }, 201 { 202 .name = "C6", 203 .desc = "MWAIT 0x20", 204 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 205 .exit_latency = 200, 206 .target_residency = 800, 207 .enter = &intel_idle, 208 .enter_s2idle = intel_idle_s2idle, }, 209 { 210 .enter = NULL } 211 }; 212 213 static struct cpuidle_state snb_cstates[] __initdata = { 214 { 215 .name = "C1", 216 .desc = "MWAIT 0x00", 217 .flags = MWAIT2flg(0x00), 218 .exit_latency = 2, 219 .target_residency = 2, 220 .enter = &intel_idle, 221 
.enter_s2idle = intel_idle_s2idle, }, 222 { 223 .name = "C1E", 224 .desc = "MWAIT 0x01", 225 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 226 .exit_latency = 10, 227 .target_residency = 20, 228 .enter = &intel_idle, 229 .enter_s2idle = intel_idle_s2idle, }, 230 { 231 .name = "C3", 232 .desc = "MWAIT 0x10", 233 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 234 .exit_latency = 80, 235 .target_residency = 211, 236 .enter = &intel_idle, 237 .enter_s2idle = intel_idle_s2idle, }, 238 { 239 .name = "C6", 240 .desc = "MWAIT 0x20", 241 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 242 .exit_latency = 104, 243 .target_residency = 345, 244 .enter = &intel_idle, 245 .enter_s2idle = intel_idle_s2idle, }, 246 { 247 .name = "C7", 248 .desc = "MWAIT 0x30", 249 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 250 .exit_latency = 109, 251 .target_residency = 345, 252 .enter = &intel_idle, 253 .enter_s2idle = intel_idle_s2idle, }, 254 { 255 .enter = NULL } 256 }; 257 258 static struct cpuidle_state byt_cstates[] __initdata = { 259 { 260 .name = "C1", 261 .desc = "MWAIT 0x00", 262 .flags = MWAIT2flg(0x00), 263 .exit_latency = 1, 264 .target_residency = 1, 265 .enter = &intel_idle, 266 .enter_s2idle = intel_idle_s2idle, }, 267 { 268 .name = "C6N", 269 .desc = "MWAIT 0x58", 270 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 271 .exit_latency = 300, 272 .target_residency = 275, 273 .enter = &intel_idle, 274 .enter_s2idle = intel_idle_s2idle, }, 275 { 276 .name = "C6S", 277 .desc = "MWAIT 0x52", 278 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 279 .exit_latency = 500, 280 .target_residency = 560, 281 .enter = &intel_idle, 282 .enter_s2idle = intel_idle_s2idle, }, 283 { 284 .name = "C7", 285 .desc = "MWAIT 0x60", 286 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 287 .exit_latency = 1200, 288 .target_residency = 4000, 289 .enter = &intel_idle, 290 .enter_s2idle = intel_idle_s2idle, }, 291 { 292 .name = "C7S", 293 .desc = "MWAIT 0x64", 294 
.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 295 .exit_latency = 10000, 296 .target_residency = 20000, 297 .enter = &intel_idle, 298 .enter_s2idle = intel_idle_s2idle, }, 299 { 300 .enter = NULL } 301 }; 302 303 static struct cpuidle_state cht_cstates[] __initdata = { 304 { 305 .name = "C1", 306 .desc = "MWAIT 0x00", 307 .flags = MWAIT2flg(0x00), 308 .exit_latency = 1, 309 .target_residency = 1, 310 .enter = &intel_idle, 311 .enter_s2idle = intel_idle_s2idle, }, 312 { 313 .name = "C6N", 314 .desc = "MWAIT 0x58", 315 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 316 .exit_latency = 80, 317 .target_residency = 275, 318 .enter = &intel_idle, 319 .enter_s2idle = intel_idle_s2idle, }, 320 { 321 .name = "C6S", 322 .desc = "MWAIT 0x52", 323 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 324 .exit_latency = 200, 325 .target_residency = 560, 326 .enter = &intel_idle, 327 .enter_s2idle = intel_idle_s2idle, }, 328 { 329 .name = "C7", 330 .desc = "MWAIT 0x60", 331 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 332 .exit_latency = 1200, 333 .target_residency = 4000, 334 .enter = &intel_idle, 335 .enter_s2idle = intel_idle_s2idle, }, 336 { 337 .name = "C7S", 338 .desc = "MWAIT 0x64", 339 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 340 .exit_latency = 10000, 341 .target_residency = 20000, 342 .enter = &intel_idle, 343 .enter_s2idle = intel_idle_s2idle, }, 344 { 345 .enter = NULL } 346 }; 347 348 static struct cpuidle_state ivb_cstates[] __initdata = { 349 { 350 .name = "C1", 351 .desc = "MWAIT 0x00", 352 .flags = MWAIT2flg(0x00), 353 .exit_latency = 1, 354 .target_residency = 1, 355 .enter = &intel_idle, 356 .enter_s2idle = intel_idle_s2idle, }, 357 { 358 .name = "C1E", 359 .desc = "MWAIT 0x01", 360 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 361 .exit_latency = 10, 362 .target_residency = 20, 363 .enter = &intel_idle, 364 .enter_s2idle = intel_idle_s2idle, }, 365 { 366 .name = "C3", 367 .desc = "MWAIT 0x10", 368 .flags = 
MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 369 .exit_latency = 59, 370 .target_residency = 156, 371 .enter = &intel_idle, 372 .enter_s2idle = intel_idle_s2idle, }, 373 { 374 .name = "C6", 375 .desc = "MWAIT 0x20", 376 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 377 .exit_latency = 80, 378 .target_residency = 300, 379 .enter = &intel_idle, 380 .enter_s2idle = intel_idle_s2idle, }, 381 { 382 .name = "C7", 383 .desc = "MWAIT 0x30", 384 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 385 .exit_latency = 87, 386 .target_residency = 300, 387 .enter = &intel_idle, 388 .enter_s2idle = intel_idle_s2idle, }, 389 { 390 .enter = NULL } 391 }; 392 393 static struct cpuidle_state ivt_cstates[] __initdata = { 394 { 395 .name = "C1", 396 .desc = "MWAIT 0x00", 397 .flags = MWAIT2flg(0x00), 398 .exit_latency = 1, 399 .target_residency = 1, 400 .enter = &intel_idle, 401 .enter_s2idle = intel_idle_s2idle, }, 402 { 403 .name = "C1E", 404 .desc = "MWAIT 0x01", 405 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 406 .exit_latency = 10, 407 .target_residency = 80, 408 .enter = &intel_idle, 409 .enter_s2idle = intel_idle_s2idle, }, 410 { 411 .name = "C3", 412 .desc = "MWAIT 0x10", 413 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 414 .exit_latency = 59, 415 .target_residency = 156, 416 .enter = &intel_idle, 417 .enter_s2idle = intel_idle_s2idle, }, 418 { 419 .name = "C6", 420 .desc = "MWAIT 0x20", 421 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 422 .exit_latency = 82, 423 .target_residency = 300, 424 .enter = &intel_idle, 425 .enter_s2idle = intel_idle_s2idle, }, 426 { 427 .enter = NULL } 428 }; 429 430 static struct cpuidle_state ivt_cstates_4s[] __initdata = { 431 { 432 .name = "C1", 433 .desc = "MWAIT 0x00", 434 .flags = MWAIT2flg(0x00), 435 .exit_latency = 1, 436 .target_residency = 1, 437 .enter = &intel_idle, 438 .enter_s2idle = intel_idle_s2idle, }, 439 { 440 .name = "C1E", 441 .desc = "MWAIT 0x01", 442 .flags = MWAIT2flg(0x01) | 
CPUIDLE_FLAG_ALWAYS_ENABLE, 443 .exit_latency = 10, 444 .target_residency = 250, 445 .enter = &intel_idle, 446 .enter_s2idle = intel_idle_s2idle, }, 447 { 448 .name = "C3", 449 .desc = "MWAIT 0x10", 450 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 451 .exit_latency = 59, 452 .target_residency = 300, 453 .enter = &intel_idle, 454 .enter_s2idle = intel_idle_s2idle, }, 455 { 456 .name = "C6", 457 .desc = "MWAIT 0x20", 458 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 459 .exit_latency = 84, 460 .target_residency = 400, 461 .enter = &intel_idle, 462 .enter_s2idle = intel_idle_s2idle, }, 463 { 464 .enter = NULL } 465 }; 466 467 static struct cpuidle_state ivt_cstates_8s[] __initdata = { 468 { 469 .name = "C1", 470 .desc = "MWAIT 0x00", 471 .flags = MWAIT2flg(0x00), 472 .exit_latency = 1, 473 .target_residency = 1, 474 .enter = &intel_idle, 475 .enter_s2idle = intel_idle_s2idle, }, 476 { 477 .name = "C1E", 478 .desc = "MWAIT 0x01", 479 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 480 .exit_latency = 10, 481 .target_residency = 500, 482 .enter = &intel_idle, 483 .enter_s2idle = intel_idle_s2idle, }, 484 { 485 .name = "C3", 486 .desc = "MWAIT 0x10", 487 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 488 .exit_latency = 59, 489 .target_residency = 600, 490 .enter = &intel_idle, 491 .enter_s2idle = intel_idle_s2idle, }, 492 { 493 .name = "C6", 494 .desc = "MWAIT 0x20", 495 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 496 .exit_latency = 88, 497 .target_residency = 700, 498 .enter = &intel_idle, 499 .enter_s2idle = intel_idle_s2idle, }, 500 { 501 .enter = NULL } 502 }; 503 504 static struct cpuidle_state hsw_cstates[] __initdata = { 505 { 506 .name = "C1", 507 .desc = "MWAIT 0x00", 508 .flags = MWAIT2flg(0x00), 509 .exit_latency = 2, 510 .target_residency = 2, 511 .enter = &intel_idle, 512 .enter_s2idle = intel_idle_s2idle, }, 513 { 514 .name = "C1E", 515 .desc = "MWAIT 0x01", 516 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 
517 .exit_latency = 10, 518 .target_residency = 20, 519 .enter = &intel_idle, 520 .enter_s2idle = intel_idle_s2idle, }, 521 { 522 .name = "C3", 523 .desc = "MWAIT 0x10", 524 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 525 .exit_latency = 33, 526 .target_residency = 100, 527 .enter = &intel_idle, 528 .enter_s2idle = intel_idle_s2idle, }, 529 { 530 .name = "C6", 531 .desc = "MWAIT 0x20", 532 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 533 .exit_latency = 133, 534 .target_residency = 400, 535 .enter = &intel_idle, 536 .enter_s2idle = intel_idle_s2idle, }, 537 { 538 .name = "C7s", 539 .desc = "MWAIT 0x32", 540 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 541 .exit_latency = 166, 542 .target_residency = 500, 543 .enter = &intel_idle, 544 .enter_s2idle = intel_idle_s2idle, }, 545 { 546 .name = "C8", 547 .desc = "MWAIT 0x40", 548 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 549 .exit_latency = 300, 550 .target_residency = 900, 551 .enter = &intel_idle, 552 .enter_s2idle = intel_idle_s2idle, }, 553 { 554 .name = "C9", 555 .desc = "MWAIT 0x50", 556 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 557 .exit_latency = 600, 558 .target_residency = 1800, 559 .enter = &intel_idle, 560 .enter_s2idle = intel_idle_s2idle, }, 561 { 562 .name = "C10", 563 .desc = "MWAIT 0x60", 564 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 565 .exit_latency = 2600, 566 .target_residency = 7700, 567 .enter = &intel_idle, 568 .enter_s2idle = intel_idle_s2idle, }, 569 { 570 .enter = NULL } 571 }; 572 static struct cpuidle_state bdw_cstates[] __initdata = { 573 { 574 .name = "C1", 575 .desc = "MWAIT 0x00", 576 .flags = MWAIT2flg(0x00), 577 .exit_latency = 2, 578 .target_residency = 2, 579 .enter = &intel_idle, 580 .enter_s2idle = intel_idle_s2idle, }, 581 { 582 .name = "C1E", 583 .desc = "MWAIT 0x01", 584 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 585 .exit_latency = 10, 586 .target_residency = 20, 587 .enter = &intel_idle, 588 .enter_s2idle = 
intel_idle_s2idle, }, 589 { 590 .name = "C3", 591 .desc = "MWAIT 0x10", 592 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 593 .exit_latency = 40, 594 .target_residency = 100, 595 .enter = &intel_idle, 596 .enter_s2idle = intel_idle_s2idle, }, 597 { 598 .name = "C6", 599 .desc = "MWAIT 0x20", 600 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 601 .exit_latency = 133, 602 .target_residency = 400, 603 .enter = &intel_idle, 604 .enter_s2idle = intel_idle_s2idle, }, 605 { 606 .name = "C7s", 607 .desc = "MWAIT 0x32", 608 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 609 .exit_latency = 166, 610 .target_residency = 500, 611 .enter = &intel_idle, 612 .enter_s2idle = intel_idle_s2idle, }, 613 { 614 .name = "C8", 615 .desc = "MWAIT 0x40", 616 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 617 .exit_latency = 300, 618 .target_residency = 900, 619 .enter = &intel_idle, 620 .enter_s2idle = intel_idle_s2idle, }, 621 { 622 .name = "C9", 623 .desc = "MWAIT 0x50", 624 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 625 .exit_latency = 600, 626 .target_residency = 1800, 627 .enter = &intel_idle, 628 .enter_s2idle = intel_idle_s2idle, }, 629 { 630 .name = "C10", 631 .desc = "MWAIT 0x60", 632 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 633 .exit_latency = 2600, 634 .target_residency = 7700, 635 .enter = &intel_idle, 636 .enter_s2idle = intel_idle_s2idle, }, 637 { 638 .enter = NULL } 639 }; 640 641 static struct cpuidle_state skl_cstates[] __initdata = { 642 { 643 .name = "C1", 644 .desc = "MWAIT 0x00", 645 .flags = MWAIT2flg(0x00), 646 .exit_latency = 2, 647 .target_residency = 2, 648 .enter = &intel_idle, 649 .enter_s2idle = intel_idle_s2idle, }, 650 { 651 .name = "C1E", 652 .desc = "MWAIT 0x01", 653 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 654 .exit_latency = 10, 655 .target_residency = 20, 656 .enter = &intel_idle, 657 .enter_s2idle = intel_idle_s2idle, }, 658 { 659 .name = "C3", 660 .desc = "MWAIT 0x10", 661 .flags = 
MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 662 .exit_latency = 70, 663 .target_residency = 100, 664 .enter = &intel_idle, 665 .enter_s2idle = intel_idle_s2idle, }, 666 { 667 .name = "C6", 668 .desc = "MWAIT 0x20", 669 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 670 .exit_latency = 85, 671 .target_residency = 200, 672 .enter = &intel_idle, 673 .enter_s2idle = intel_idle_s2idle, }, 674 { 675 .name = "C7s", 676 .desc = "MWAIT 0x33", 677 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 678 .exit_latency = 124, 679 .target_residency = 800, 680 .enter = &intel_idle, 681 .enter_s2idle = intel_idle_s2idle, }, 682 { 683 .name = "C8", 684 .desc = "MWAIT 0x40", 685 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 686 .exit_latency = 200, 687 .target_residency = 800, 688 .enter = &intel_idle, 689 .enter_s2idle = intel_idle_s2idle, }, 690 { 691 .name = "C9", 692 .desc = "MWAIT 0x50", 693 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 694 .exit_latency = 480, 695 .target_residency = 5000, 696 .enter = &intel_idle, 697 .enter_s2idle = intel_idle_s2idle, }, 698 { 699 .name = "C10", 700 .desc = "MWAIT 0x60", 701 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 702 .exit_latency = 890, 703 .target_residency = 5000, 704 .enter = &intel_idle, 705 .enter_s2idle = intel_idle_s2idle, }, 706 { 707 .enter = NULL } 708 }; 709 710 static struct cpuidle_state skx_cstates[] __initdata = { 711 { 712 .name = "C1", 713 .desc = "MWAIT 0x00", 714 .flags = MWAIT2flg(0x00), 715 .exit_latency = 2, 716 .target_residency = 2, 717 .enter = &intel_idle, 718 .enter_s2idle = intel_idle_s2idle, }, 719 { 720 .name = "C1E", 721 .desc = "MWAIT 0x01", 722 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 723 .exit_latency = 10, 724 .target_residency = 20, 725 .enter = &intel_idle, 726 .enter_s2idle = intel_idle_s2idle, }, 727 { 728 .name = "C6", 729 .desc = "MWAIT 0x20", 730 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 731 .exit_latency = 133, 732 .target_residency = 600, 
733 .enter = &intel_idle, 734 .enter_s2idle = intel_idle_s2idle, }, 735 { 736 .enter = NULL } 737 }; 738 739 static struct cpuidle_state icx_cstates[] __initdata = { 740 { 741 .name = "C1", 742 .desc = "MWAIT 0x00", 743 .flags = MWAIT2flg(0x00), 744 .exit_latency = 1, 745 .target_residency = 1, 746 .enter = &intel_idle, 747 .enter_s2idle = intel_idle_s2idle, }, 748 { 749 .name = "C1E", 750 .desc = "MWAIT 0x01", 751 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 752 .exit_latency = 4, 753 .target_residency = 4, 754 .enter = &intel_idle, 755 .enter_s2idle = intel_idle_s2idle, }, 756 { 757 .name = "C6", 758 .desc = "MWAIT 0x20", 759 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 760 .exit_latency = 128, 761 .target_residency = 384, 762 .enter = &intel_idle, 763 .enter_s2idle = intel_idle_s2idle, }, 764 { 765 .enter = NULL } 766 }; 767 768 static struct cpuidle_state atom_cstates[] __initdata = { 769 { 770 .name = "C1E", 771 .desc = "MWAIT 0x00", 772 .flags = MWAIT2flg(0x00), 773 .exit_latency = 10, 774 .target_residency = 20, 775 .enter = &intel_idle, 776 .enter_s2idle = intel_idle_s2idle, }, 777 { 778 .name = "C2", 779 .desc = "MWAIT 0x10", 780 .flags = MWAIT2flg(0x10), 781 .exit_latency = 20, 782 .target_residency = 80, 783 .enter = &intel_idle, 784 .enter_s2idle = intel_idle_s2idle, }, 785 { 786 .name = "C4", 787 .desc = "MWAIT 0x30", 788 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 789 .exit_latency = 100, 790 .target_residency = 400, 791 .enter = &intel_idle, 792 .enter_s2idle = intel_idle_s2idle, }, 793 { 794 .name = "C6", 795 .desc = "MWAIT 0x52", 796 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 797 .exit_latency = 140, 798 .target_residency = 560, 799 .enter = &intel_idle, 800 .enter_s2idle = intel_idle_s2idle, }, 801 { 802 .enter = NULL } 803 }; 804 static struct cpuidle_state tangier_cstates[] __initdata = { 805 { 806 .name = "C1", 807 .desc = "MWAIT 0x00", 808 .flags = MWAIT2flg(0x00), 809 .exit_latency = 1, 810 
.target_residency = 4, 811 .enter = &intel_idle, 812 .enter_s2idle = intel_idle_s2idle, }, 813 { 814 .name = "C4", 815 .desc = "MWAIT 0x30", 816 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 817 .exit_latency = 100, 818 .target_residency = 400, 819 .enter = &intel_idle, 820 .enter_s2idle = intel_idle_s2idle, }, 821 { 822 .name = "C6", 823 .desc = "MWAIT 0x52", 824 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 825 .exit_latency = 140, 826 .target_residency = 560, 827 .enter = &intel_idle, 828 .enter_s2idle = intel_idle_s2idle, }, 829 { 830 .name = "C7", 831 .desc = "MWAIT 0x60", 832 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 833 .exit_latency = 1200, 834 .target_residency = 4000, 835 .enter = &intel_idle, 836 .enter_s2idle = intel_idle_s2idle, }, 837 { 838 .name = "C9", 839 .desc = "MWAIT 0x64", 840 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 841 .exit_latency = 10000, 842 .target_residency = 20000, 843 .enter = &intel_idle, 844 .enter_s2idle = intel_idle_s2idle, }, 845 { 846 .enter = NULL } 847 }; 848 static struct cpuidle_state avn_cstates[] __initdata = { 849 { 850 .name = "C1", 851 .desc = "MWAIT 0x00", 852 .flags = MWAIT2flg(0x00), 853 .exit_latency = 2, 854 .target_residency = 2, 855 .enter = &intel_idle, 856 .enter_s2idle = intel_idle_s2idle, }, 857 { 858 .name = "C6", 859 .desc = "MWAIT 0x51", 860 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 861 .exit_latency = 15, 862 .target_residency = 45, 863 .enter = &intel_idle, 864 .enter_s2idle = intel_idle_s2idle, }, 865 { 866 .enter = NULL } 867 }; 868 static struct cpuidle_state knl_cstates[] __initdata = { 869 { 870 .name = "C1", 871 .desc = "MWAIT 0x00", 872 .flags = MWAIT2flg(0x00), 873 .exit_latency = 1, 874 .target_residency = 2, 875 .enter = &intel_idle, 876 .enter_s2idle = intel_idle_s2idle }, 877 { 878 .name = "C6", 879 .desc = "MWAIT 0x10", 880 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 881 .exit_latency = 120, 882 .target_residency = 500, 883 .enter = 
&intel_idle, 884 .enter_s2idle = intel_idle_s2idle }, 885 { 886 .enter = NULL } 887 }; 888 889 static struct cpuidle_state bxt_cstates[] __initdata = { 890 { 891 .name = "C1", 892 .desc = "MWAIT 0x00", 893 .flags = MWAIT2flg(0x00), 894 .exit_latency = 2, 895 .target_residency = 2, 896 .enter = &intel_idle, 897 .enter_s2idle = intel_idle_s2idle, }, 898 { 899 .name = "C1E", 900 .desc = "MWAIT 0x01", 901 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 902 .exit_latency = 10, 903 .target_residency = 20, 904 .enter = &intel_idle, 905 .enter_s2idle = intel_idle_s2idle, }, 906 { 907 .name = "C6", 908 .desc = "MWAIT 0x20", 909 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 910 .exit_latency = 133, 911 .target_residency = 133, 912 .enter = &intel_idle, 913 .enter_s2idle = intel_idle_s2idle, }, 914 { 915 .name = "C7s", 916 .desc = "MWAIT 0x31", 917 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 918 .exit_latency = 155, 919 .target_residency = 155, 920 .enter = &intel_idle, 921 .enter_s2idle = intel_idle_s2idle, }, 922 { 923 .name = "C8", 924 .desc = "MWAIT 0x40", 925 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 926 .exit_latency = 1000, 927 .target_residency = 1000, 928 .enter = &intel_idle, 929 .enter_s2idle = intel_idle_s2idle, }, 930 { 931 .name = "C9", 932 .desc = "MWAIT 0x50", 933 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 934 .exit_latency = 2000, 935 .target_residency = 2000, 936 .enter = &intel_idle, 937 .enter_s2idle = intel_idle_s2idle, }, 938 { 939 .name = "C10", 940 .desc = "MWAIT 0x60", 941 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 942 .exit_latency = 10000, 943 .target_residency = 10000, 944 .enter = &intel_idle, 945 .enter_s2idle = intel_idle_s2idle, }, 946 { 947 .enter = NULL } 948 }; 949 950 static struct cpuidle_state dnv_cstates[] __initdata = { 951 { 952 .name = "C1", 953 .desc = "MWAIT 0x00", 954 .flags = MWAIT2flg(0x00), 955 .exit_latency = 2, 956 .target_residency = 2, 957 .enter = &intel_idle, 958 
.enter_s2idle = intel_idle_s2idle, }, 959 { 960 .name = "C1E", 961 .desc = "MWAIT 0x01", 962 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 963 .exit_latency = 10, 964 .target_residency = 20, 965 .enter = &intel_idle, 966 .enter_s2idle = intel_idle_s2idle, }, 967 { 968 .name = "C6", 969 .desc = "MWAIT 0x20", 970 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 971 .exit_latency = 50, 972 .target_residency = 500, 973 .enter = &intel_idle, 974 .enter_s2idle = intel_idle_s2idle, }, 975 { 976 .enter = NULL } 977 }; 978 979 static const struct idle_cpu idle_cpu_nehalem __initconst = { 980 .state_table = nehalem_cstates, 981 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 982 .disable_promotion_to_c1e = true, 983 }; 984 985 static const struct idle_cpu idle_cpu_nhx __initconst = { 986 .state_table = nehalem_cstates, 987 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 988 .disable_promotion_to_c1e = true, 989 .use_acpi = true, 990 }; 991 992 static const struct idle_cpu idle_cpu_atom __initconst = { 993 .state_table = atom_cstates, 994 }; 995 996 static const struct idle_cpu idle_cpu_tangier __initconst = { 997 .state_table = tangier_cstates, 998 }; 999 1000 static const struct idle_cpu idle_cpu_lincroft __initconst = { 1001 .state_table = atom_cstates, 1002 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1003 }; 1004 1005 static const struct idle_cpu idle_cpu_snb __initconst = { 1006 .state_table = snb_cstates, 1007 .disable_promotion_to_c1e = true, 1008 }; 1009 1010 static const struct idle_cpu idle_cpu_snx __initconst = { 1011 .state_table = snb_cstates, 1012 .disable_promotion_to_c1e = true, 1013 .use_acpi = true, 1014 }; 1015 1016 static const struct idle_cpu idle_cpu_byt __initconst = { 1017 .state_table = byt_cstates, 1018 .disable_promotion_to_c1e = true, 1019 .byt_auto_demotion_disable_flag = true, 1020 }; 1021 1022 static const struct idle_cpu idle_cpu_cht __initconst = { 1023 .state_table = 
cht_cstates, 1024 .disable_promotion_to_c1e = true, 1025 .byt_auto_demotion_disable_flag = true, 1026 }; 1027 1028 static const struct idle_cpu idle_cpu_ivb __initconst = { 1029 .state_table = ivb_cstates, 1030 .disable_promotion_to_c1e = true, 1031 }; 1032 1033 static const struct idle_cpu idle_cpu_ivt __initconst = { 1034 .state_table = ivt_cstates, 1035 .disable_promotion_to_c1e = true, 1036 .use_acpi = true, 1037 }; 1038 1039 static const struct idle_cpu idle_cpu_hsw __initconst = { 1040 .state_table = hsw_cstates, 1041 .disable_promotion_to_c1e = true, 1042 }; 1043 1044 static const struct idle_cpu idle_cpu_hsx __initconst = { 1045 .state_table = hsw_cstates, 1046 .disable_promotion_to_c1e = true, 1047 .use_acpi = true, 1048 }; 1049 1050 static const struct idle_cpu idle_cpu_bdw __initconst = { 1051 .state_table = bdw_cstates, 1052 .disable_promotion_to_c1e = true, 1053 }; 1054 1055 static const struct idle_cpu idle_cpu_bdx __initconst = { 1056 .state_table = bdw_cstates, 1057 .disable_promotion_to_c1e = true, 1058 .use_acpi = true, 1059 }; 1060 1061 static const struct idle_cpu idle_cpu_skl __initconst = { 1062 .state_table = skl_cstates, 1063 .disable_promotion_to_c1e = true, 1064 }; 1065 1066 static const struct idle_cpu idle_cpu_skx __initconst = { 1067 .state_table = skx_cstates, 1068 .disable_promotion_to_c1e = true, 1069 .use_acpi = true, 1070 }; 1071 1072 static const struct idle_cpu idle_cpu_icx __initconst = { 1073 .state_table = icx_cstates, 1074 .disable_promotion_to_c1e = true, 1075 .use_acpi = true, 1076 }; 1077 1078 static const struct idle_cpu idle_cpu_avn __initconst = { 1079 .state_table = avn_cstates, 1080 .disable_promotion_to_c1e = true, 1081 .use_acpi = true, 1082 }; 1083 1084 static const struct idle_cpu idle_cpu_knl __initconst = { 1085 .state_table = knl_cstates, 1086 .use_acpi = true, 1087 }; 1088 1089 static const struct idle_cpu idle_cpu_bxt __initconst = { 1090 .state_table = bxt_cstates, 1091 .disable_promotion_to_c1e = true, 1092 
}; 1093 1094 static const struct idle_cpu idle_cpu_dnv __initconst = { 1095 .state_table = dnv_cstates, 1096 .disable_promotion_to_c1e = true, 1097 .use_acpi = true, 1098 }; 1099 1100 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1101 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1102 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1103 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1104 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1105 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1106 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1107 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1108 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1109 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1110 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1111 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1112 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1113 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1114 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1115 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1116 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1117 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1118 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1119 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1120 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1121 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1122 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1123 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1124 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1125 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1126 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1127 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1128 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), 1129 
X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), 1130 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), 1131 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), 1132 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), 1133 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), 1134 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), 1135 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), 1136 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), 1137 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), 1138 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), 1139 {} 1140 }; 1141 1142 static const struct x86_cpu_id intel_mwait_ids[] __initconst = { 1143 X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), 1144 {} 1145 }; 1146 1147 static bool __init intel_idle_max_cstate_reached(int cstate) 1148 { 1149 if (cstate + 1 > max_cstate) { 1150 pr_info("max_cstate %d reached\n", max_cstate); 1151 return true; 1152 } 1153 return false; 1154 } 1155 1156 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE 1157 #include <acpi/processor.h> 1158 1159 static bool no_acpi __read_mostly; 1160 module_param(no_acpi, bool, 0444); 1161 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); 1162 1163 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ 1164 module_param_named(use_acpi, force_use_acpi, bool, 0444); 1165 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1166 1167 static struct acpi_processor_power acpi_state_table __initdata; 1168 1169 /** 1170 * intel_idle_cst_usable - Check if the _CST information can be used. 1171 * 1172 * Check if all of the C-states listed by _CST in the max_cstate range are 1173 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 
1174 */ 1175 static bool __init intel_idle_cst_usable(void) 1176 { 1177 int cstate, limit; 1178 1179 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1180 acpi_state_table.count); 1181 1182 for (cstate = 1; cstate < limit; cstate++) { 1183 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1184 1185 if (cx->entry_method != ACPI_CSTATE_FFH) 1186 return false; 1187 } 1188 1189 return true; 1190 } 1191 1192 static bool __init intel_idle_acpi_cst_extract(void) 1193 { 1194 unsigned int cpu; 1195 1196 if (no_acpi) { 1197 pr_debug("Not allowed to use ACPI _CST\n"); 1198 return false; 1199 } 1200 1201 for_each_possible_cpu(cpu) { 1202 struct acpi_processor *pr = per_cpu(processors, cpu); 1203 1204 if (!pr) 1205 continue; 1206 1207 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1208 continue; 1209 1210 acpi_state_table.count++; 1211 1212 if (!intel_idle_cst_usable()) 1213 continue; 1214 1215 if (!acpi_processor_claim_cst_control()) { 1216 acpi_state_table.count = 0; 1217 return false; 1218 } 1219 1220 return true; 1221 } 1222 1223 pr_debug("ACPI _CST not found or not usable\n"); 1224 return false; 1225 } 1226 1227 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1228 { 1229 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1230 1231 /* 1232 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1233 * the interesting states are ACPI_CSTATE_FFH. 
1234 */ 1235 for (cstate = 1; cstate < limit; cstate++) { 1236 struct acpi_processor_cx *cx; 1237 struct cpuidle_state *state; 1238 1239 if (intel_idle_max_cstate_reached(cstate)) 1240 break; 1241 1242 cx = &acpi_state_table.states[cstate]; 1243 1244 state = &drv->states[drv->state_count++]; 1245 1246 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1247 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1248 state->exit_latency = cx->latency; 1249 /* 1250 * For C1-type C-states use the same number for both the exit 1251 * latency and target residency, because that is the case for 1252 * C1 in the majority of the static C-states tables above. 1253 * For the other types of C-states, however, set the target 1254 * residency to 3 times the exit latency which should lead to 1255 * a reasonable balance between energy-efficiency and 1256 * performance in the majority of interesting cases. 1257 */ 1258 state->target_residency = cx->latency; 1259 if (cx->type > ACPI_STATE_C1) 1260 state->target_residency *= 3; 1261 1262 state->flags = MWAIT2flg(cx->address); 1263 if (cx->type > ACPI_STATE_C2) 1264 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1265 1266 if (disabled_states_mask & BIT(cstate)) 1267 state->flags |= CPUIDLE_FLAG_OFF; 1268 1269 state->enter = intel_idle; 1270 state->enter_s2idle = intel_idle_s2idle; 1271 } 1272 } 1273 1274 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1275 { 1276 int cstate, limit; 1277 1278 /* 1279 * If there are no _CST C-states, do not disable any C-states by 1280 * default. 1281 */ 1282 if (!acpi_state_table.count) 1283 return false; 1284 1285 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1286 /* 1287 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1288 * the interesting states are ACPI_CSTATE_FFH. 
1289 */ 1290 for (cstate = 1; cstate < limit; cstate++) { 1291 if (acpi_state_table.states[cstate].address == mwait_hint) 1292 return false; 1293 } 1294 return true; 1295 } 1296 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1297 #define force_use_acpi (false) 1298 1299 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1300 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1301 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1302 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1303 1304 /** 1305 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1306 * 1307 * Tune IVT multi-socket targets. 1308 * Assumption: num_sockets == (max_package_num + 1). 1309 */ 1310 static void __init ivt_idle_state_table_update(void) 1311 { 1312 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1313 int cpu, package_num, num_sockets = 1; 1314 1315 for_each_online_cpu(cpu) { 1316 package_num = topology_physical_package_id(cpu); 1317 if (package_num + 1 > num_sockets) { 1318 num_sockets = package_num + 1; 1319 1320 if (num_sockets > 4) { 1321 cpuidle_state_table = ivt_cstates_8s; 1322 return; 1323 } 1324 } 1325 } 1326 1327 if (num_sockets > 2) 1328 cpuidle_state_table = ivt_cstates_4s; 1329 1330 /* else, 1 and 2 socket systems use default ivt_cstates */ 1331 } 1332 1333 /** 1334 * irtl_2_usec - IRTL to microseconds conversion. 1335 * @irtl: IRTL MSR value. 1336 * 1337 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1338 */ 1339 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1340 { 1341 static const unsigned int irtl_ns_units[] __initconst = { 1342 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1343 }; 1344 unsigned long long ns; 1345 1346 if (!irtl) 1347 return 0; 1348 1349 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1350 1351 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1352 } 1353 1354 /** 1355 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1356 * 1357 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1358 * definitive maximum latency and use the same value for target_residency. 1359 */ 1360 static void __init bxt_idle_state_table_update(void) 1361 { 1362 unsigned long long msr; 1363 unsigned int usec; 1364 1365 rdmsrl(MSR_PKGC6_IRTL, msr); 1366 usec = irtl_2_usec(msr); 1367 if (usec) { 1368 bxt_cstates[2].exit_latency = usec; 1369 bxt_cstates[2].target_residency = usec; 1370 } 1371 1372 rdmsrl(MSR_PKGC7_IRTL, msr); 1373 usec = irtl_2_usec(msr); 1374 if (usec) { 1375 bxt_cstates[3].exit_latency = usec; 1376 bxt_cstates[3].target_residency = usec; 1377 } 1378 1379 rdmsrl(MSR_PKGC8_IRTL, msr); 1380 usec = irtl_2_usec(msr); 1381 if (usec) { 1382 bxt_cstates[4].exit_latency = usec; 1383 bxt_cstates[4].target_residency = usec; 1384 } 1385 1386 rdmsrl(MSR_PKGC9_IRTL, msr); 1387 usec = irtl_2_usec(msr); 1388 if (usec) { 1389 bxt_cstates[5].exit_latency = usec; 1390 bxt_cstates[5].target_residency = usec; 1391 } 1392 1393 rdmsrl(MSR_PKGC10_IRTL, msr); 1394 usec = irtl_2_usec(msr); 1395 if (usec) { 1396 bxt_cstates[6].exit_latency = usec; 1397 bxt_cstates[6].target_residency = usec; 1398 } 1399 1400 } 1401 1402 /** 1403 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1404 * 1405 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1406 */ 1407 static void __init sklh_idle_state_table_update(void) 1408 { 1409 unsigned long long msr; 1410 unsigned int eax, ebx, ecx, edx; 1411 1412 1413 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1414 if (max_cstate <= 7) 1415 return; 1416 1417 /* if PC10 not present in CPUID.MWAIT.EDX */ 1418 if ((mwait_substates & (0xF << 28)) == 0) 1419 return; 1420 1421 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1422 1423 /* PC10 is not enabled in PKG C-state limit */ 1424 if ((msr & 0xF) != 8) 1425 return; 1426 1427 ecx = 0; 1428 cpuid(7, &eax, &ebx, &ecx, &edx); 1429 1430 /* if SGX is present */ 1431 if (ebx & (1 << 2)) { 1432 1433 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1434 1435 /* if SGX is enabled */ 1436 if (msr & (1 << 18)) 1437 return; 1438 } 1439 1440 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1441 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1442 } 1443 1444 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1445 { 1446 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1447 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1448 MWAIT_SUBSTATE_MASK; 1449 1450 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1451 if (num_substates == 0) 1452 return false; 1453 1454 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1455 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1456 1457 return true; 1458 } 1459 1460 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1461 { 1462 int cstate; 1463 1464 switch (boot_cpu_data.x86_model) { 1465 case INTEL_FAM6_IVYBRIDGE_X: 1466 ivt_idle_state_table_update(); 1467 break; 1468 case INTEL_FAM6_ATOM_GOLDMONT: 1469 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1470 bxt_idle_state_table_update(); 1471 break; 1472 case INTEL_FAM6_SKYLAKE: 1473 sklh_idle_state_table_update(); 1474 break; 1475 } 1476 1477 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1478 unsigned int mwait_hint; 1479 1480 if (intel_idle_max_cstate_reached(cstate)) 1481 break; 1482 1483 if (!cpuidle_state_table[cstate].enter && 1484 !cpuidle_state_table[cstate].enter_s2idle) 1485 break; 1486 1487 /* If marked as unusable, skip this state. */ 1488 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1489 pr_debug("state %s is disabled\n", 1490 cpuidle_state_table[cstate].name); 1491 continue; 1492 } 1493 1494 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1495 if (!intel_idle_verify_cstate(mwait_hint)) 1496 continue; 1497 1498 /* Structure copy. */ 1499 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1500 1501 if ((disabled_states_mask & BIT(drv->state_count)) || 1502 ((icpu->use_acpi || force_use_acpi) && 1503 intel_idle_off_by_default(mwait_hint) && 1504 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1505 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1506 1507 drv->state_count++; 1508 } 1509 1510 if (icpu->byt_auto_demotion_disable_flag) { 1511 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1512 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1513 } 1514 } 1515 1516 /** 1517 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 
1518 * @drv: cpuidle driver structure to initialize. 1519 */ 1520 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1521 { 1522 cpuidle_poll_state_init(drv); 1523 1524 if (disabled_states_mask & BIT(0)) 1525 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1526 1527 drv->state_count = 1; 1528 1529 if (icpu) 1530 intel_idle_init_cstates_icpu(drv); 1531 else 1532 intel_idle_init_cstates_acpi(drv); 1533 } 1534 1535 static void auto_demotion_disable(void) 1536 { 1537 unsigned long long msr_bits; 1538 1539 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1540 msr_bits &= ~auto_demotion_disable_flags; 1541 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1542 } 1543 1544 static void c1e_promotion_disable(void) 1545 { 1546 unsigned long long msr_bits; 1547 1548 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1549 msr_bits &= ~0x2; 1550 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1551 } 1552 1553 /** 1554 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1555 * @cpu: CPU to initialize. 1556 * 1557 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1558 * with the processor model flags. 
1559 */ 1560 static int intel_idle_cpu_init(unsigned int cpu) 1561 { 1562 struct cpuidle_device *dev; 1563 1564 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1565 dev->cpu = cpu; 1566 1567 if (cpuidle_register_device(dev)) { 1568 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1569 return -EIO; 1570 } 1571 1572 if (auto_demotion_disable_flags) 1573 auto_demotion_disable(); 1574 1575 if (disable_promotion_to_c1e) 1576 c1e_promotion_disable(); 1577 1578 return 0; 1579 } 1580 1581 static int intel_idle_cpu_online(unsigned int cpu) 1582 { 1583 struct cpuidle_device *dev; 1584 1585 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1586 tick_broadcast_enable(); 1587 1588 /* 1589 * Some systems can hotplug a cpu at runtime after 1590 * the kernel has booted, we have to initialize the 1591 * driver in this case 1592 */ 1593 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1594 if (!dev->registered) 1595 return intel_idle_cpu_init(cpu); 1596 1597 return 0; 1598 } 1599 1600 /** 1601 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
1602 */ 1603 static void __init intel_idle_cpuidle_devices_uninit(void) 1604 { 1605 int i; 1606 1607 for_each_online_cpu(i) 1608 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1609 } 1610 1611 static int __init intel_idle_init(void) 1612 { 1613 const struct x86_cpu_id *id; 1614 unsigned int eax, ebx, ecx; 1615 int retval; 1616 1617 /* Do not load intel_idle at all for now if idle= is passed */ 1618 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1619 return -ENODEV; 1620 1621 if (max_cstate == 0) { 1622 pr_debug("disabled\n"); 1623 return -EPERM; 1624 } 1625 1626 id = x86_match_cpu(intel_idle_ids); 1627 if (id) { 1628 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1629 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1630 return -ENODEV; 1631 } 1632 } else { 1633 id = x86_match_cpu(intel_mwait_ids); 1634 if (!id) 1635 return -ENODEV; 1636 } 1637 1638 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1639 return -ENODEV; 1640 1641 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1642 1643 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1644 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1645 !mwait_substates) 1646 return -ENODEV; 1647 1648 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1649 1650 icpu = (const struct idle_cpu *)id->driver_data; 1651 if (icpu) { 1652 cpuidle_state_table = icpu->state_table; 1653 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 1654 disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; 1655 if (icpu->use_acpi || force_use_acpi) 1656 intel_idle_acpi_cst_extract(); 1657 } else if (!intel_idle_acpi_cst_extract()) { 1658 return -ENODEV; 1659 } 1660 1661 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1662 boot_cpu_data.x86_model); 1663 1664 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1665 if (!intel_idle_cpuidle_devices) 1666 return -ENOMEM; 1667 1668 intel_idle_cpuidle_driver_init(&intel_idle_driver); 1669 1670 retval = cpuidle_register_driver(&intel_idle_driver); 
1671 if (retval) { 1672 struct cpuidle_driver *drv = cpuidle_get_driver(); 1673 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1674 drv ? drv->name : "none"); 1675 goto init_driver_fail; 1676 } 1677 1678 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1679 intel_idle_cpu_online, NULL); 1680 if (retval < 0) 1681 goto hp_setup_fail; 1682 1683 pr_debug("Local APIC timer is reliable in %s\n", 1684 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 1685 1686 return 0; 1687 1688 hp_setup_fail: 1689 intel_idle_cpuidle_devices_uninit(); 1690 cpuidle_unregister_driver(&intel_idle_driver); 1691 init_driver_fail: 1692 free_percpu(intel_idle_cpuidle_devices); 1693 return retval; 1694 1695 } 1696 device_initcall(intel_idle_init); 1697 1698 /* 1699 * We are not really modular, but we used to support that. Meaning we also 1700 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1701 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1702 * is the easiest way (currently) to continue doing that. 1703 */ 1704 module_param(max_cstate, int, 0444); 1705 /* 1706 * The positions of the bits that are set in this number are the indices of the 1707 * idle states to be disabled by default (as reflected by the names of the 1708 * corresponding idle state directories in sysfs, "state0", "state1" ... 1709 * "state<i>" ..., where <i> is the index of the given state). 1710 */ 1711 module_param_named(states_off, disabled_states_mask, uint, 0444); 1712 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 1713