// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013 - 2020, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have the same idle states as the boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 *
 * CPU will flush caches as needed when entering a C-state via MWAIT
 *	(in contrast to entering ACPI C3, in which case the WBINVD
 *	instruction needs to be executed to flush the caches)
 */

/*
 * Known limitations
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */

/* Prefix every pr_*() message from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.5.1"

/* The cpuidle driver object; only the name and owner are set statically. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* NOTE(review): presumably a bitmask of idle states to disable - confirm. */
static unsigned int disabled_states_mask;

static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/*
 * File-scope counterparts of the idle_cpu fields with the same names.
 * NOTE(review): presumably copied from the matched idle_cpu at init - confirm.
 */
static unsigned long auto_demotion_disable_flags;
static bool disable_promotion_to_c1e;

/*
 * Per-CPU-model configuration.  One instance exists for each supported group
 * of models (the idle_cpu_* objects) and is attached to the model IDs in
 * intel_idle_ids[].
 */
struct idle_cpu {
	/* Table of idle states for this model, ended by .enter == NULL. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably "consult ACPI _CST for this model" - confirm. */
	bool use_acpi;
};

static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

static unsigned int mwait_substates __initdata;

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
99 */ 100 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 101 102 /* 103 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 104 * the C-state (top nibble) and sub-state (bottom nibble) 105 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 106 * 107 * We store the hint at the top of our "flags" for each state. 108 */ 109 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 110 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 111 112 /** 113 * intel_idle - Ask the processor to enter the given idle state. 114 * @dev: cpuidle device of the target CPU. 115 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 116 * @index: Target idle state index. 117 * 118 * Use the MWAIT instruction to notify the processor that the CPU represented by 119 * @dev is idle and it can try to enter the idle state corresponding to @index. 120 * 121 * If the local APIC timer is not known to be reliable in the target idle state, 122 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 123 * 124 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to 125 * flushing user TLBs. 126 * 127 * Must be called under local_irq_disable(). 128 */ 129 static __cpuidle int intel_idle(struct cpuidle_device *dev, 130 struct cpuidle_driver *drv, int index) 131 { 132 struct cpuidle_state *state = &drv->states[index]; 133 unsigned long eax = flg2MWAIT(state->flags); 134 unsigned long ecx = 1; /* break on interrupt flag */ 135 136 if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) 137 local_irq_enable(); 138 139 mwait_idle_with_hints(eax, ecx); 140 141 return index; 142 } 143 144 /** 145 * intel_idle_s2idle - Ask the processor to enter the given idle state. 146 * @dev: cpuidle device of the target CPU. 147 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 148 * @index: Target idle state index. 
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 *
 * Unlike intel_idle(), this routine does not look at CPUIDLE_FLAG_IRQ_ENABLE.
 *
 * Return: 0 in all cases (@index is only used to look up the MWAIT hint).
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long eax = flg2MWAIT(drv->states[index].flags);
	unsigned long ecx = 1; /* break on interrupt flag */

	mwait_idle_with_hints(eax, ecx);

	return 0;
}

/*
 * Idle state tables.
 *
 * Each table lists the idle states for one group of CPU models and is
 * terminated by an entry whose .enter is NULL.  The MWAIT hint of a state is
 * kept in the top byte of .flags (see MWAIT2flg()), and every real entry uses
 * intel_idle() / intel_idle_s2idle() as its callbacks.
 *
 * NOTE(review): this comment used to say that states are indexed by C-state
 * number with C0 as a dummy entry; the tables below start at their first real
 * state and contain no such dummy.
 */

/* Used by idle_cpu_nehalem and idle_cpu_nhx. */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_snb and idle_cpu_snx. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_byt. */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_cht. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_ivb. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_ivt. */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Same states as ivt_cstates with larger target residencies.
 * NOTE(review): not referenced by any idle_cpu descriptor in this part of the
 * file; presumably selected at init time for 4-socket systems - confirm.
 */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Same states as ivt_cstates with still larger target residencies.
 * NOTE(review): not referenced by any idle_cpu descriptor in this part of the
 * file; presumably selected at init time for 8-socket systems - confirm.
 */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_hsw and idle_cpu_hsx. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Used by idle_cpu_bdw and idle_cpu_bdx. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_skl. */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_skx.  C1 is entered with interrupts enabled. */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_icx.  C1 is entered with interrupts enabled. */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_atom and idle_cpu_lincroft. */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Used by idle_cpu_tangier. */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Used by idle_cpu_avn. */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Used by idle_cpu_knl. */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

/* Used by idle_cpu_bxt. */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/* Used by idle_cpu_dnv. */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
 * C6, and this is indicated in the CPUID mwait leaf.
 *
 * Used by idle_cpu_snr.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/*
 * Per-model idle_cpu descriptors.  Each one selects a state table and sets
 * whichever auto-demotion, C1E-promotion and ACPI options apply to the model.
 * They are attached to CPU model IDs in intel_idle_ids[] below.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * Family 6 CPU models with a dedicated idle_cpu descriptor.  The descriptor
 * pointer is passed as the second argument of each match entry; the table is
 * ended by an empty entry.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};

/*
 * Generic match on Intel family 6 with the MWAIT feature and no driver data.
 * NOTE(review): presumably the fallback for models missing from
 * intel_idle_ids[] - confirm against the init code.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/*
 * Return true (and log the limit) when @cstate + 1 exceeds max_cstate,
 * false otherwise.
 */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/*
 * Return false if the boot CPU has X86_FEATURE_ARAT.  Otherwise return true
 * when the C-state field of @state's MWAIT hint (the bits above the
 * sub-state field) is non-zero.
 */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	unsigned long eax = flg2MWAIT(state->flags);

	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

/* Read-only (0444) module parameters controlling the use of ACPI _CST. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
 *
 * Check if all of the C-states listed by _CST in the max_cstate range are
 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1224 */ 1225 static bool __init intel_idle_cst_usable(void) 1226 { 1227 int cstate, limit; 1228 1229 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1230 acpi_state_table.count); 1231 1232 for (cstate = 1; cstate < limit; cstate++) { 1233 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1234 1235 if (cx->entry_method != ACPI_CSTATE_FFH) 1236 return false; 1237 } 1238 1239 return true; 1240 } 1241 1242 static bool __init intel_idle_acpi_cst_extract(void) 1243 { 1244 unsigned int cpu; 1245 1246 if (no_acpi) { 1247 pr_debug("Not allowed to use ACPI _CST\n"); 1248 return false; 1249 } 1250 1251 for_each_possible_cpu(cpu) { 1252 struct acpi_processor *pr = per_cpu(processors, cpu); 1253 1254 if (!pr) 1255 continue; 1256 1257 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1258 continue; 1259 1260 acpi_state_table.count++; 1261 1262 if (!intel_idle_cst_usable()) 1263 continue; 1264 1265 if (!acpi_processor_claim_cst_control()) 1266 break; 1267 1268 return true; 1269 } 1270 1271 acpi_state_table.count = 0; 1272 pr_debug("ACPI _CST not found or not usable\n"); 1273 return false; 1274 } 1275 1276 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1277 { 1278 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1279 1280 /* 1281 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1282 * the interesting states are ACPI_CSTATE_FFH. 
1283 */ 1284 for (cstate = 1; cstate < limit; cstate++) { 1285 struct acpi_processor_cx *cx; 1286 struct cpuidle_state *state; 1287 1288 if (intel_idle_max_cstate_reached(cstate - 1)) 1289 break; 1290 1291 cx = &acpi_state_table.states[cstate]; 1292 1293 state = &drv->states[drv->state_count++]; 1294 1295 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1296 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1297 state->exit_latency = cx->latency; 1298 /* 1299 * For C1-type C-states use the same number for both the exit 1300 * latency and target residency, because that is the case for 1301 * C1 in the majority of the static C-states tables above. 1302 * For the other types of C-states, however, set the target 1303 * residency to 3 times the exit latency which should lead to 1304 * a reasonable balance between energy-efficiency and 1305 * performance in the majority of interesting cases. 1306 */ 1307 state->target_residency = cx->latency; 1308 if (cx->type > ACPI_STATE_C1) 1309 state->target_residency *= 3; 1310 1311 state->flags = MWAIT2flg(cx->address); 1312 if (cx->type > ACPI_STATE_C2) 1313 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1314 1315 if (disabled_states_mask & BIT(cstate)) 1316 state->flags |= CPUIDLE_FLAG_OFF; 1317 1318 if (intel_idle_state_needs_timer_stop(state)) 1319 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1320 1321 state->enter = intel_idle; 1322 state->enter_s2idle = intel_idle_s2idle; 1323 } 1324 } 1325 1326 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1327 { 1328 int cstate, limit; 1329 1330 /* 1331 * If there are no _CST C-states, do not disable any C-states by 1332 * default. 1333 */ 1334 if (!acpi_state_table.count) 1335 return false; 1336 1337 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1338 /* 1339 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1340 * the interesting states are ACPI_CSTATE_FFH. 
1341 */ 1342 for (cstate = 1; cstate < limit; cstate++) { 1343 if (acpi_state_table.states[cstate].address == mwait_hint) 1344 return false; 1345 } 1346 return true; 1347 } 1348 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1349 #define force_use_acpi (false) 1350 1351 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1352 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1353 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1354 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1355 1356 /** 1357 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1358 * 1359 * Tune IVT multi-socket targets. 1360 * Assumption: num_sockets == (max_package_num + 1). 1361 */ 1362 static void __init ivt_idle_state_table_update(void) 1363 { 1364 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1365 int cpu, package_num, num_sockets = 1; 1366 1367 for_each_online_cpu(cpu) { 1368 package_num = topology_physical_package_id(cpu); 1369 if (package_num + 1 > num_sockets) { 1370 num_sockets = package_num + 1; 1371 1372 if (num_sockets > 4) { 1373 cpuidle_state_table = ivt_cstates_8s; 1374 return; 1375 } 1376 } 1377 } 1378 1379 if (num_sockets > 2) 1380 cpuidle_state_table = ivt_cstates_4s; 1381 1382 /* else, 1 and 2 socket systems use default ivt_cstates */ 1383 } 1384 1385 /** 1386 * irtl_2_usec - IRTL to microseconds conversion. 1387 * @irtl: IRTL MSR value. 1388 * 1389 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1390 */ 1391 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1392 { 1393 static const unsigned int irtl_ns_units[] __initconst = { 1394 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1395 }; 1396 unsigned long long ns; 1397 1398 if (!irtl) 1399 return 0; 1400 1401 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1402 1403 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1404 } 1405 1406 /** 1407 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1408 * 1409 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1410 * definitive maximum latency and use the same value for target_residency. 1411 */ 1412 static void __init bxt_idle_state_table_update(void) 1413 { 1414 unsigned long long msr; 1415 unsigned int usec; 1416 1417 rdmsrl(MSR_PKGC6_IRTL, msr); 1418 usec = irtl_2_usec(msr); 1419 if (usec) { 1420 bxt_cstates[2].exit_latency = usec; 1421 bxt_cstates[2].target_residency = usec; 1422 } 1423 1424 rdmsrl(MSR_PKGC7_IRTL, msr); 1425 usec = irtl_2_usec(msr); 1426 if (usec) { 1427 bxt_cstates[3].exit_latency = usec; 1428 bxt_cstates[3].target_residency = usec; 1429 } 1430 1431 rdmsrl(MSR_PKGC8_IRTL, msr); 1432 usec = irtl_2_usec(msr); 1433 if (usec) { 1434 bxt_cstates[4].exit_latency = usec; 1435 bxt_cstates[4].target_residency = usec; 1436 } 1437 1438 rdmsrl(MSR_PKGC9_IRTL, msr); 1439 usec = irtl_2_usec(msr); 1440 if (usec) { 1441 bxt_cstates[5].exit_latency = usec; 1442 bxt_cstates[5].target_residency = usec; 1443 } 1444 1445 rdmsrl(MSR_PKGC10_IRTL, msr); 1446 usec = irtl_2_usec(msr); 1447 if (usec) { 1448 bxt_cstates[6].exit_latency = usec; 1449 bxt_cstates[6].target_residency = usec; 1450 } 1451 1452 } 1453 1454 /** 1455 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1456 * 1457 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
1458 */ 1459 static void __init sklh_idle_state_table_update(void) 1460 { 1461 unsigned long long msr; 1462 unsigned int eax, ebx, ecx, edx; 1463 1464 1465 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1466 if (max_cstate <= 7) 1467 return; 1468 1469 /* if PC10 not present in CPUID.MWAIT.EDX */ 1470 if ((mwait_substates & (0xF << 28)) == 0) 1471 return; 1472 1473 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1474 1475 /* PC10 is not enabled in PKG C-state limit */ 1476 if ((msr & 0xF) != 8) 1477 return; 1478 1479 ecx = 0; 1480 cpuid(7, &eax, &ebx, &ecx, &edx); 1481 1482 /* if SGX is present */ 1483 if (ebx & (1 << 2)) { 1484 1485 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1486 1487 /* if SGX is enabled */ 1488 if (msr & (1 << 18)) 1489 return; 1490 } 1491 1492 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1493 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1494 } 1495 1496 /** 1497 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake 1498 * idle states table. 1499 */ 1500 static void __init skx_idle_state_table_update(void) 1501 { 1502 unsigned long long msr; 1503 1504 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1505 1506 /* 1507 * 000b: C0/C1 (no package C-state support) 1508 * 001b: C2 1509 * 010b: C6 (non-retention) 1510 * 011b: C6 (retention) 1511 * 111b: No Package C state limits. 1512 */ 1513 if ((msr & 0x7) < 2) { 1514 /* 1515 * Uses the CC6 + PC0 latency and 3 times of 1516 * latency for target_residency if the PC6 1517 * is disabled in BIOS. This is consistent 1518 * with how intel_idle driver uses _CST 1519 * to set the target_residency. 
1520 */ 1521 skx_cstates[2].exit_latency = 92; 1522 skx_cstates[2].target_residency = 276; 1523 } 1524 } 1525 1526 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1527 { 1528 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1529 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1530 MWAIT_SUBSTATE_MASK; 1531 1532 /* Ignore the C-state if there are NO sub-states in CPUID for it. */ 1533 if (num_substates == 0) 1534 return false; 1535 1536 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1537 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1538 1539 return true; 1540 } 1541 1542 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1543 { 1544 int cstate; 1545 1546 switch (boot_cpu_data.x86_model) { 1547 case INTEL_FAM6_IVYBRIDGE_X: 1548 ivt_idle_state_table_update(); 1549 break; 1550 case INTEL_FAM6_ATOM_GOLDMONT: 1551 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1552 bxt_idle_state_table_update(); 1553 break; 1554 case INTEL_FAM6_SKYLAKE: 1555 sklh_idle_state_table_update(); 1556 break; 1557 case INTEL_FAM6_SKYLAKE_X: 1558 skx_idle_state_table_update(); 1559 break; 1560 } 1561 1562 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1563 unsigned int mwait_hint; 1564 1565 if (intel_idle_max_cstate_reached(cstate)) 1566 break; 1567 1568 if (!cpuidle_state_table[cstate].enter && 1569 !cpuidle_state_table[cstate].enter_s2idle) 1570 break; 1571 1572 /* If marked as unusable, skip this state. */ 1573 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1574 pr_debug("state %s is disabled\n", 1575 cpuidle_state_table[cstate].name); 1576 continue; 1577 } 1578 1579 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1580 if (!intel_idle_verify_cstate(mwait_hint)) 1581 continue; 1582 1583 /* Structure copy. 
*/ 1584 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1585 1586 if ((disabled_states_mask & BIT(drv->state_count)) || 1587 ((icpu->use_acpi || force_use_acpi) && 1588 intel_idle_off_by_default(mwait_hint) && 1589 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1590 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1591 1592 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count])) 1593 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP; 1594 1595 drv->state_count++; 1596 } 1597 1598 if (icpu->byt_auto_demotion_disable_flag) { 1599 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1600 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1601 } 1602 } 1603 1604 /** 1605 * intel_idle_cpuidle_driver_init - Create the list of available idle states. 1606 * @drv: cpuidle driver structure to initialize. 1607 */ 1608 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1609 { 1610 cpuidle_poll_state_init(drv); 1611 1612 if (disabled_states_mask & BIT(0)) 1613 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1614 1615 drv->state_count = 1; 1616 1617 if (icpu) 1618 intel_idle_init_cstates_icpu(drv); 1619 else 1620 intel_idle_init_cstates_acpi(drv); 1621 } 1622 1623 static void auto_demotion_disable(void) 1624 { 1625 unsigned long long msr_bits; 1626 1627 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1628 msr_bits &= ~auto_demotion_disable_flags; 1629 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1630 } 1631 1632 static void c1e_promotion_disable(void) 1633 { 1634 unsigned long long msr_bits; 1635 1636 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1637 msr_bits &= ~0x2; 1638 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1639 } 1640 1641 /** 1642 * intel_idle_cpu_init - Register the target CPU with the cpuidle core. 1643 * @cpu: CPU to initialize. 1644 * 1645 * Register a cpuidle device object for @cpu and update its MSRs in accordance 1646 * with the processor model flags. 
1647 */ 1648 static int intel_idle_cpu_init(unsigned int cpu) 1649 { 1650 struct cpuidle_device *dev; 1651 1652 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1653 dev->cpu = cpu; 1654 1655 if (cpuidle_register_device(dev)) { 1656 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1657 return -EIO; 1658 } 1659 1660 if (auto_demotion_disable_flags) 1661 auto_demotion_disable(); 1662 1663 if (disable_promotion_to_c1e) 1664 c1e_promotion_disable(); 1665 1666 return 0; 1667 } 1668 1669 static int intel_idle_cpu_online(unsigned int cpu) 1670 { 1671 struct cpuidle_device *dev; 1672 1673 if (!boot_cpu_has(X86_FEATURE_ARAT)) 1674 tick_broadcast_enable(); 1675 1676 /* 1677 * Some systems can hotplug a cpu at runtime after 1678 * the kernel has booted, we have to initialize the 1679 * driver in this case 1680 */ 1681 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1682 if (!dev->registered) 1683 return intel_idle_cpu_init(cpu); 1684 1685 return 0; 1686 } 1687 1688 /** 1689 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 
1690 */ 1691 static void __init intel_idle_cpuidle_devices_uninit(void) 1692 { 1693 int i; 1694 1695 for_each_online_cpu(i) 1696 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1697 } 1698 1699 static int __init intel_idle_init(void) 1700 { 1701 const struct x86_cpu_id *id; 1702 unsigned int eax, ebx, ecx; 1703 int retval; 1704 1705 /* Do not load intel_idle at all for now if idle= is passed */ 1706 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1707 return -ENODEV; 1708 1709 if (max_cstate == 0) { 1710 pr_debug("disabled\n"); 1711 return -EPERM; 1712 } 1713 1714 id = x86_match_cpu(intel_idle_ids); 1715 if (id) { 1716 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1717 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1718 return -ENODEV; 1719 } 1720 } else { 1721 id = x86_match_cpu(intel_mwait_ids); 1722 if (!id) 1723 return -ENODEV; 1724 } 1725 1726 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1727 return -ENODEV; 1728 1729 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1730 1731 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1732 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1733 !mwait_substates) 1734 return -ENODEV; 1735 1736 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1737 1738 icpu = (const struct idle_cpu *)id->driver_data; 1739 if (icpu) { 1740 cpuidle_state_table = icpu->state_table; 1741 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; 1742 disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; 1743 if (icpu->use_acpi || force_use_acpi) 1744 intel_idle_acpi_cst_extract(); 1745 } else if (!intel_idle_acpi_cst_extract()) { 1746 return -ENODEV; 1747 } 1748 1749 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1750 boot_cpu_data.x86_model); 1751 1752 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1753 if (!intel_idle_cpuidle_devices) 1754 return -ENOMEM; 1755 1756 intel_idle_cpuidle_driver_init(&intel_idle_driver); 1757 1758 retval = cpuidle_register_driver(&intel_idle_driver); 
1759 if (retval) { 1760 struct cpuidle_driver *drv = cpuidle_get_driver(); 1761 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1762 drv ? drv->name : "none"); 1763 goto init_driver_fail; 1764 } 1765 1766 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1767 intel_idle_cpu_online, NULL); 1768 if (retval < 0) 1769 goto hp_setup_fail; 1770 1771 pr_debug("Local APIC timer is reliable in %s\n", 1772 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); 1773 1774 return 0; 1775 1776 hp_setup_fail: 1777 intel_idle_cpuidle_devices_uninit(); 1778 cpuidle_unregister_driver(&intel_idle_driver); 1779 init_driver_fail: 1780 free_percpu(intel_idle_cpuidle_devices); 1781 return retval; 1782 1783 } 1784 device_initcall(intel_idle_init); 1785 1786 /* 1787 * We are not really modular, but we used to support that. Meaning we also 1788 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1789 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1790 * is the easiest way (currently) to continue doing that. 1791 */ 1792 module_param(max_cstate, int, 0444); 1793 /* 1794 * The positions of the bits that are set in this number are the indices of the 1795 * idle states to be disabled by default (as reflected by the names of the 1796 * corresponding idle state directories in sysfs, "state0", "state1" ... 1797 * "state<i>" ..., where <i> is the index of the given state). 1798 */ 1799 module_param_named(states_off, disabled_states_mask, uint, 0444); 1800 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 1801