// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/*
 * DEBUG is defined here, which enables the pr_debug() statements in this
 * file; comment the #define out to disable them.
 */
#define DEBUG

/* Prefix every pr_*() message from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.4.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/*
 * Bit mask of C-states to mark CPUIDLE_FLAG_OFF; the ACPI path below tests
 * BIT(cstate) against it.
 */
static unsigned int disabled_states_mask;

static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
/* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */

/*
 * Per-CPU-model description: which C-state table to use plus a few
 * model-specific knobs.  One instance exists per supported model family
 * (the idle_cpu_* objects further down).
 */
struct idle_cpu {
	/* Table of C-states for this model (one of the *_cstates[] arrays). */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* NOTE(review): presumably the Bay Trail flavour of the above - confirm. */
	bool byt_auto_demotion_disable_flag;
	/* NOTE(review): presumably "turn off HW C1 -> C1E promotion" - confirm. */
	bool disable_promotion_to_c1e;
	/* NOTE(review): presumably "consult ACPI _CST for this model" - confirm. */
	bool use_acpi;
};

/* Descriptor selected for the running CPU model. */
static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
/*
 * Forward declarations: the C-state tables below store pointers to these
 * callbacks before their definitions appear.
 */
static int intel_idle(struct cpuidle_device *dev,
			struct cpuidle_driver *drv, int index);
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index);
static struct cpuidle_state *cpuidle_state_table;

/*
 * Enable this state by default even if the ACPI _CST does not list it.
97 */ 98 #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 99 100 /* 101 * Set this flag for states where the HW flushes the TLB for us 102 * and so we don't need cross-calls to keep it consistent. 103 * If this flag is set, SW flushes the TLB, so even if the 104 * HW doesn't do the flushing, this flag is safe to use. 105 */ 106 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 107 108 /* 109 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 110 * the C-state (top nibble) and sub-state (bottom nibble) 111 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 112 * 113 * We store the hint at the top of our "flags" for each state. 114 */ 115 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 116 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 117 118 /* 119 * States are indexed by the cstate number, 120 * which is also the index into the MWAIT hint array. 121 * Thus C0 is a dummy. 122 */ 123 static struct cpuidle_state nehalem_cstates[] = { 124 { 125 .name = "C1", 126 .desc = "MWAIT 0x00", 127 .flags = MWAIT2flg(0x00), 128 .exit_latency = 3, 129 .target_residency = 6, 130 .enter = &intel_idle, 131 .enter_s2idle = intel_idle_s2idle, }, 132 { 133 .name = "C1E", 134 .desc = "MWAIT 0x01", 135 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 136 .exit_latency = 10, 137 .target_residency = 20, 138 .enter = &intel_idle, 139 .enter_s2idle = intel_idle_s2idle, }, 140 { 141 .name = "C3", 142 .desc = "MWAIT 0x10", 143 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 144 .exit_latency = 20, 145 .target_residency = 80, 146 .enter = &intel_idle, 147 .enter_s2idle = intel_idle_s2idle, }, 148 { 149 .name = "C6", 150 .desc = "MWAIT 0x20", 151 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 152 .exit_latency = 200, 153 .target_residency = 800, 154 .enter = &intel_idle, 155 .enter_s2idle = intel_idle_s2idle, }, 156 { 157 .enter = NULL } 158 }; 159 160 static struct cpuidle_state snb_cstates[] = { 161 { 162 .name = "C1", 163 .desc = "MWAIT 0x00", 164 .flags = MWAIT2flg(0x00), 165 
.exit_latency = 2, 166 .target_residency = 2, 167 .enter = &intel_idle, 168 .enter_s2idle = intel_idle_s2idle, }, 169 { 170 .name = "C1E", 171 .desc = "MWAIT 0x01", 172 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 173 .exit_latency = 10, 174 .target_residency = 20, 175 .enter = &intel_idle, 176 .enter_s2idle = intel_idle_s2idle, }, 177 { 178 .name = "C3", 179 .desc = "MWAIT 0x10", 180 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 181 .exit_latency = 80, 182 .target_residency = 211, 183 .enter = &intel_idle, 184 .enter_s2idle = intel_idle_s2idle, }, 185 { 186 .name = "C6", 187 .desc = "MWAIT 0x20", 188 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 189 .exit_latency = 104, 190 .target_residency = 345, 191 .enter = &intel_idle, 192 .enter_s2idle = intel_idle_s2idle, }, 193 { 194 .name = "C7", 195 .desc = "MWAIT 0x30", 196 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 197 .exit_latency = 109, 198 .target_residency = 345, 199 .enter = &intel_idle, 200 .enter_s2idle = intel_idle_s2idle, }, 201 { 202 .enter = NULL } 203 }; 204 205 static struct cpuidle_state byt_cstates[] = { 206 { 207 .name = "C1", 208 .desc = "MWAIT 0x00", 209 .flags = MWAIT2flg(0x00), 210 .exit_latency = 1, 211 .target_residency = 1, 212 .enter = &intel_idle, 213 .enter_s2idle = intel_idle_s2idle, }, 214 { 215 .name = "C6N", 216 .desc = "MWAIT 0x58", 217 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 218 .exit_latency = 300, 219 .target_residency = 275, 220 .enter = &intel_idle, 221 .enter_s2idle = intel_idle_s2idle, }, 222 { 223 .name = "C6S", 224 .desc = "MWAIT 0x52", 225 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 226 .exit_latency = 500, 227 .target_residency = 560, 228 .enter = &intel_idle, 229 .enter_s2idle = intel_idle_s2idle, }, 230 { 231 .name = "C7", 232 .desc = "MWAIT 0x60", 233 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 234 .exit_latency = 1200, 235 .target_residency = 4000, 236 .enter = &intel_idle, 237 .enter_s2idle = 
intel_idle_s2idle, }, 238 { 239 .name = "C7S", 240 .desc = "MWAIT 0x64", 241 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 242 .exit_latency = 10000, 243 .target_residency = 20000, 244 .enter = &intel_idle, 245 .enter_s2idle = intel_idle_s2idle, }, 246 { 247 .enter = NULL } 248 }; 249 250 static struct cpuidle_state cht_cstates[] = { 251 { 252 .name = "C1", 253 .desc = "MWAIT 0x00", 254 .flags = MWAIT2flg(0x00), 255 .exit_latency = 1, 256 .target_residency = 1, 257 .enter = &intel_idle, 258 .enter_s2idle = intel_idle_s2idle, }, 259 { 260 .name = "C6N", 261 .desc = "MWAIT 0x58", 262 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 263 .exit_latency = 80, 264 .target_residency = 275, 265 .enter = &intel_idle, 266 .enter_s2idle = intel_idle_s2idle, }, 267 { 268 .name = "C6S", 269 .desc = "MWAIT 0x52", 270 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 271 .exit_latency = 200, 272 .target_residency = 560, 273 .enter = &intel_idle, 274 .enter_s2idle = intel_idle_s2idle, }, 275 { 276 .name = "C7", 277 .desc = "MWAIT 0x60", 278 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 279 .exit_latency = 1200, 280 .target_residency = 4000, 281 .enter = &intel_idle, 282 .enter_s2idle = intel_idle_s2idle, }, 283 { 284 .name = "C7S", 285 .desc = "MWAIT 0x64", 286 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 287 .exit_latency = 10000, 288 .target_residency = 20000, 289 .enter = &intel_idle, 290 .enter_s2idle = intel_idle_s2idle, }, 291 { 292 .enter = NULL } 293 }; 294 295 static struct cpuidle_state ivb_cstates[] = { 296 { 297 .name = "C1", 298 .desc = "MWAIT 0x00", 299 .flags = MWAIT2flg(0x00), 300 .exit_latency = 1, 301 .target_residency = 1, 302 .enter = &intel_idle, 303 .enter_s2idle = intel_idle_s2idle, }, 304 { 305 .name = "C1E", 306 .desc = "MWAIT 0x01", 307 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 308 .exit_latency = 10, 309 .target_residency = 20, 310 .enter = &intel_idle, 311 .enter_s2idle = intel_idle_s2idle, }, 312 { 313 .name = 
"C3", 314 .desc = "MWAIT 0x10", 315 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 316 .exit_latency = 59, 317 .target_residency = 156, 318 .enter = &intel_idle, 319 .enter_s2idle = intel_idle_s2idle, }, 320 { 321 .name = "C6", 322 .desc = "MWAIT 0x20", 323 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 324 .exit_latency = 80, 325 .target_residency = 300, 326 .enter = &intel_idle, 327 .enter_s2idle = intel_idle_s2idle, }, 328 { 329 .name = "C7", 330 .desc = "MWAIT 0x30", 331 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 332 .exit_latency = 87, 333 .target_residency = 300, 334 .enter = &intel_idle, 335 .enter_s2idle = intel_idle_s2idle, }, 336 { 337 .enter = NULL } 338 }; 339 340 static struct cpuidle_state ivt_cstates[] = { 341 { 342 .name = "C1", 343 .desc = "MWAIT 0x00", 344 .flags = MWAIT2flg(0x00), 345 .exit_latency = 1, 346 .target_residency = 1, 347 .enter = &intel_idle, 348 .enter_s2idle = intel_idle_s2idle, }, 349 { 350 .name = "C1E", 351 .desc = "MWAIT 0x01", 352 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 353 .exit_latency = 10, 354 .target_residency = 80, 355 .enter = &intel_idle, 356 .enter_s2idle = intel_idle_s2idle, }, 357 { 358 .name = "C3", 359 .desc = "MWAIT 0x10", 360 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 361 .exit_latency = 59, 362 .target_residency = 156, 363 .enter = &intel_idle, 364 .enter_s2idle = intel_idle_s2idle, }, 365 { 366 .name = "C6", 367 .desc = "MWAIT 0x20", 368 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 369 .exit_latency = 82, 370 .target_residency = 300, 371 .enter = &intel_idle, 372 .enter_s2idle = intel_idle_s2idle, }, 373 { 374 .enter = NULL } 375 }; 376 377 static struct cpuidle_state ivt_cstates_4s[] = { 378 { 379 .name = "C1", 380 .desc = "MWAIT 0x00", 381 .flags = MWAIT2flg(0x00), 382 .exit_latency = 1, 383 .target_residency = 1, 384 .enter = &intel_idle, 385 .enter_s2idle = intel_idle_s2idle, }, 386 { 387 .name = "C1E", 388 .desc = "MWAIT 0x01", 389 .flags = 
MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 390 .exit_latency = 10, 391 .target_residency = 250, 392 .enter = &intel_idle, 393 .enter_s2idle = intel_idle_s2idle, }, 394 { 395 .name = "C3", 396 .desc = "MWAIT 0x10", 397 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 398 .exit_latency = 59, 399 .target_residency = 300, 400 .enter = &intel_idle, 401 .enter_s2idle = intel_idle_s2idle, }, 402 { 403 .name = "C6", 404 .desc = "MWAIT 0x20", 405 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 406 .exit_latency = 84, 407 .target_residency = 400, 408 .enter = &intel_idle, 409 .enter_s2idle = intel_idle_s2idle, }, 410 { 411 .enter = NULL } 412 }; 413 414 static struct cpuidle_state ivt_cstates_8s[] = { 415 { 416 .name = "C1", 417 .desc = "MWAIT 0x00", 418 .flags = MWAIT2flg(0x00), 419 .exit_latency = 1, 420 .target_residency = 1, 421 .enter = &intel_idle, 422 .enter_s2idle = intel_idle_s2idle, }, 423 { 424 .name = "C1E", 425 .desc = "MWAIT 0x01", 426 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 427 .exit_latency = 10, 428 .target_residency = 500, 429 .enter = &intel_idle, 430 .enter_s2idle = intel_idle_s2idle, }, 431 { 432 .name = "C3", 433 .desc = "MWAIT 0x10", 434 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 435 .exit_latency = 59, 436 .target_residency = 600, 437 .enter = &intel_idle, 438 .enter_s2idle = intel_idle_s2idle, }, 439 { 440 .name = "C6", 441 .desc = "MWAIT 0x20", 442 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 443 .exit_latency = 88, 444 .target_residency = 700, 445 .enter = &intel_idle, 446 .enter_s2idle = intel_idle_s2idle, }, 447 { 448 .enter = NULL } 449 }; 450 451 static struct cpuidle_state hsw_cstates[] = { 452 { 453 .name = "C1", 454 .desc = "MWAIT 0x00", 455 .flags = MWAIT2flg(0x00), 456 .exit_latency = 2, 457 .target_residency = 2, 458 .enter = &intel_idle, 459 .enter_s2idle = intel_idle_s2idle, }, 460 { 461 .name = "C1E", 462 .desc = "MWAIT 0x01", 463 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 464 
.exit_latency = 10, 465 .target_residency = 20, 466 .enter = &intel_idle, 467 .enter_s2idle = intel_idle_s2idle, }, 468 { 469 .name = "C3", 470 .desc = "MWAIT 0x10", 471 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 472 .exit_latency = 33, 473 .target_residency = 100, 474 .enter = &intel_idle, 475 .enter_s2idle = intel_idle_s2idle, }, 476 { 477 .name = "C6", 478 .desc = "MWAIT 0x20", 479 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 480 .exit_latency = 133, 481 .target_residency = 400, 482 .enter = &intel_idle, 483 .enter_s2idle = intel_idle_s2idle, }, 484 { 485 .name = "C7s", 486 .desc = "MWAIT 0x32", 487 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 488 .exit_latency = 166, 489 .target_residency = 500, 490 .enter = &intel_idle, 491 .enter_s2idle = intel_idle_s2idle, }, 492 { 493 .name = "C8", 494 .desc = "MWAIT 0x40", 495 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 496 .exit_latency = 300, 497 .target_residency = 900, 498 .enter = &intel_idle, 499 .enter_s2idle = intel_idle_s2idle, }, 500 { 501 .name = "C9", 502 .desc = "MWAIT 0x50", 503 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 504 .exit_latency = 600, 505 .target_residency = 1800, 506 .enter = &intel_idle, 507 .enter_s2idle = intel_idle_s2idle, }, 508 { 509 .name = "C10", 510 .desc = "MWAIT 0x60", 511 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 512 .exit_latency = 2600, 513 .target_residency = 7700, 514 .enter = &intel_idle, 515 .enter_s2idle = intel_idle_s2idle, }, 516 { 517 .enter = NULL } 518 }; 519 static struct cpuidle_state bdw_cstates[] = { 520 { 521 .name = "C1", 522 .desc = "MWAIT 0x00", 523 .flags = MWAIT2flg(0x00), 524 .exit_latency = 2, 525 .target_residency = 2, 526 .enter = &intel_idle, 527 .enter_s2idle = intel_idle_s2idle, }, 528 { 529 .name = "C1E", 530 .desc = "MWAIT 0x01", 531 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 532 .exit_latency = 10, 533 .target_residency = 20, 534 .enter = &intel_idle, 535 .enter_s2idle = 
intel_idle_s2idle, }, 536 { 537 .name = "C3", 538 .desc = "MWAIT 0x10", 539 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 540 .exit_latency = 40, 541 .target_residency = 100, 542 .enter = &intel_idle, 543 .enter_s2idle = intel_idle_s2idle, }, 544 { 545 .name = "C6", 546 .desc = "MWAIT 0x20", 547 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 548 .exit_latency = 133, 549 .target_residency = 400, 550 .enter = &intel_idle, 551 .enter_s2idle = intel_idle_s2idle, }, 552 { 553 .name = "C7s", 554 .desc = "MWAIT 0x32", 555 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 556 .exit_latency = 166, 557 .target_residency = 500, 558 .enter = &intel_idle, 559 .enter_s2idle = intel_idle_s2idle, }, 560 { 561 .name = "C8", 562 .desc = "MWAIT 0x40", 563 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 564 .exit_latency = 300, 565 .target_residency = 900, 566 .enter = &intel_idle, 567 .enter_s2idle = intel_idle_s2idle, }, 568 { 569 .name = "C9", 570 .desc = "MWAIT 0x50", 571 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 572 .exit_latency = 600, 573 .target_residency = 1800, 574 .enter = &intel_idle, 575 .enter_s2idle = intel_idle_s2idle, }, 576 { 577 .name = "C10", 578 .desc = "MWAIT 0x60", 579 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 580 .exit_latency = 2600, 581 .target_residency = 7700, 582 .enter = &intel_idle, 583 .enter_s2idle = intel_idle_s2idle, }, 584 { 585 .enter = NULL } 586 }; 587 588 static struct cpuidle_state skl_cstates[] = { 589 { 590 .name = "C1", 591 .desc = "MWAIT 0x00", 592 .flags = MWAIT2flg(0x00), 593 .exit_latency = 2, 594 .target_residency = 2, 595 .enter = &intel_idle, 596 .enter_s2idle = intel_idle_s2idle, }, 597 { 598 .name = "C1E", 599 .desc = "MWAIT 0x01", 600 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 601 .exit_latency = 10, 602 .target_residency = 20, 603 .enter = &intel_idle, 604 .enter_s2idle = intel_idle_s2idle, }, 605 { 606 .name = "C3", 607 .desc = "MWAIT 0x10", 608 .flags = MWAIT2flg(0x10) | 
CPUIDLE_FLAG_TLB_FLUSHED, 609 .exit_latency = 70, 610 .target_residency = 100, 611 .enter = &intel_idle, 612 .enter_s2idle = intel_idle_s2idle, }, 613 { 614 .name = "C6", 615 .desc = "MWAIT 0x20", 616 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 617 .exit_latency = 85, 618 .target_residency = 200, 619 .enter = &intel_idle, 620 .enter_s2idle = intel_idle_s2idle, }, 621 { 622 .name = "C7s", 623 .desc = "MWAIT 0x33", 624 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 625 .exit_latency = 124, 626 .target_residency = 800, 627 .enter = &intel_idle, 628 .enter_s2idle = intel_idle_s2idle, }, 629 { 630 .name = "C8", 631 .desc = "MWAIT 0x40", 632 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 633 .exit_latency = 200, 634 .target_residency = 800, 635 .enter = &intel_idle, 636 .enter_s2idle = intel_idle_s2idle, }, 637 { 638 .name = "C9", 639 .desc = "MWAIT 0x50", 640 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 641 .exit_latency = 480, 642 .target_residency = 5000, 643 .enter = &intel_idle, 644 .enter_s2idle = intel_idle_s2idle, }, 645 { 646 .name = "C10", 647 .desc = "MWAIT 0x60", 648 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 649 .exit_latency = 890, 650 .target_residency = 5000, 651 .enter = &intel_idle, 652 .enter_s2idle = intel_idle_s2idle, }, 653 { 654 .enter = NULL } 655 }; 656 657 static struct cpuidle_state skx_cstates[] = { 658 { 659 .name = "C1", 660 .desc = "MWAIT 0x00", 661 .flags = MWAIT2flg(0x00), 662 .exit_latency = 2, 663 .target_residency = 2, 664 .enter = &intel_idle, 665 .enter_s2idle = intel_idle_s2idle, }, 666 { 667 .name = "C1E", 668 .desc = "MWAIT 0x01", 669 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 670 .exit_latency = 10, 671 .target_residency = 20, 672 .enter = &intel_idle, 673 .enter_s2idle = intel_idle_s2idle, }, 674 { 675 .name = "C6", 676 .desc = "MWAIT 0x20", 677 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 678 .exit_latency = 133, 679 .target_residency = 600, 680 .enter = &intel_idle, 
681 .enter_s2idle = intel_idle_s2idle, }, 682 { 683 .enter = NULL } 684 }; 685 686 static struct cpuidle_state atom_cstates[] = { 687 { 688 .name = "C1E", 689 .desc = "MWAIT 0x00", 690 .flags = MWAIT2flg(0x00), 691 .exit_latency = 10, 692 .target_residency = 20, 693 .enter = &intel_idle, 694 .enter_s2idle = intel_idle_s2idle, }, 695 { 696 .name = "C2", 697 .desc = "MWAIT 0x10", 698 .flags = MWAIT2flg(0x10), 699 .exit_latency = 20, 700 .target_residency = 80, 701 .enter = &intel_idle, 702 .enter_s2idle = intel_idle_s2idle, }, 703 { 704 .name = "C4", 705 .desc = "MWAIT 0x30", 706 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 707 .exit_latency = 100, 708 .target_residency = 400, 709 .enter = &intel_idle, 710 .enter_s2idle = intel_idle_s2idle, }, 711 { 712 .name = "C6", 713 .desc = "MWAIT 0x52", 714 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 715 .exit_latency = 140, 716 .target_residency = 560, 717 .enter = &intel_idle, 718 .enter_s2idle = intel_idle_s2idle, }, 719 { 720 .enter = NULL } 721 }; 722 static struct cpuidle_state tangier_cstates[] = { 723 { 724 .name = "C1", 725 .desc = "MWAIT 0x00", 726 .flags = MWAIT2flg(0x00), 727 .exit_latency = 1, 728 .target_residency = 4, 729 .enter = &intel_idle, 730 .enter_s2idle = intel_idle_s2idle, }, 731 { 732 .name = "C4", 733 .desc = "MWAIT 0x30", 734 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 735 .exit_latency = 100, 736 .target_residency = 400, 737 .enter = &intel_idle, 738 .enter_s2idle = intel_idle_s2idle, }, 739 { 740 .name = "C6", 741 .desc = "MWAIT 0x52", 742 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 743 .exit_latency = 140, 744 .target_residency = 560, 745 .enter = &intel_idle, 746 .enter_s2idle = intel_idle_s2idle, }, 747 { 748 .name = "C7", 749 .desc = "MWAIT 0x60", 750 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 751 .exit_latency = 1200, 752 .target_residency = 4000, 753 .enter = &intel_idle, 754 .enter_s2idle = intel_idle_s2idle, }, 755 { 756 .name = "C9", 757 .desc = 
"MWAIT 0x64", 758 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 759 .exit_latency = 10000, 760 .target_residency = 20000, 761 .enter = &intel_idle, 762 .enter_s2idle = intel_idle_s2idle, }, 763 { 764 .enter = NULL } 765 }; 766 static struct cpuidle_state avn_cstates[] = { 767 { 768 .name = "C1", 769 .desc = "MWAIT 0x00", 770 .flags = MWAIT2flg(0x00), 771 .exit_latency = 2, 772 .target_residency = 2, 773 .enter = &intel_idle, 774 .enter_s2idle = intel_idle_s2idle, }, 775 { 776 .name = "C6", 777 .desc = "MWAIT 0x51", 778 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 779 .exit_latency = 15, 780 .target_residency = 45, 781 .enter = &intel_idle, 782 .enter_s2idle = intel_idle_s2idle, }, 783 { 784 .enter = NULL } 785 }; 786 static struct cpuidle_state knl_cstates[] = { 787 { 788 .name = "C1", 789 .desc = "MWAIT 0x00", 790 .flags = MWAIT2flg(0x00), 791 .exit_latency = 1, 792 .target_residency = 2, 793 .enter = &intel_idle, 794 .enter_s2idle = intel_idle_s2idle }, 795 { 796 .name = "C6", 797 .desc = "MWAIT 0x10", 798 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 799 .exit_latency = 120, 800 .target_residency = 500, 801 .enter = &intel_idle, 802 .enter_s2idle = intel_idle_s2idle }, 803 { 804 .enter = NULL } 805 }; 806 807 static struct cpuidle_state bxt_cstates[] = { 808 { 809 .name = "C1", 810 .desc = "MWAIT 0x00", 811 .flags = MWAIT2flg(0x00), 812 .exit_latency = 2, 813 .target_residency = 2, 814 .enter = &intel_idle, 815 .enter_s2idle = intel_idle_s2idle, }, 816 { 817 .name = "C1E", 818 .desc = "MWAIT 0x01", 819 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 820 .exit_latency = 10, 821 .target_residency = 20, 822 .enter = &intel_idle, 823 .enter_s2idle = intel_idle_s2idle, }, 824 { 825 .name = "C6", 826 .desc = "MWAIT 0x20", 827 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 828 .exit_latency = 133, 829 .target_residency = 133, 830 .enter = &intel_idle, 831 .enter_s2idle = intel_idle_s2idle, }, 832 { 833 .name = "C7s", 834 .desc = 
"MWAIT 0x31", 835 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 836 .exit_latency = 155, 837 .target_residency = 155, 838 .enter = &intel_idle, 839 .enter_s2idle = intel_idle_s2idle, }, 840 { 841 .name = "C8", 842 .desc = "MWAIT 0x40", 843 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 844 .exit_latency = 1000, 845 .target_residency = 1000, 846 .enter = &intel_idle, 847 .enter_s2idle = intel_idle_s2idle, }, 848 { 849 .name = "C9", 850 .desc = "MWAIT 0x50", 851 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 852 .exit_latency = 2000, 853 .target_residency = 2000, 854 .enter = &intel_idle, 855 .enter_s2idle = intel_idle_s2idle, }, 856 { 857 .name = "C10", 858 .desc = "MWAIT 0x60", 859 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 860 .exit_latency = 10000, 861 .target_residency = 10000, 862 .enter = &intel_idle, 863 .enter_s2idle = intel_idle_s2idle, }, 864 { 865 .enter = NULL } 866 }; 867 868 static struct cpuidle_state dnv_cstates[] = { 869 { 870 .name = "C1", 871 .desc = "MWAIT 0x00", 872 .flags = MWAIT2flg(0x00), 873 .exit_latency = 2, 874 .target_residency = 2, 875 .enter = &intel_idle, 876 .enter_s2idle = intel_idle_s2idle, }, 877 { 878 .name = "C1E", 879 .desc = "MWAIT 0x01", 880 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 881 .exit_latency = 10, 882 .target_residency = 20, 883 .enter = &intel_idle, 884 .enter_s2idle = intel_idle_s2idle, }, 885 { 886 .name = "C6", 887 .desc = "MWAIT 0x20", 888 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 889 .exit_latency = 50, 890 .target_residency = 500, 891 .enter = &intel_idle, 892 .enter_s2idle = intel_idle_s2idle, }, 893 { 894 .enter = NULL } 895 }; 896 897 /** 898 * intel_idle 899 * @dev: cpuidle_device 900 * @drv: cpuidle driver 901 * @index: index of cpuidle state 902 * 903 * Must be called under local_irq_disable(). 
904 */ 905 static __cpuidle int intel_idle(struct cpuidle_device *dev, 906 struct cpuidle_driver *drv, int index) 907 { 908 unsigned long ecx = 1; /* break on interrupt flag */ 909 struct cpuidle_state *state = &drv->states[index]; 910 unsigned long eax = flg2MWAIT(state->flags); 911 unsigned int cstate; 912 bool uninitialized_var(tick); 913 int cpu = smp_processor_id(); 914 915 /* 916 * leave_mm() to avoid costly and often unnecessary wakeups 917 * for flushing the user TLB's associated with the active mm. 918 */ 919 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) 920 leave_mm(cpu); 921 922 if (!static_cpu_has(X86_FEATURE_ARAT)) { 923 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & 924 MWAIT_CSTATE_MASK) + 1; 925 tick = false; 926 if (!(lapic_timer_reliable_states & (1 << (cstate)))) { 927 tick = true; 928 tick_broadcast_enter(); 929 } 930 } 931 932 mwait_idle_with_hints(eax, ecx); 933 934 if (!static_cpu_has(X86_FEATURE_ARAT) && tick) 935 tick_broadcast_exit(); 936 937 return index; 938 } 939 940 /** 941 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle 942 * @dev: cpuidle_device 943 * @drv: cpuidle driver 944 * @index: state index 945 */ 946 static void intel_idle_s2idle(struct cpuidle_device *dev, 947 struct cpuidle_driver *drv, int index) 948 { 949 unsigned long ecx = 1; /* break on interrupt flag */ 950 unsigned long eax = flg2MWAIT(drv->states[index].flags); 951 952 mwait_idle_with_hints(eax, ecx); 953 } 954 955 static const struct idle_cpu idle_cpu_nehalem = { 956 .state_table = nehalem_cstates, 957 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 958 .disable_promotion_to_c1e = true, 959 }; 960 961 static const struct idle_cpu idle_cpu_nhx = { 962 .state_table = nehalem_cstates, 963 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 964 .disable_promotion_to_c1e = true, 965 .use_acpi = true, 966 }; 967 968 static const struct idle_cpu idle_cpu_atom = { 969 .state_table = atom_cstates, 970 }; 
/*
 * Remaining per-model descriptors.  Descriptors that share a C-state table
 * (e.g. idle_cpu_snb / idle_cpu_snx) differ only in the use_acpi setting.
 */
static const struct idle_cpu idle_cpu_tangier = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

/*
 * CPU model -> descriptor table, terminated by an empty entry.
 * NOTE(review): INTEL_CPU_FAM6() presumably builds an x86_cpu_id entry for
 * the given family-6 model with the descriptor as driver data - confirm.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	INTEL_CPU_FAM6(NEHALEM_EP,		idle_cpu_nhx),
	INTEL_CPU_FAM6(NEHALEM,			idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_G,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE_EP,		idle_cpu_nhx),
	INTEL_CPU_FAM6(NEHALEM_EX,		idle_cpu_nhx),
	INTEL_CPU_FAM6(ATOM_BONNELL,		idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_BONNELL_MID,	idle_cpu_lincroft),
	INTEL_CPU_FAM6(WESTMERE_EX,		idle_cpu_nhx),
	INTEL_CPU_FAM6(SANDYBRIDGE,		idle_cpu_snb),
	INTEL_CPU_FAM6(SANDYBRIDGE_X,		idle_cpu_snx),
	INTEL_CPU_FAM6(ATOM_SALTWELL,		idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_SILVERMONT,		idle_cpu_byt),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,	idle_cpu_tangier),
	INTEL_CPU_FAM6(ATOM_AIRMONT,		idle_cpu_cht),
	INTEL_CPU_FAM6(IVYBRIDGE,		idle_cpu_ivb),
	INTEL_CPU_FAM6(IVYBRIDGE_X,		idle_cpu_ivt),
	INTEL_CPU_FAM6(HASWELL,			idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_X,		idle_cpu_hsx),
	INTEL_CPU_FAM6(HASWELL_L,		idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_G,		idle_cpu_hsw),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_D,	idle_cpu_avn),
	INTEL_CPU_FAM6(BROADWELL,		idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_G,		idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_X,		idle_cpu_bdx),
	INTEL_CPU_FAM6(BROADWELL_D,		idle_cpu_bdx),
	INTEL_CPU_FAM6(SKYLAKE_L,		idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE,			idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_L,		idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE,		idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_X,		idle_cpu_skx),
	INTEL_CPU_FAM6(XEON_PHI_KNL,		idle_cpu_knl),
	INTEL_CPU_FAM6(XEON_PHI_KNM,		idle_cpu_knl),
	INTEL_CPU_FAM6(ATOM_GOLDMONT,		idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,	idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_D,		idle_cpu_dnv),
	INTEL_CPU_FAM6(ATOM_TREMONT_D,		idle_cpu_dnv),
	{}
};

/* Matches any Intel family 6 model that advertises X86_FEATURE_MWAIT. */
#define INTEL_CPU_FAM6_MWAIT \
	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 }

static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	INTEL_CPU_FAM6_MWAIT,
	{}
};

/*
 * Return true, after logging the limit, when @cstate + 1 exceeds max_cstate;
 * callers use this to stop adding further states.
 */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/*
 * _CST data extracted at init time; a zero .count is treated below as
 * "no usable _CST information".
 */
static struct acpi_processor_power acpi_state_table __initdata;

/**
 * intel_idle_cst_usable - Check if the _CST information can be used.
 *
 * Check if all of the C-states listed by _CST in the max_cstate range are
 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1146 */ 1147 static bool __init intel_idle_cst_usable(void) 1148 { 1149 int cstate, limit; 1150 1151 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1152 acpi_state_table.count); 1153 1154 for (cstate = 1; cstate < limit; cstate++) { 1155 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1156 1157 if (cx->entry_method != ACPI_CSTATE_FFH) 1158 return false; 1159 } 1160 1161 return true; 1162 } 1163 1164 static bool __init intel_idle_acpi_cst_extract(void) 1165 { 1166 unsigned int cpu; 1167 1168 if (no_acpi) { 1169 pr_debug("Not allowed to use ACPI _CST\n"); 1170 return false; 1171 } 1172 1173 for_each_possible_cpu(cpu) { 1174 struct acpi_processor *pr = per_cpu(processors, cpu); 1175 1176 if (!pr) 1177 continue; 1178 1179 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1180 continue; 1181 1182 acpi_state_table.count++; 1183 1184 if (!intel_idle_cst_usable()) 1185 continue; 1186 1187 if (!acpi_processor_claim_cst_control()) { 1188 acpi_state_table.count = 0; 1189 return false; 1190 } 1191 1192 return true; 1193 } 1194 1195 pr_debug("ACPI _CST not found or not usable\n"); 1196 return false; 1197 } 1198 1199 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1200 { 1201 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1202 1203 /* 1204 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1205 * the interesting states are ACPI_CSTATE_FFH. 
1206 */ 1207 for (cstate = 1; cstate < limit; cstate++) { 1208 struct acpi_processor_cx *cx; 1209 struct cpuidle_state *state; 1210 1211 if (intel_idle_max_cstate_reached(cstate)) 1212 break; 1213 1214 cx = &acpi_state_table.states[cstate]; 1215 1216 state = &drv->states[drv->state_count++]; 1217 1218 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1219 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1220 state->exit_latency = cx->latency; 1221 /* 1222 * For C1-type C-states use the same number for both the exit 1223 * latency and target residency, because that is the case for 1224 * C1 in the majority of the static C-states tables above. 1225 * For the other types of C-states, however, set the target 1226 * residency to 3 times the exit latency which should lead to 1227 * a reasonable balance between energy-efficiency and 1228 * performance in the majority of interesting cases. 1229 */ 1230 state->target_residency = cx->latency; 1231 if (cx->type > ACPI_STATE_C1) 1232 state->target_residency *= 3; 1233 1234 state->flags = MWAIT2flg(cx->address); 1235 if (cx->type > ACPI_STATE_C2) 1236 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1237 1238 if (disabled_states_mask & BIT(cstate)) 1239 state->flags |= CPUIDLE_FLAG_OFF; 1240 1241 state->enter = intel_idle; 1242 state->enter_s2idle = intel_idle_s2idle; 1243 } 1244 } 1245 1246 static bool __init intel_idle_off_by_default(u32 mwait_hint) 1247 { 1248 int cstate, limit; 1249 1250 /* 1251 * If there are no _CST C-states, do not disable any C-states by 1252 * default. 1253 */ 1254 if (!acpi_state_table.count) 1255 return false; 1256 1257 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1258 /* 1259 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1260 * the interesting states are ACPI_CSTATE_FFH. 
1261 */ 1262 for (cstate = 1; cstate < limit; cstate++) { 1263 if (acpi_state_table.states[cstate].address == mwait_hint) 1264 return false; 1265 } 1266 return true; 1267 } 1268 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1269 #define force_use_acpi (false) 1270 1271 static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1272 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1273 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1274 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1275 1276 /* 1277 * ivt_idle_state_table_update(void) 1278 * 1279 * Tune IVT multi-socket targets 1280 * Assumption: num_sockets == (max_package_num + 1) 1281 */ 1282 static void __init ivt_idle_state_table_update(void) 1283 { 1284 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1285 int cpu, package_num, num_sockets = 1; 1286 1287 for_each_online_cpu(cpu) { 1288 package_num = topology_physical_package_id(cpu); 1289 if (package_num + 1 > num_sockets) { 1290 num_sockets = package_num + 1; 1291 1292 if (num_sockets > 4) { 1293 cpuidle_state_table = ivt_cstates_8s; 1294 return; 1295 } 1296 } 1297 } 1298 1299 if (num_sockets > 2) 1300 cpuidle_state_table = ivt_cstates_4s; 1301 1302 /* else, 1 and 2 socket systems use default ivt_cstates */ 1303 } 1304 1305 /** 1306 * irtl_2_usec - IRTL to microseconds conversion. 1307 * @irtl: IRTL MSR value. 1308 * 1309 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1310 */ 1311 static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1312 { 1313 static const unsigned int irtl_ns_units[] __initconst = { 1314 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1315 }; 1316 unsigned long long ns; 1317 1318 if (!irtl) 1319 return 0; 1320 1321 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1322 1323 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1324 } 1325 1326 /* 1327 * bxt_idle_state_table_update(void) 1328 * 1329 * On BXT, we trust the IRTL to show the definitive maximum latency 1330 * We use the same value for target_residency. 1331 */ 1332 static void __init bxt_idle_state_table_update(void) 1333 { 1334 unsigned long long msr; 1335 unsigned int usec; 1336 1337 rdmsrl(MSR_PKGC6_IRTL, msr); 1338 usec = irtl_2_usec(msr); 1339 if (usec) { 1340 bxt_cstates[2].exit_latency = usec; 1341 bxt_cstates[2].target_residency = usec; 1342 } 1343 1344 rdmsrl(MSR_PKGC7_IRTL, msr); 1345 usec = irtl_2_usec(msr); 1346 if (usec) { 1347 bxt_cstates[3].exit_latency = usec; 1348 bxt_cstates[3].target_residency = usec; 1349 } 1350 1351 rdmsrl(MSR_PKGC8_IRTL, msr); 1352 usec = irtl_2_usec(msr); 1353 if (usec) { 1354 bxt_cstates[4].exit_latency = usec; 1355 bxt_cstates[4].target_residency = usec; 1356 } 1357 1358 rdmsrl(MSR_PKGC9_IRTL, msr); 1359 usec = irtl_2_usec(msr); 1360 if (usec) { 1361 bxt_cstates[5].exit_latency = usec; 1362 bxt_cstates[5].target_residency = usec; 1363 } 1364 1365 rdmsrl(MSR_PKGC10_IRTL, msr); 1366 usec = irtl_2_usec(msr); 1367 if (usec) { 1368 bxt_cstates[6].exit_latency = usec; 1369 bxt_cstates[6].target_residency = usec; 1370 } 1371 1372 } 1373 /* 1374 * sklh_idle_state_table_update(void) 1375 * 1376 * On SKL-H (model 0x5e) disable C8 and C9 if: 1377 * C10 is enabled and SGX disabled 1378 */ 1379 static void __init sklh_idle_state_table_update(void) 1380 { 1381 unsigned long long msr; 1382 unsigned int eax, ebx, ecx, edx; 1383 1384 1385 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1386 if 
(max_cstate <= 7) 1387 return; 1388 1389 /* if PC10 not present in CPUID.MWAIT.EDX */ 1390 if ((mwait_substates & (0xF << 28)) == 0) 1391 return; 1392 1393 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1394 1395 /* PC10 is not enabled in PKG C-state limit */ 1396 if ((msr & 0xF) != 8) 1397 return; 1398 1399 ecx = 0; 1400 cpuid(7, &eax, &ebx, &ecx, &edx); 1401 1402 /* if SGX is present */ 1403 if (ebx & (1 << 2)) { 1404 1405 rdmsrl(MSR_IA32_FEAT_CTL, msr); 1406 1407 /* if SGX is enabled */ 1408 if (msr & (1 << 18)) 1409 return; 1410 } 1411 1412 skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ 1413 skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ 1414 } 1415 1416 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) 1417 { 1418 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; 1419 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & 1420 MWAIT_SUBSTATE_MASK; 1421 1422 /* Ignore the C-state if there are NO sub-states in CPUID for it. 
*/ 1423 if (num_substates == 0) 1424 return false; 1425 1426 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1427 mark_tsc_unstable("TSC halts in idle states deeper than C2"); 1428 1429 return true; 1430 } 1431 1432 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1433 { 1434 int cstate; 1435 1436 switch (boot_cpu_data.x86_model) { 1437 case INTEL_FAM6_IVYBRIDGE_X: 1438 ivt_idle_state_table_update(); 1439 break; 1440 case INTEL_FAM6_ATOM_GOLDMONT: 1441 case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 1442 bxt_idle_state_table_update(); 1443 break; 1444 case INTEL_FAM6_SKYLAKE: 1445 sklh_idle_state_table_update(); 1446 break; 1447 } 1448 1449 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1450 unsigned int mwait_hint; 1451 1452 if (intel_idle_max_cstate_reached(cstate)) 1453 break; 1454 1455 if (!cpuidle_state_table[cstate].enter && 1456 !cpuidle_state_table[cstate].enter_s2idle) 1457 break; 1458 1459 /* If marked as unusable, skip this state. */ 1460 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { 1461 pr_debug("state %s is disabled\n", 1462 cpuidle_state_table[cstate].name); 1463 continue; 1464 } 1465 1466 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1467 if (!intel_idle_verify_cstate(mwait_hint)) 1468 continue; 1469 1470 /* Structure copy. 
*/ 1471 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 1472 1473 if ((disabled_states_mask & BIT(drv->state_count)) || 1474 ((icpu->use_acpi || force_use_acpi) && 1475 intel_idle_off_by_default(mwait_hint) && 1476 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) 1477 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; 1478 1479 drv->state_count++; 1480 } 1481 1482 if (icpu->byt_auto_demotion_disable_flag) { 1483 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1484 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1485 } 1486 } 1487 1488 /* 1489 * intel_idle_cpuidle_driver_init() 1490 * allocate, initialize cpuidle_states 1491 */ 1492 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) 1493 { 1494 cpuidle_poll_state_init(drv); 1495 1496 if (disabled_states_mask & BIT(0)) 1497 drv->states[0].flags |= CPUIDLE_FLAG_OFF; 1498 1499 drv->state_count = 1; 1500 1501 if (icpu) 1502 intel_idle_init_cstates_icpu(drv); 1503 else 1504 intel_idle_init_cstates_acpi(drv); 1505 } 1506 1507 static void auto_demotion_disable(void) 1508 { 1509 unsigned long long msr_bits; 1510 1511 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1512 msr_bits &= ~(icpu->auto_demotion_disable_flags); 1513 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 1514 } 1515 1516 static void c1e_promotion_disable(void) 1517 { 1518 unsigned long long msr_bits; 1519 1520 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 1521 msr_bits &= ~0x2; 1522 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 1523 } 1524 1525 /* 1526 * intel_idle_cpu_init() 1527 * allocate, initialize, register cpuidle_devices 1528 * @cpu: cpu/core to initialize 1529 */ 1530 static int intel_idle_cpu_init(unsigned int cpu) 1531 { 1532 struct cpuidle_device *dev; 1533 1534 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1535 dev->cpu = cpu; 1536 1537 if (cpuidle_register_device(dev)) { 1538 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1539 return -EIO; 1540 } 1541 1542 if (!icpu) 1543 return 0; 1544 1545 if 
(icpu->auto_demotion_disable_flags) 1546 auto_demotion_disable(); 1547 1548 if (icpu->disable_promotion_to_c1e) 1549 c1e_promotion_disable(); 1550 1551 return 0; 1552 } 1553 1554 static int intel_idle_cpu_online(unsigned int cpu) 1555 { 1556 struct cpuidle_device *dev; 1557 1558 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 1559 tick_broadcast_enable(); 1560 1561 /* 1562 * Some systems can hotplug a cpu at runtime after 1563 * the kernel has booted, we have to initialize the 1564 * driver in this case 1565 */ 1566 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1567 if (!dev->registered) 1568 return intel_idle_cpu_init(cpu); 1569 1570 return 0; 1571 } 1572 1573 /** 1574 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. 1575 */ 1576 static void __init intel_idle_cpuidle_devices_uninit(void) 1577 { 1578 int i; 1579 1580 for_each_online_cpu(i) 1581 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 1582 } 1583 1584 static int __init intel_idle_init(void) 1585 { 1586 const struct x86_cpu_id *id; 1587 unsigned int eax, ebx, ecx; 1588 int retval; 1589 1590 /* Do not load intel_idle at all for now if idle= is passed */ 1591 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1592 return -ENODEV; 1593 1594 if (max_cstate == 0) { 1595 pr_debug("disabled\n"); 1596 return -EPERM; 1597 } 1598 1599 id = x86_match_cpu(intel_idle_ids); 1600 if (id) { 1601 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1602 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1603 return -ENODEV; 1604 } 1605 } else { 1606 id = x86_match_cpu(intel_mwait_ids); 1607 if (!id) 1608 return -ENODEV; 1609 } 1610 1611 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1612 return -ENODEV; 1613 1614 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1615 1616 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1617 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1618 !mwait_substates) 1619 return -ENODEV; 1620 1621 pr_debug("MWAIT substates: 0x%x\n", 
mwait_substates); 1622 1623 icpu = (const struct idle_cpu *)id->driver_data; 1624 if (icpu) { 1625 cpuidle_state_table = icpu->state_table; 1626 if (icpu->use_acpi || force_use_acpi) 1627 intel_idle_acpi_cst_extract(); 1628 } else if (!intel_idle_acpi_cst_extract()) { 1629 return -ENODEV; 1630 } 1631 1632 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1633 boot_cpu_data.x86_model); 1634 1635 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1636 if (!intel_idle_cpuidle_devices) 1637 return -ENOMEM; 1638 1639 intel_idle_cpuidle_driver_init(&intel_idle_driver); 1640 1641 retval = cpuidle_register_driver(&intel_idle_driver); 1642 if (retval) { 1643 struct cpuidle_driver *drv = cpuidle_get_driver(); 1644 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1645 drv ? drv->name : "none"); 1646 goto init_driver_fail; 1647 } 1648 1649 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1650 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1651 1652 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1653 intel_idle_cpu_online, NULL); 1654 if (retval < 0) 1655 goto hp_setup_fail; 1656 1657 pr_debug("lapic_timer_reliable_states 0x%x\n", 1658 lapic_timer_reliable_states); 1659 1660 return 0; 1661 1662 hp_setup_fail: 1663 intel_idle_cpuidle_devices_uninit(); 1664 cpuidle_unregister_driver(&intel_idle_driver); 1665 init_driver_fail: 1666 free_percpu(intel_idle_cpuidle_devices); 1667 return retval; 1668 1669 } 1670 device_initcall(intel_idle_init); 1671 1672 /* 1673 * We are not really modular, but we used to support that. Meaning we also 1674 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1675 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1676 * is the easiest way (currently) to continue doing that. 
1677 */ 1678 module_param(max_cstate, int, 0444); 1679 /* 1680 * The positions of the bits that are set in this number are the indices of the 1681 * idle states to be disabled by default (as reflected by the names of the 1682 * corresponding idle state directories in sysfs, "state0", "state1" ... 1683 * "state<i>" ..., where <i> is the index of the given state). 1684 */ 1685 module_param_named(states_off, disabled_states_mask, uint, 0444); 1686 MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); 1687