1 /* 2 * intel_idle.c - native hardware idle loop for modern Intel processors 3 * 4 * Copyright (c) 2013, Intel Corporation. 5 * Len Brown <len.brown@intel.com> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms and conditions of the GNU General Public License, 9 * version 2, as published by the Free Software Foundation. 10 * 11 * This program is distributed in the hope it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * more details. 15 * 16 * You should have received a copy of the GNU General Public License along with 17 * this program; if not, write to the Free Software Foundation, Inc., 18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 19 */ 20 21 /* 22 * intel_idle is a cpuidle driver that loads on specific Intel processors 23 * in lieu of the legacy ACPI processor_idle driver. The intent is to 24 * make Linux more efficient on these processors, as intel_idle knows 25 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. 26 */ 27 28 /* 29 * Design Assumptions 30 * 31 * All CPUs have same idle states as boot CPU 32 * 33 * Chipset BM_STS (bus master status) bit is a NOP 34 * for preventing entry into deep C-stats 35 */ 36 37 /* 38 * Known limitations 39 * 40 * The driver currently initializes for_each_online_cpu() upon modprobe. 41 * It it unaware of subsequent processors hot-added to the system. 42 * This means that if you boot with maxcpus=n and later online 43 * processors above n, those processors will use C1 only. 44 * 45 * ACPI has a .suspend hack to turn off deep c-statees during suspend 46 * to avoid complications with the lapic timer workaround. 47 * Have not seen issues with suspend, but may need same workaround here. 48 * 49 */ 50 51 /* un-comment DEBUG to enable pr_debug() statements */ 52 #define DEBUG 53 54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 55 56 #include <linux/kernel.h> 57 #include <linux/cpuidle.h> 58 #include <linux/tick.h> 59 #include <trace/events/power.h> 60 #include <linux/sched.h> 61 #include <linux/notifier.h> 62 #include <linux/cpu.h> 63 #include <linux/moduleparam.h> 64 #include <asm/cpu_device_id.h> 65 #include <asm/intel-family.h> 66 #include <asm/mwait.h> 67 #include <asm/msr.h> 68 69 #define INTEL_IDLE_VERSION "0.4.1" 70 71 static struct cpuidle_driver intel_idle_driver = { 72 .name = "intel_idle", 73 .owner = THIS_MODULE, 74 }; 75 /* intel_idle.max_cstate=0 disables driver */ 76 static int max_cstate = CPUIDLE_STATE_MAX - 1; 77 78 static unsigned int mwait_substates; 79 80 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF 81 /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ 82 static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ 83 84 struct idle_cpu { 85 struct cpuidle_state *state_table; 86 87 /* 88 * Hardware C-state auto-demotion may not always be optimal. 89 * Indicate which enable bits to clear here. 90 */ 91 unsigned long auto_demotion_disable_flags; 92 bool byt_auto_demotion_disable_flag; 93 bool disable_promotion_to_c1e; 94 }; 95 96 static const struct idle_cpu *icpu; 97 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 98 static int intel_idle(struct cpuidle_device *dev, 99 struct cpuidle_driver *drv, int index); 100 static void intel_idle_s2idle(struct cpuidle_device *dev, 101 struct cpuidle_driver *drv, int index); 102 static struct cpuidle_state *cpuidle_state_table; 103 104 /* 105 * Set this flag for states where the HW flushes the TLB for us 106 * and so we don't need cross-calls to keep it consistent. 107 * If this flag is set, SW flushes the TLB, so even if the 108 * HW doesn't do the flushing, this flag is safe to use. 109 */ 110 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 111 112 /* 113 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 114 * the C-state (top nibble) and sub-state (bottom nibble) 115 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 116 * 117 * We store the hint at the top of our "flags" for each state. 118 */ 119 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 120 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 121 122 /* 123 * States are indexed by the cstate number, 124 * which is also the index into the MWAIT hint array. 125 * Thus C0 is a dummy. 126 */ 127 static struct cpuidle_state nehalem_cstates[] = { 128 { 129 .name = "C1", 130 .desc = "MWAIT 0x00", 131 .flags = MWAIT2flg(0x00), 132 .exit_latency = 3, 133 .target_residency = 6, 134 .enter = &intel_idle, 135 .enter_s2idle = intel_idle_s2idle, }, 136 { 137 .name = "C1E", 138 .desc = "MWAIT 0x01", 139 .flags = MWAIT2flg(0x01), 140 .exit_latency = 10, 141 .target_residency = 20, 142 .enter = &intel_idle, 143 .enter_s2idle = intel_idle_s2idle, }, 144 { 145 .name = "C3", 146 .desc = "MWAIT 0x10", 147 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 148 .exit_latency = 20, 149 .target_residency = 80, 150 .enter = &intel_idle, 151 .enter_s2idle = intel_idle_s2idle, }, 152 { 153 .name = "C6", 154 .desc = "MWAIT 0x20", 155 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 156 .exit_latency = 200, 157 .target_residency = 800, 158 .enter = &intel_idle, 159 .enter_s2idle = intel_idle_s2idle, }, 160 { 161 .enter = NULL } 162 }; 163 164 static struct cpuidle_state snb_cstates[] = { 165 { 166 .name = "C1", 167 .desc = "MWAIT 0x00", 168 .flags = MWAIT2flg(0x00), 169 .exit_latency = 2, 170 .target_residency = 2, 171 .enter = &intel_idle, 172 .enter_s2idle = intel_idle_s2idle, }, 173 { 174 .name = "C1E", 175 .desc = "MWAIT 0x01", 176 .flags = MWAIT2flg(0x01), 177 .exit_latency = 10, 178 .target_residency = 20, 179 .enter = &intel_idle, 180 .enter_s2idle = intel_idle_s2idle, }, 181 { 182 .name = "C3", 183 .desc = "MWAIT 0x10", 184 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 185 .exit_latency = 80, 186 .target_residency = 211, 187 .enter = &intel_idle, 188 .enter_s2idle = intel_idle_s2idle, }, 189 { 190 .name = "C6", 191 .desc = "MWAIT 0x20", 192 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 193 .exit_latency = 104, 194 .target_residency = 345, 195 .enter = &intel_idle, 196 .enter_s2idle = intel_idle_s2idle, }, 197 { 198 .name = "C7", 199 .desc = "MWAIT 0x30", 200 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 201 .exit_latency = 109, 202 .target_residency = 345, 203 .enter = &intel_idle, 204 .enter_s2idle = intel_idle_s2idle, }, 205 { 206 .enter = NULL } 207 }; 208 209 static struct cpuidle_state byt_cstates[] = { 210 { 211 .name = "C1", 212 .desc = "MWAIT 0x00", 213 .flags = MWAIT2flg(0x00), 214 .exit_latency = 1, 215 .target_residency = 1, 216 .enter = &intel_idle, 217 .enter_s2idle = intel_idle_s2idle, }, 218 { 219 .name = "C6N", 220 .desc = "MWAIT 0x58", 221 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 222 .exit_latency = 300, 223 .target_residency = 275, 224 .enter = &intel_idle, 225 .enter_s2idle = intel_idle_s2idle, }, 226 { 227 .name = "C6S", 228 .desc = "MWAIT 0x52", 229 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 230 .exit_latency = 500, 231 .target_residency = 560, 232 .enter = &intel_idle, 233 .enter_s2idle = intel_idle_s2idle, }, 234 { 235 .name = "C7", 236 .desc = "MWAIT 0x60", 237 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 238 .exit_latency = 1200, 239 .target_residency = 4000, 240 .enter = &intel_idle, 241 .enter_s2idle = intel_idle_s2idle, }, 242 { 243 .name = "C7S", 244 .desc = "MWAIT 0x64", 245 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 246 .exit_latency = 10000, 247 .target_residency = 20000, 248 .enter = &intel_idle, 249 .enter_s2idle = intel_idle_s2idle, }, 250 { 251 .enter = NULL } 252 }; 253 254 static struct cpuidle_state cht_cstates[] = { 255 { 256 .name = "C1", 257 .desc = "MWAIT 0x00", 258 .flags = MWAIT2flg(0x00), 259 .exit_latency = 1, 260 .target_residency = 1, 261 .enter = &intel_idle, 262 .enter_s2idle = intel_idle_s2idle, }, 263 { 264 .name = "C6N", 265 .desc = "MWAIT 0x58", 266 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 267 .exit_latency = 80, 268 .target_residency = 275, 269 .enter = &intel_idle, 270 .enter_s2idle = intel_idle_s2idle, }, 271 { 272 .name = "C6S", 273 .desc = "MWAIT 0x52", 274 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 275 .exit_latency = 200, 276 .target_residency = 560, 277 .enter = &intel_idle, 278 .enter_s2idle = intel_idle_s2idle, }, 279 { 280 .name = "C7", 281 .desc = "MWAIT 0x60", 282 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 283 .exit_latency = 1200, 284 .target_residency = 4000, 285 .enter = &intel_idle, 286 .enter_s2idle = intel_idle_s2idle, }, 287 { 288 .name = "C7S", 289 .desc = "MWAIT 0x64", 290 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 291 .exit_latency = 10000, 292 .target_residency = 20000, 293 .enter = &intel_idle, 294 .enter_s2idle = intel_idle_s2idle, }, 295 { 296 .enter = NULL } 297 }; 298 299 static struct cpuidle_state ivb_cstates[] = { 300 { 301 .name = "C1", 302 .desc = "MWAIT 0x00", 303 .flags = MWAIT2flg(0x00), 304 .exit_latency = 1, 305 .target_residency = 1, 306 .enter = &intel_idle, 307 .enter_s2idle = intel_idle_s2idle, }, 308 { 309 .name = "C1E", 310 .desc = "MWAIT 0x01", 311 .flags = MWAIT2flg(0x01), 312 .exit_latency = 10, 313 .target_residency = 20, 314 .enter = &intel_idle, 315 .enter_s2idle = intel_idle_s2idle, }, 316 { 317 .name = "C3", 318 .desc = "MWAIT 0x10", 319 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 320 .exit_latency = 59, 321 .target_residency = 156, 322 .enter = &intel_idle, 323 .enter_s2idle = intel_idle_s2idle, }, 324 { 325 .name = "C6", 326 .desc = "MWAIT 0x20", 327 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 328 .exit_latency = 80, 329 .target_residency = 300, 330 .enter = &intel_idle, 331 .enter_s2idle = intel_idle_s2idle, }, 332 { 333 .name = "C7", 334 .desc = "MWAIT 0x30", 335 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 336 .exit_latency = 87, 337 .target_residency = 300, 338 .enter = &intel_idle, 339 .enter_s2idle = intel_idle_s2idle, }, 340 { 341 .enter = NULL } 342 }; 343 344 static struct cpuidle_state ivt_cstates[] = { 345 { 346 .name = "C1", 347 .desc = "MWAIT 0x00", 348 .flags = MWAIT2flg(0x00), 349 .exit_latency = 1, 350 .target_residency = 1, 351 .enter = &intel_idle, 352 .enter_s2idle = intel_idle_s2idle, }, 353 { 354 .name = "C1E", 355 .desc = "MWAIT 0x01", 356 .flags = MWAIT2flg(0x01), 357 .exit_latency = 10, 358 .target_residency = 80, 359 .enter = &intel_idle, 360 .enter_s2idle = intel_idle_s2idle, }, 361 { 362 .name = "C3", 363 .desc = "MWAIT 0x10", 364 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 365 .exit_latency = 59, 366 .target_residency = 156, 367 .enter = &intel_idle, 368 .enter_s2idle = intel_idle_s2idle, }, 369 { 370 .name = "C6", 371 .desc = "MWAIT 0x20", 372 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 373 .exit_latency = 82, 374 .target_residency = 300, 375 .enter = &intel_idle, 376 .enter_s2idle = intel_idle_s2idle, }, 377 { 378 .enter = NULL } 379 }; 380 381 static struct cpuidle_state ivt_cstates_4s[] = { 382 { 383 .name = "C1", 384 .desc = "MWAIT 0x00", 385 .flags = MWAIT2flg(0x00), 386 .exit_latency = 1, 387 .target_residency = 1, 388 .enter = &intel_idle, 389 .enter_s2idle = intel_idle_s2idle, }, 390 { 391 .name = "C1E", 392 .desc = "MWAIT 0x01", 393 .flags = MWAIT2flg(0x01), 394 .exit_latency = 10, 395 .target_residency = 250, 396 .enter = &intel_idle, 397 .enter_s2idle = intel_idle_s2idle, }, 398 { 399 .name = "C3", 400 .desc = "MWAIT 0x10", 401 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 402 .exit_latency = 59, 403 .target_residency = 300, 404 .enter = &intel_idle, 405 .enter_s2idle = intel_idle_s2idle, }, 406 { 407 .name = "C6", 408 .desc = "MWAIT 0x20", 409 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 410 .exit_latency = 84, 411 .target_residency = 400, 412 .enter = &intel_idle, 413 .enter_s2idle = intel_idle_s2idle, }, 414 { 415 .enter = NULL } 416 }; 417 418 static struct cpuidle_state ivt_cstates_8s[] = { 419 { 420 .name = "C1", 421 .desc = "MWAIT 0x00", 422 .flags = MWAIT2flg(0x00), 423 .exit_latency = 1, 424 .target_residency = 1, 425 .enter = &intel_idle, 426 .enter_s2idle = intel_idle_s2idle, }, 427 { 428 .name = "C1E", 429 .desc = "MWAIT 0x01", 430 .flags = MWAIT2flg(0x01), 431 .exit_latency = 10, 432 .target_residency = 500, 433 .enter = &intel_idle, 434 .enter_s2idle = intel_idle_s2idle, }, 435 { 436 .name = "C3", 437 .desc = "MWAIT 0x10", 438 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 439 .exit_latency = 59, 440 .target_residency = 600, 441 .enter = &intel_idle, 442 .enter_s2idle = intel_idle_s2idle, }, 443 { 444 .name = "C6", 445 .desc = "MWAIT 0x20", 446 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 447 .exit_latency = 88, 448 .target_residency = 700, 449 .enter = &intel_idle, 450 .enter_s2idle = intel_idle_s2idle, }, 451 { 452 .enter = NULL } 453 }; 454 455 static struct cpuidle_state hsw_cstates[] = { 456 { 457 .name = "C1", 458 .desc = "MWAIT 0x00", 459 .flags = MWAIT2flg(0x00), 460 .exit_latency = 2, 461 .target_residency = 2, 462 .enter = &intel_idle, 463 .enter_s2idle = intel_idle_s2idle, }, 464 { 465 .name = "C1E", 466 .desc = "MWAIT 0x01", 467 .flags = MWAIT2flg(0x01), 468 .exit_latency = 10, 469 .target_residency = 20, 470 .enter = &intel_idle, 471 .enter_s2idle = intel_idle_s2idle, }, 472 { 473 .name = "C3", 474 .desc = "MWAIT 0x10", 475 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 476 .exit_latency = 33, 477 .target_residency = 100, 478 .enter = &intel_idle, 479 .enter_s2idle = intel_idle_s2idle, }, 480 { 481 .name = "C6", 482 .desc = "MWAIT 0x20", 483 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 484 .exit_latency = 133, 485 .target_residency = 400, 486 .enter = &intel_idle, 487 .enter_s2idle = intel_idle_s2idle, }, 488 { 489 .name = "C7s", 490 .desc = "MWAIT 0x32", 491 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 492 .exit_latency = 166, 493 .target_residency = 500, 494 .enter = &intel_idle, 495 .enter_s2idle = intel_idle_s2idle, }, 496 { 497 .name = "C8", 498 .desc = "MWAIT 0x40", 499 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 500 .exit_latency = 300, 501 .target_residency = 900, 502 .enter = &intel_idle, 503 .enter_s2idle = intel_idle_s2idle, }, 504 { 505 .name = "C9", 506 .desc = "MWAIT 0x50", 507 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 508 .exit_latency = 600, 509 .target_residency = 1800, 510 .enter = &intel_idle, 511 .enter_s2idle = intel_idle_s2idle, }, 512 { 513 .name = "C10", 514 .desc = "MWAIT 0x60", 515 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 516 .exit_latency = 2600, 517 .target_residency = 7700, 518 .enter = &intel_idle, 519 .enter_s2idle = intel_idle_s2idle, }, 520 { 521 .enter = NULL } 522 }; 523 static struct cpuidle_state bdw_cstates[] = { 524 { 525 .name = "C1", 526 .desc = "MWAIT 0x00", 527 .flags = MWAIT2flg(0x00), 528 .exit_latency = 2, 529 .target_residency = 2, 530 .enter = &intel_idle, 531 .enter_s2idle = intel_idle_s2idle, }, 532 { 533 .name = "C1E", 534 .desc = "MWAIT 0x01", 535 .flags = MWAIT2flg(0x01), 536 .exit_latency = 10, 537 .target_residency = 20, 538 .enter = &intel_idle, 539 .enter_s2idle = intel_idle_s2idle, }, 540 { 541 .name = "C3", 542 .desc = "MWAIT 0x10", 543 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 544 .exit_latency = 40, 545 .target_residency = 100, 546 .enter = &intel_idle, 547 .enter_s2idle = intel_idle_s2idle, }, 548 { 549 .name = "C6", 550 .desc = "MWAIT 0x20", 551 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 552 .exit_latency = 133, 553 .target_residency = 400, 554 .enter = &intel_idle, 555 .enter_s2idle = intel_idle_s2idle, }, 556 { 557 .name = "C7s", 558 .desc = "MWAIT 0x32", 559 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 560 .exit_latency = 166, 561 .target_residency = 500, 562 .enter = &intel_idle, 563 .enter_s2idle = intel_idle_s2idle, }, 564 { 565 .name = "C8", 566 .desc = "MWAIT 0x40", 567 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 568 .exit_latency = 300, 569 .target_residency = 900, 570 .enter = &intel_idle, 571 .enter_s2idle = intel_idle_s2idle, }, 572 { 573 .name = "C9", 574 .desc = "MWAIT 0x50", 575 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 576 .exit_latency = 600, 577 .target_residency = 1800, 578 .enter = &intel_idle, 579 .enter_s2idle = intel_idle_s2idle, }, 580 { 581 .name = "C10", 582 .desc = "MWAIT 0x60", 583 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 584 .exit_latency = 2600, 585 .target_residency = 7700, 586 .enter = &intel_idle, 587 .enter_s2idle = intel_idle_s2idle, }, 588 { 589 .enter = NULL } 590 }; 591 592 static struct cpuidle_state skl_cstates[] = { 593 { 594 .name = "C1", 595 .desc = "MWAIT 0x00", 596 .flags = MWAIT2flg(0x00), 597 .exit_latency = 2, 598 .target_residency = 2, 599 .enter = &intel_idle, 600 .enter_s2idle = intel_idle_s2idle, }, 601 { 602 .name = "C1E", 603 .desc = "MWAIT 0x01", 604 .flags = MWAIT2flg(0x01), 605 .exit_latency = 10, 606 .target_residency = 20, 607 .enter = &intel_idle, 608 .enter_s2idle = intel_idle_s2idle, }, 609 { 610 .name = "C3", 611 .desc = "MWAIT 0x10", 612 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 613 .exit_latency = 70, 614 .target_residency = 100, 615 .enter = &intel_idle, 616 .enter_s2idle = intel_idle_s2idle, }, 617 { 618 .name = "C6", 619 .desc = "MWAIT 0x20", 620 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 621 .exit_latency = 85, 622 .target_residency = 200, 623 .enter = &intel_idle, 624 .enter_s2idle = intel_idle_s2idle, }, 625 { 626 .name = "C7s", 627 .desc = "MWAIT 0x33", 628 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 629 .exit_latency = 124, 630 .target_residency = 800, 631 .enter = &intel_idle, 632 .enter_s2idle = intel_idle_s2idle, }, 633 { 634 .name = "C8", 635 .desc = "MWAIT 0x40", 636 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 637 .exit_latency = 200, 638 .target_residency = 800, 639 .enter = &intel_idle, 640 .enter_s2idle = intel_idle_s2idle, }, 641 { 642 .name = "C9", 643 .desc = "MWAIT 0x50", 644 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 645 .exit_latency = 480, 646 .target_residency = 5000, 647 .enter = &intel_idle, 648 .enter_s2idle = intel_idle_s2idle, }, 649 { 650 .name = "C10", 651 .desc = "MWAIT 0x60", 652 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 653 .exit_latency = 890, 654 .target_residency = 5000, 655 .enter = &intel_idle, 656 .enter_s2idle = intel_idle_s2idle, }, 657 { 658 .enter = NULL } 659 }; 660 661 static struct cpuidle_state skx_cstates[] = { 662 { 663 .name = "C1", 664 .desc = "MWAIT 0x00", 665 .flags = MWAIT2flg(0x00), 666 .exit_latency = 2, 667 .target_residency = 2, 668 .enter = &intel_idle, 669 .enter_s2idle = intel_idle_s2idle, }, 670 { 671 .name = "C1E", 672 .desc = "MWAIT 0x01", 673 .flags = MWAIT2flg(0x01), 674 .exit_latency = 10, 675 .target_residency = 20, 676 .enter = &intel_idle, 677 .enter_s2idle = intel_idle_s2idle, }, 678 { 679 .name = "C6", 680 .desc = "MWAIT 0x20", 681 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 682 .exit_latency = 133, 683 .target_residency = 600, 684 .enter = &intel_idle, 685 .enter_s2idle = intel_idle_s2idle, }, 686 { 687 .enter = NULL } 688 }; 689 690 static struct cpuidle_state atom_cstates[] = { 691 { 692 .name = "C1E", 693 .desc = "MWAIT 0x00", 694 .flags = MWAIT2flg(0x00), 695 .exit_latency = 10, 696 .target_residency = 20, 697 .enter = &intel_idle, 698 .enter_s2idle = intel_idle_s2idle, }, 699 { 700 .name = "C2", 701 .desc = "MWAIT 0x10", 702 .flags = MWAIT2flg(0x10), 703 .exit_latency = 20, 704 .target_residency = 80, 705 .enter = &intel_idle, 706 .enter_s2idle = intel_idle_s2idle, }, 707 { 708 .name = "C4", 709 .desc = "MWAIT 0x30", 710 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 711 .exit_latency = 100, 712 .target_residency = 400, 713 .enter = &intel_idle, 714 .enter_s2idle = intel_idle_s2idle, }, 715 { 716 .name = "C6", 717 .desc = "MWAIT 0x52", 718 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 719 .exit_latency = 140, 720 .target_residency = 560, 721 .enter = &intel_idle, 722 .enter_s2idle = intel_idle_s2idle, }, 723 { 724 .enter = NULL } 725 }; 726 static struct cpuidle_state tangier_cstates[] = { 727 { 728 .name = "C1", 729 .desc = "MWAIT 0x00", 730 .flags = MWAIT2flg(0x00), 731 .exit_latency = 1, 732 .target_residency = 4, 733 .enter = &intel_idle, 734 .enter_s2idle = intel_idle_s2idle, }, 735 { 736 .name = "C4", 737 .desc = "MWAIT 0x30", 738 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 739 .exit_latency = 100, 740 .target_residency = 400, 741 .enter = &intel_idle, 742 .enter_s2idle = intel_idle_s2idle, }, 743 { 744 .name = "C6", 745 .desc = "MWAIT 0x52", 746 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 747 .exit_latency = 140, 748 .target_residency = 560, 749 .enter = &intel_idle, 750 .enter_s2idle = intel_idle_s2idle, }, 751 { 752 .name = "C7", 753 .desc = "MWAIT 0x60", 754 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 755 .exit_latency = 1200, 756 .target_residency = 4000, 757 .enter = &intel_idle, 758 .enter_s2idle = intel_idle_s2idle, }, 759 { 760 .name = "C9", 761 .desc = "MWAIT 0x64", 762 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 763 .exit_latency = 10000, 764 .target_residency = 20000, 765 .enter = &intel_idle, 766 .enter_s2idle = intel_idle_s2idle, }, 767 { 768 .enter = NULL } 769 }; 770 static struct cpuidle_state avn_cstates[] = { 771 { 772 .name = "C1", 773 .desc = "MWAIT 0x00", 774 .flags = MWAIT2flg(0x00), 775 .exit_latency = 2, 776 .target_residency = 2, 777 .enter = &intel_idle, 778 .enter_s2idle = intel_idle_s2idle, }, 779 { 780 .name = "C6", 781 .desc = "MWAIT 0x51", 782 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 783 .exit_latency = 15, 784 .target_residency = 45, 785 .enter = &intel_idle, 786 .enter_s2idle = intel_idle_s2idle, }, 787 { 788 .enter = NULL } 789 }; 790 static struct cpuidle_state knl_cstates[] = { 791 { 792 .name = "C1", 793 .desc = "MWAIT 0x00", 794 .flags = MWAIT2flg(0x00), 795 .exit_latency = 1, 796 .target_residency = 2, 797 .enter = &intel_idle, 798 .enter_s2idle = intel_idle_s2idle }, 799 { 800 .name = "C6", 801 .desc = "MWAIT 0x10", 802 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 803 .exit_latency = 120, 804 .target_residency = 500, 805 .enter = &intel_idle, 806 .enter_s2idle = intel_idle_s2idle }, 807 { 808 .enter = NULL } 809 }; 810 811 static struct cpuidle_state bxt_cstates[] = { 812 { 813 .name = "C1", 814 .desc = "MWAIT 0x00", 815 .flags = MWAIT2flg(0x00), 816 .exit_latency = 2, 817 .target_residency = 2, 818 .enter = &intel_idle, 819 .enter_s2idle = intel_idle_s2idle, }, 820 { 821 .name = "C1E", 822 .desc = "MWAIT 0x01", 823 .flags = MWAIT2flg(0x01), 824 .exit_latency = 10, 825 .target_residency = 20, 826 .enter = &intel_idle, 827 .enter_s2idle = intel_idle_s2idle, }, 828 { 829 .name = "C6", 830 .desc = "MWAIT 0x20", 831 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 832 .exit_latency = 133, 833 .target_residency = 133, 834 .enter = &intel_idle, 835 .enter_s2idle = intel_idle_s2idle, }, 836 { 837 .name = "C7s", 838 .desc = "MWAIT 0x31", 839 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 840 .exit_latency = 155, 841 .target_residency = 155, 842 .enter = &intel_idle, 843 .enter_s2idle = intel_idle_s2idle, }, 844 { 845 .name = "C8", 846 .desc = "MWAIT 0x40", 847 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 848 .exit_latency = 1000, 849 .target_residency = 1000, 850 .enter = &intel_idle, 851 .enter_s2idle = intel_idle_s2idle, }, 852 { 853 .name = "C9", 854 .desc = "MWAIT 0x50", 855 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 856 .exit_latency = 2000, 857 .target_residency = 2000, 858 .enter = &intel_idle, 859 .enter_s2idle = intel_idle_s2idle, }, 860 { 861 .name = "C10", 862 .desc = "MWAIT 0x60", 863 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 864 .exit_latency = 10000, 865 .target_residency = 10000, 866 .enter = &intel_idle, 867 .enter_s2idle = intel_idle_s2idle, }, 868 { 869 .enter = NULL } 870 }; 871 872 static struct cpuidle_state dnv_cstates[] = { 873 { 874 .name = "C1", 875 .desc = "MWAIT 0x00", 876 .flags = MWAIT2flg(0x00), 877 .exit_latency = 2, 878 .target_residency = 2, 879 .enter = &intel_idle, 880 .enter_s2idle = intel_idle_s2idle, }, 881 { 882 .name = "C1E", 883 .desc = "MWAIT 0x01", 884 .flags = MWAIT2flg(0x01), 885 .exit_latency = 10, 886 .target_residency = 20, 887 .enter = &intel_idle, 888 .enter_s2idle = intel_idle_s2idle, }, 889 { 890 .name = "C6", 891 .desc = "MWAIT 0x20", 892 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 893 .exit_latency = 50, 894 .target_residency = 500, 895 .enter = &intel_idle, 896 .enter_s2idle = intel_idle_s2idle, }, 897 { 898 .enter = NULL } 899 }; 900 901 /** 902 * intel_idle 903 * @dev: cpuidle_device 904 * @drv: cpuidle driver 905 * @index: index of cpuidle state 906 * 907 * Must be called under local_irq_disable(). 908 */ 909 static __cpuidle int intel_idle(struct cpuidle_device *dev, 910 struct cpuidle_driver *drv, int index) 911 { 912 unsigned long ecx = 1; /* break on interrupt flag */ 913 struct cpuidle_state *state = &drv->states[index]; 914 unsigned long eax = flg2MWAIT(state->flags); 915 unsigned int cstate; 916 917 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; 918 919 /* 920 * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition 921 * will probably flush the TLB. It's not guaranteed to flush 922 * the TLB, though, so it's not clear that we can do anything 923 * useful with this knowledge. 924 */ 925 926 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 927 tick_broadcast_enter(); 928 929 mwait_idle_with_hints(eax, ecx); 930 931 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 932 tick_broadcast_exit(); 933 934 return index; 935 } 936 937 /** 938 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle 939 * @dev: cpuidle_device 940 * @drv: cpuidle driver 941 * @index: state index 942 */ 943 static void intel_idle_s2idle(struct cpuidle_device *dev, 944 struct cpuidle_driver *drv, int index) 945 { 946 unsigned long ecx = 1; /* break on interrupt flag */ 947 unsigned long eax = flg2MWAIT(drv->states[index].flags); 948 949 mwait_idle_with_hints(eax, ecx); 950 } 951 952 static void __setup_broadcast_timer(bool on) 953 { 954 if (on) 955 tick_broadcast_enable(); 956 else 957 tick_broadcast_disable(); 958 } 959 960 static void auto_demotion_disable(void) 961 { 962 unsigned long long msr_bits; 963 964 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 965 msr_bits &= ~(icpu->auto_demotion_disable_flags); 966 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 967 } 968 static void c1e_promotion_disable(void) 969 { 970 unsigned long long msr_bits; 971 972 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 973 msr_bits &= ~0x2; 974 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 975 } 976 977 static const struct idle_cpu idle_cpu_nehalem = { 978 .state_table = nehalem_cstates, 979 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 980 .disable_promotion_to_c1e = true, 981 }; 982 983 static const struct idle_cpu idle_cpu_atom = { 984 .state_table = atom_cstates, 985 }; 986 987 static const struct idle_cpu idle_cpu_tangier = { 988 .state_table = tangier_cstates, 989 }; 990 991 static const struct idle_cpu idle_cpu_lincroft = { 992 .state_table = atom_cstates, 993 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 994 }; 995 996 static const struct idle_cpu idle_cpu_snb = { 997 .state_table = snb_cstates, 998 .disable_promotion_to_c1e = true, 999 }; 1000 1001 static const struct idle_cpu idle_cpu_byt = { 1002 .state_table = byt_cstates, 1003 .disable_promotion_to_c1e = true, 1004 .byt_auto_demotion_disable_flag = true, 1005 }; 1006 1007 static const struct idle_cpu idle_cpu_cht = { 1008 .state_table = cht_cstates, 1009 .disable_promotion_to_c1e = true, 1010 .byt_auto_demotion_disable_flag = true, 1011 }; 1012 1013 static const struct idle_cpu idle_cpu_ivb = { 1014 .state_table = ivb_cstates, 1015 .disable_promotion_to_c1e = true, 1016 }; 1017 1018 static const struct idle_cpu idle_cpu_ivt = { 1019 .state_table = ivt_cstates, 1020 .disable_promotion_to_c1e = true, 1021 }; 1022 1023 static const struct idle_cpu idle_cpu_hsw = { 1024 .state_table = hsw_cstates, 1025 .disable_promotion_to_c1e = true, 1026 }; 1027 1028 static const struct idle_cpu idle_cpu_bdw = { 1029 .state_table = bdw_cstates, 1030 .disable_promotion_to_c1e = true, 1031 }; 1032 1033 static const struct idle_cpu idle_cpu_skl = { 1034 .state_table = skl_cstates, 1035 .disable_promotion_to_c1e = true, 1036 }; 1037 1038 static const struct idle_cpu idle_cpu_skx = { 1039 .state_table = skx_cstates, 1040 .disable_promotion_to_c1e = true, 1041 }; 1042 1043 static const struct idle_cpu idle_cpu_avn = { 1044 .state_table = avn_cstates, 1045 .disable_promotion_to_c1e = true, 1046 }; 1047 1048 static const struct idle_cpu idle_cpu_knl = { 1049 .state_table = knl_cstates, 1050 }; 1051 1052 static const struct idle_cpu idle_cpu_bxt = { 1053 .state_table = bxt_cstates, 1054 .disable_promotion_to_c1e = true, 1055 }; 1056 1057 static const struct idle_cpu idle_cpu_dnv = { 1058 .state_table = dnv_cstates, 1059 .disable_promotion_to_c1e = true, 1060 }; 1061 1062 #define ICPU(model, cpu) \ 1063 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } 1064 1065 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1066 ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), 1067 ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), 1068 ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), 1069 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), 1070 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), 1071 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), 1072 ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), 1073 ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), 1074 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), 1075 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), 1076 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), 1077 ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), 1078 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), 1079 ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), 1080 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), 1081 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), 1082 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), 1083 ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), 1084 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), 1085 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), 1086 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), 1087 ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), 1088 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), 1089 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), 1090 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), 1091 ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), 1092 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), 1093 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), 1094 ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), 1095 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), 1096 ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), 1097 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), 1098 ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), 1099 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), 1100 ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), 1101 ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), 1102 {} 1103 }; 1104 1105 /* 1106 * intel_idle_probe() 1107 */ 1108 static int __init intel_idle_probe(void) 1109 { 1110 unsigned int eax, ebx, ecx; 1111 const struct x86_cpu_id *id; 1112 1113 if (max_cstate == 0) { 1114 pr_debug("disabled\n"); 1115 return -EPERM; 1116 } 1117 1118 id = x86_match_cpu(intel_idle_ids); 1119 if (!id) { 1120 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1121 boot_cpu_data.x86 == 6) 1122 pr_debug("does not run on family %d model %d\n", 1123 boot_cpu_data.x86, boot_cpu_data.x86_model); 1124 return -ENODEV; 1125 } 1126 1127 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1128 return -ENODEV; 1129 1130 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1131 1132 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1133 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1134 !mwait_substates) 1135 return -ENODEV; 1136 1137 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1138 1139 icpu = (const struct idle_cpu *)id->driver_data; 1140 cpuidle_state_table = icpu->state_table; 1141 1142 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1143 boot_cpu_data.x86_model); 1144 1145 return 0; 1146 } 1147 1148 /* 1149 * intel_idle_cpuidle_devices_uninit() 1150 * Unregisters the cpuidle devices. 1151 */ 1152 static void intel_idle_cpuidle_devices_uninit(void) 1153 { 1154 int i; 1155 struct cpuidle_device *dev; 1156 1157 for_each_online_cpu(i) { 1158 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); 1159 cpuidle_unregister_device(dev); 1160 } 1161 } 1162 1163 /* 1164 * ivt_idle_state_table_update(void) 1165 * 1166 * Tune IVT multi-socket targets 1167 * Assumption: num_sockets == (max_package_num + 1) 1168 */ 1169 static void ivt_idle_state_table_update(void) 1170 { 1171 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1172 int cpu, package_num, num_sockets = 1; 1173 1174 for_each_online_cpu(cpu) { 1175 package_num = topology_physical_package_id(cpu); 1176 if (package_num + 1 > num_sockets) { 1177 num_sockets = package_num + 1; 1178 1179 if (num_sockets > 4) { 1180 cpuidle_state_table = ivt_cstates_8s; 1181 return; 1182 } 1183 } 1184 } 1185 1186 if (num_sockets > 2) 1187 cpuidle_state_table = ivt_cstates_4s; 1188 1189 /* else, 1 and 2 socket systems use default ivt_cstates */ 1190 } 1191 1192 /* 1193 * Translate IRTL (Interrupt Response Time Limit) MSR to usec 1194 */ 1195 1196 static unsigned int irtl_ns_units[] = { 1197 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 1198 1199 static unsigned long long irtl_2_usec(unsigned long long irtl) 1200 { 1201 unsigned long long ns; 1202 1203 if (!irtl) 1204 return 0; 1205 1206 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1207 1208 return div64_u64((irtl & 0x3FF) * ns, 1000); 1209 } 1210 /* 1211 * bxt_idle_state_table_update(void) 1212 * 1213 * On BXT, we trust the IRTL to show the definitive maximum latency 1214 * We use the same value for target_residency. 1215 */ 1216 static void bxt_idle_state_table_update(void) 1217 { 1218 unsigned long long msr; 1219 unsigned int usec; 1220 1221 rdmsrl(MSR_PKGC6_IRTL, msr); 1222 usec = irtl_2_usec(msr); 1223 if (usec) { 1224 bxt_cstates[2].exit_latency = usec; 1225 bxt_cstates[2].target_residency = usec; 1226 } 1227 1228 rdmsrl(MSR_PKGC7_IRTL, msr); 1229 usec = irtl_2_usec(msr); 1230 if (usec) { 1231 bxt_cstates[3].exit_latency = usec; 1232 bxt_cstates[3].target_residency = usec; 1233 } 1234 1235 rdmsrl(MSR_PKGC8_IRTL, msr); 1236 usec = irtl_2_usec(msr); 1237 if (usec) { 1238 bxt_cstates[4].exit_latency = usec; 1239 bxt_cstates[4].target_residency = usec; 1240 } 1241 1242 rdmsrl(MSR_PKGC9_IRTL, msr); 1243 usec = irtl_2_usec(msr); 1244 if (usec) { 1245 bxt_cstates[5].exit_latency = usec; 1246 bxt_cstates[5].target_residency = usec; 1247 } 1248 1249 rdmsrl(MSR_PKGC10_IRTL, msr); 1250 usec = irtl_2_usec(msr); 1251 if (usec) { 1252 bxt_cstates[6].exit_latency = usec; 1253 bxt_cstates[6].target_residency = usec; 1254 } 1255 1256 } 1257 /* 1258 * sklh_idle_state_table_update(void) 1259 * 1260 * On SKL-H (model 0x5e) disable C8 and C9 if: 1261 * C10 is enabled and SGX disabled 1262 */ 1263 static void sklh_idle_state_table_update(void) 1264 { 1265 unsigned long long msr; 1266 unsigned int eax, ebx, ecx, edx; 1267 1268 1269 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1270 if (max_cstate <= 7) 1271 return; 1272 1273 /* if PC10 not present in CPUID.MWAIT.EDX */ 1274 if ((mwait_substates & (0xF << 28)) == 0) 1275 return; 1276 1277 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1278 1279 /* PC10 is not enabled in PKG C-state limit */ 1280 if ((msr & 0xF) != 8) 1281 return; 1282 1283 ecx = 0; 1284 cpuid(7, &eax, &ebx, &ecx, &edx); 1285 1286 /* if SGX is present */ 1287 if (ebx & (1 << 2)) { 1288 1289 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1290 1291 /* if SGX is enabled */ 1292 if (msr & (1 << 18)) 1293 return; 1294 } 1295 1296 skl_cstates[5].disabled = 1; /* C8-SKL */ 1297 skl_cstates[6].disabled = 1; /* C9-SKL */ 1298 } 1299 /* 1300 * intel_idle_state_table_update() 1301 * 1302 * Update the default state_table for this CPU-id 1303 */ 1304 1305 static void intel_idle_state_table_update(void) 1306 { 1307 switch (boot_cpu_data.x86_model) { 1308 1309 case INTEL_FAM6_IVYBRIDGE_X: 1310 ivt_idle_state_table_update(); 1311 break; 1312 case INTEL_FAM6_ATOM_GOLDMONT: 1313 case INTEL_FAM6_ATOM_GEMINI_LAKE: 1314 bxt_idle_state_table_update(); 1315 break; 1316 case INTEL_FAM6_SKYLAKE_DESKTOP: 1317 sklh_idle_state_table_update(); 1318 break; 1319 } 1320 } 1321 1322 /* 1323 * intel_idle_cpuidle_driver_init() 1324 * allocate, initialize cpuidle_states 1325 */ 1326 static void __init intel_idle_cpuidle_driver_init(void) 1327 { 1328 int cstate; 1329 struct cpuidle_driver *drv = &intel_idle_driver; 1330 1331 intel_idle_state_table_update(); 1332 1333 cpuidle_poll_state_init(drv); 1334 drv->state_count = 1; 1335 1336 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1337 int num_substates, mwait_hint, mwait_cstate; 1338 1339 if ((cpuidle_state_table[cstate].enter == NULL) && 1340 (cpuidle_state_table[cstate].enter_s2idle == NULL)) 1341 break; 1342 1343 if (cstate + 1 > max_cstate) { 1344 pr_info("max_cstate %d reached\n", max_cstate); 1345 break; 1346 } 1347 1348 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1349 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); 1350 1351 /* number of sub-states for this state in CPUID.MWAIT */ 1352 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) 1353 & MWAIT_SUBSTATE_MASK; 1354 1355 /* if NO sub-states for this state in CPUID, skip it */ 1356 if (num_substates == 0) 1357 continue; 1358 1359 /* if state marked as disabled, skip it */ 1360 if (cpuidle_state_table[cstate].disabled != 0) { 1361 pr_debug("state %s is disabled\n", 1362 cpuidle_state_table[cstate].name); 1363 continue; 1364 } 1365 1366 1367 if (((mwait_cstate + 1) > 2) && 1368 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1369 mark_tsc_unstable("TSC halts in idle" 1370 " states deeper than C2"); 1371 1372 drv->states[drv->state_count] = /* structure copy */ 1373 cpuidle_state_table[cstate]; 1374 1375 drv->state_count += 1; 1376 } 1377 1378 if (icpu->byt_auto_demotion_disable_flag) { 1379 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1380 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1381 } 1382 } 1383 1384 1385 /* 1386 * intel_idle_cpu_init() 1387 * allocate, initialize, register cpuidle_devices 1388 * @cpu: cpu/core to initialize 1389 */ 1390 static int intel_idle_cpu_init(unsigned int cpu) 1391 { 1392 struct cpuidle_device *dev; 1393 1394 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1395 dev->cpu = cpu; 1396 1397 if (cpuidle_register_device(dev)) { 1398 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1399 return -EIO; 1400 } 1401 1402 if (icpu->auto_demotion_disable_flags) 1403 auto_demotion_disable(); 1404 1405 if (icpu->disable_promotion_to_c1e) 1406 c1e_promotion_disable(); 1407 1408 return 0; 1409 } 1410 1411 static int intel_idle_cpu_online(unsigned int cpu) 1412 { 1413 struct cpuidle_device *dev; 1414 1415 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 1416 __setup_broadcast_timer(true); 1417 1418 /* 1419 * Some systems can hotplug a cpu at runtime after 1420 * the kernel has booted, we have to initialize the 1421 * driver in this case 1422 */ 1423 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1424 if (!dev->registered) 1425 return intel_idle_cpu_init(cpu); 1426 1427 return 0; 1428 } 1429 1430 static int __init intel_idle_init(void) 1431 { 1432 int retval; 1433 1434 /* Do not load intel_idle at all for now if idle= is passed */ 1435 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1436 return -ENODEV; 1437 1438 retval = intel_idle_probe(); 1439 if (retval) 1440 return retval; 1441 1442 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1443 if (intel_idle_cpuidle_devices == NULL) 1444 return -ENOMEM; 1445 1446 intel_idle_cpuidle_driver_init(); 1447 retval = cpuidle_register_driver(&intel_idle_driver); 1448 if (retval) { 1449 struct cpuidle_driver *drv = cpuidle_get_driver(); 1450 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1451 drv ? drv->name : "none"); 1452 goto init_driver_fail; 1453 } 1454 1455 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1456 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1457 1458 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1459 intel_idle_cpu_online, NULL); 1460 if (retval < 0) 1461 goto hp_setup_fail; 1462 1463 pr_debug("lapic_timer_reliable_states 0x%x\n", 1464 lapic_timer_reliable_states); 1465 1466 return 0; 1467 1468 hp_setup_fail: 1469 intel_idle_cpuidle_devices_uninit(); 1470 cpuidle_unregister_driver(&intel_idle_driver); 1471 init_driver_fail: 1472 free_percpu(intel_idle_cpuidle_devices); 1473 return retval; 1474 1475 } 1476 device_initcall(intel_idle_init); 1477 1478 /* 1479 * We are not really modular, but we used to support that. Meaning we also 1480 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1481 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1482 * is the easiest way (currently) to continue doing that. 1483 */ 1484 module_param(max_cstate, int, 0444); 1485