/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
*
 */

/*
 * DEBUG is defined here, which enables the pr_debug() statements in this
 * file; comment the definition out to silence them.
 */
#define DEBUG

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.4.1"
/* Prefix prepended to every message this driver prints */
#define PREFIX "intel_idle: "

/* The single cpuidle driver instance registered by this file */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;

/* EDX of CPUID leaf CPUID_MWAIT_LEAF, filled in by intel_idle_probe() */
static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
/* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */

/*
 * Per-CPU-model description: which C-state table to use plus the
 * model-specific tweaks to apply.
 */
struct idle_cpu {
	/* C-state table for this model (one of the *_cstates[] arrays) */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* NOTE(review): consumer is not in this part of the file - presumably selects the BYT/CHT-specific auto-demotion disable; confirm */
	bool byt_auto_demotion_disable_flag;
	/* NOTE(review): presumably requests c1e_promotion_disable() at CPU init; confirm against intel_idle_cpu_init() */
	bool disable_promotion_to_c1e;
};

/* Description of the boot CPU's model, selected in intel_idle_probe() */
static const struct idle_cpu *icpu;
/* Per-CPU cpuidle devices managed by this driver */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
			struct cpuidle_driver *drv, int index);
static void intel_idle_freeze(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index);
static int intel_idle_cpu_init(int cpu);

/* C-state table in use; starts as icpu->state_table and may be re-pointed by the *_idle_state_table_update() helpers */
static struct cpuidle_state *cpuidle_state_table;

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
110 */ 111 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 112 113 /* 114 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 115 * the C-state (top nibble) and sub-state (bottom nibble) 116 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 117 * 118 * We store the hint at the top of our "flags" for each state. 119 */ 120 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 121 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 122 123 /* 124 * States are indexed by the cstate number, 125 * which is also the index into the MWAIT hint array. 126 * Thus C0 is a dummy. 127 */ 128 static struct cpuidle_state nehalem_cstates[] = { 129 { 130 .name = "C1-NHM", 131 .desc = "MWAIT 0x00", 132 .flags = MWAIT2flg(0x00), 133 .exit_latency = 3, 134 .target_residency = 6, 135 .enter = &intel_idle, 136 .enter_freeze = intel_idle_freeze, }, 137 { 138 .name = "C1E-NHM", 139 .desc = "MWAIT 0x01", 140 .flags = MWAIT2flg(0x01), 141 .exit_latency = 10, 142 .target_residency = 20, 143 .enter = &intel_idle, 144 .enter_freeze = intel_idle_freeze, }, 145 { 146 .name = "C3-NHM", 147 .desc = "MWAIT 0x10", 148 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 149 .exit_latency = 20, 150 .target_residency = 80, 151 .enter = &intel_idle, 152 .enter_freeze = intel_idle_freeze, }, 153 { 154 .name = "C6-NHM", 155 .desc = "MWAIT 0x20", 156 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 157 .exit_latency = 200, 158 .target_residency = 800, 159 .enter = &intel_idle, 160 .enter_freeze = intel_idle_freeze, }, 161 { 162 .enter = NULL } 163 }; 164 165 static struct cpuidle_state snb_cstates[] = { 166 { 167 .name = "C1-SNB", 168 .desc = "MWAIT 0x00", 169 .flags = MWAIT2flg(0x00), 170 .exit_latency = 2, 171 .target_residency = 2, 172 .enter = &intel_idle, 173 .enter_freeze = intel_idle_freeze, }, 174 { 175 .name = "C1E-SNB", 176 .desc = "MWAIT 0x01", 177 .flags = MWAIT2flg(0x01), 178 .exit_latency = 10, 179 .target_residency = 20, 180 .enter = &intel_idle, 181 .enter_freeze = intel_idle_freeze, }, 182 { 183 .name 
= "C3-SNB", 184 .desc = "MWAIT 0x10", 185 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 186 .exit_latency = 80, 187 .target_residency = 211, 188 .enter = &intel_idle, 189 .enter_freeze = intel_idle_freeze, }, 190 { 191 .name = "C6-SNB", 192 .desc = "MWAIT 0x20", 193 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 194 .exit_latency = 104, 195 .target_residency = 345, 196 .enter = &intel_idle, 197 .enter_freeze = intel_idle_freeze, }, 198 { 199 .name = "C7-SNB", 200 .desc = "MWAIT 0x30", 201 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 202 .exit_latency = 109, 203 .target_residency = 345, 204 .enter = &intel_idle, 205 .enter_freeze = intel_idle_freeze, }, 206 { 207 .enter = NULL } 208 }; 209 210 static struct cpuidle_state byt_cstates[] = { 211 { 212 .name = "C1-BYT", 213 .desc = "MWAIT 0x00", 214 .flags = MWAIT2flg(0x00), 215 .exit_latency = 1, 216 .target_residency = 1, 217 .enter = &intel_idle, 218 .enter_freeze = intel_idle_freeze, }, 219 { 220 .name = "C6N-BYT", 221 .desc = "MWAIT 0x58", 222 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 223 .exit_latency = 300, 224 .target_residency = 275, 225 .enter = &intel_idle, 226 .enter_freeze = intel_idle_freeze, }, 227 { 228 .name = "C6S-BYT", 229 .desc = "MWAIT 0x52", 230 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 231 .exit_latency = 500, 232 .target_residency = 560, 233 .enter = &intel_idle, 234 .enter_freeze = intel_idle_freeze, }, 235 { 236 .name = "C7-BYT", 237 .desc = "MWAIT 0x60", 238 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 239 .exit_latency = 1200, 240 .target_residency = 4000, 241 .enter = &intel_idle, 242 .enter_freeze = intel_idle_freeze, }, 243 { 244 .name = "C7S-BYT", 245 .desc = "MWAIT 0x64", 246 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 247 .exit_latency = 10000, 248 .target_residency = 20000, 249 .enter = &intel_idle, 250 .enter_freeze = intel_idle_freeze, }, 251 { 252 .enter = NULL } 253 }; 254 255 static struct cpuidle_state cht_cstates[] = { 
256 { 257 .name = "C1-CHT", 258 .desc = "MWAIT 0x00", 259 .flags = MWAIT2flg(0x00), 260 .exit_latency = 1, 261 .target_residency = 1, 262 .enter = &intel_idle, 263 .enter_freeze = intel_idle_freeze, }, 264 { 265 .name = "C6N-CHT", 266 .desc = "MWAIT 0x58", 267 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 268 .exit_latency = 80, 269 .target_residency = 275, 270 .enter = &intel_idle, 271 .enter_freeze = intel_idle_freeze, }, 272 { 273 .name = "C6S-CHT", 274 .desc = "MWAIT 0x52", 275 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 276 .exit_latency = 200, 277 .target_residency = 560, 278 .enter = &intel_idle, 279 .enter_freeze = intel_idle_freeze, }, 280 { 281 .name = "C7-CHT", 282 .desc = "MWAIT 0x60", 283 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 284 .exit_latency = 1200, 285 .target_residency = 4000, 286 .enter = &intel_idle, 287 .enter_freeze = intel_idle_freeze, }, 288 { 289 .name = "C7S-CHT", 290 .desc = "MWAIT 0x64", 291 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 292 .exit_latency = 10000, 293 .target_residency = 20000, 294 .enter = &intel_idle, 295 .enter_freeze = intel_idle_freeze, }, 296 { 297 .enter = NULL } 298 }; 299 300 static struct cpuidle_state ivb_cstates[] = { 301 { 302 .name = "C1-IVB", 303 .desc = "MWAIT 0x00", 304 .flags = MWAIT2flg(0x00), 305 .exit_latency = 1, 306 .target_residency = 1, 307 .enter = &intel_idle, 308 .enter_freeze = intel_idle_freeze, }, 309 { 310 .name = "C1E-IVB", 311 .desc = "MWAIT 0x01", 312 .flags = MWAIT2flg(0x01), 313 .exit_latency = 10, 314 .target_residency = 20, 315 .enter = &intel_idle, 316 .enter_freeze = intel_idle_freeze, }, 317 { 318 .name = "C3-IVB", 319 .desc = "MWAIT 0x10", 320 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 321 .exit_latency = 59, 322 .target_residency = 156, 323 .enter = &intel_idle, 324 .enter_freeze = intel_idle_freeze, }, 325 { 326 .name = "C6-IVB", 327 .desc = "MWAIT 0x20", 328 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 329 .exit_latency = 
80, 330 .target_residency = 300, 331 .enter = &intel_idle, 332 .enter_freeze = intel_idle_freeze, }, 333 { 334 .name = "C7-IVB", 335 .desc = "MWAIT 0x30", 336 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 337 .exit_latency = 87, 338 .target_residency = 300, 339 .enter = &intel_idle, 340 .enter_freeze = intel_idle_freeze, }, 341 { 342 .enter = NULL } 343 }; 344 345 static struct cpuidle_state ivt_cstates[] = { 346 { 347 .name = "C1-IVT", 348 .desc = "MWAIT 0x00", 349 .flags = MWAIT2flg(0x00), 350 .exit_latency = 1, 351 .target_residency = 1, 352 .enter = &intel_idle, 353 .enter_freeze = intel_idle_freeze, }, 354 { 355 .name = "C1E-IVT", 356 .desc = "MWAIT 0x01", 357 .flags = MWAIT2flg(0x01), 358 .exit_latency = 10, 359 .target_residency = 80, 360 .enter = &intel_idle, 361 .enter_freeze = intel_idle_freeze, }, 362 { 363 .name = "C3-IVT", 364 .desc = "MWAIT 0x10", 365 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 366 .exit_latency = 59, 367 .target_residency = 156, 368 .enter = &intel_idle, 369 .enter_freeze = intel_idle_freeze, }, 370 { 371 .name = "C6-IVT", 372 .desc = "MWAIT 0x20", 373 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 374 .exit_latency = 82, 375 .target_residency = 300, 376 .enter = &intel_idle, 377 .enter_freeze = intel_idle_freeze, }, 378 { 379 .enter = NULL } 380 }; 381 382 static struct cpuidle_state ivt_cstates_4s[] = { 383 { 384 .name = "C1-IVT-4S", 385 .desc = "MWAIT 0x00", 386 .flags = MWAIT2flg(0x00), 387 .exit_latency = 1, 388 .target_residency = 1, 389 .enter = &intel_idle, 390 .enter_freeze = intel_idle_freeze, }, 391 { 392 .name = "C1E-IVT-4S", 393 .desc = "MWAIT 0x01", 394 .flags = MWAIT2flg(0x01), 395 .exit_latency = 10, 396 .target_residency = 250, 397 .enter = &intel_idle, 398 .enter_freeze = intel_idle_freeze, }, 399 { 400 .name = "C3-IVT-4S", 401 .desc = "MWAIT 0x10", 402 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 403 .exit_latency = 59, 404 .target_residency = 300, 405 .enter = &intel_idle, 406 
.enter_freeze = intel_idle_freeze, }, 407 { 408 .name = "C6-IVT-4S", 409 .desc = "MWAIT 0x20", 410 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 411 .exit_latency = 84, 412 .target_residency = 400, 413 .enter = &intel_idle, 414 .enter_freeze = intel_idle_freeze, }, 415 { 416 .enter = NULL } 417 }; 418 419 static struct cpuidle_state ivt_cstates_8s[] = { 420 { 421 .name = "C1-IVT-8S", 422 .desc = "MWAIT 0x00", 423 .flags = MWAIT2flg(0x00), 424 .exit_latency = 1, 425 .target_residency = 1, 426 .enter = &intel_idle, 427 .enter_freeze = intel_idle_freeze, }, 428 { 429 .name = "C1E-IVT-8S", 430 .desc = "MWAIT 0x01", 431 .flags = MWAIT2flg(0x01), 432 .exit_latency = 10, 433 .target_residency = 500, 434 .enter = &intel_idle, 435 .enter_freeze = intel_idle_freeze, }, 436 { 437 .name = "C3-IVT-8S", 438 .desc = "MWAIT 0x10", 439 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 440 .exit_latency = 59, 441 .target_residency = 600, 442 .enter = &intel_idle, 443 .enter_freeze = intel_idle_freeze, }, 444 { 445 .name = "C6-IVT-8S", 446 .desc = "MWAIT 0x20", 447 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 448 .exit_latency = 88, 449 .target_residency = 700, 450 .enter = &intel_idle, 451 .enter_freeze = intel_idle_freeze, }, 452 { 453 .enter = NULL } 454 }; 455 456 static struct cpuidle_state hsw_cstates[] = { 457 { 458 .name = "C1-HSW", 459 .desc = "MWAIT 0x00", 460 .flags = MWAIT2flg(0x00), 461 .exit_latency = 2, 462 .target_residency = 2, 463 .enter = &intel_idle, 464 .enter_freeze = intel_idle_freeze, }, 465 { 466 .name = "C1E-HSW", 467 .desc = "MWAIT 0x01", 468 .flags = MWAIT2flg(0x01), 469 .exit_latency = 10, 470 .target_residency = 20, 471 .enter = &intel_idle, 472 .enter_freeze = intel_idle_freeze, }, 473 { 474 .name = "C3-HSW", 475 .desc = "MWAIT 0x10", 476 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 477 .exit_latency = 33, 478 .target_residency = 100, 479 .enter = &intel_idle, 480 .enter_freeze = intel_idle_freeze, }, 481 { 482 .name = 
"C6-HSW", 483 .desc = "MWAIT 0x20", 484 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 485 .exit_latency = 133, 486 .target_residency = 400, 487 .enter = &intel_idle, 488 .enter_freeze = intel_idle_freeze, }, 489 { 490 .name = "C7s-HSW", 491 .desc = "MWAIT 0x32", 492 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 493 .exit_latency = 166, 494 .target_residency = 500, 495 .enter = &intel_idle, 496 .enter_freeze = intel_idle_freeze, }, 497 { 498 .name = "C8-HSW", 499 .desc = "MWAIT 0x40", 500 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 501 .exit_latency = 300, 502 .target_residency = 900, 503 .enter = &intel_idle, 504 .enter_freeze = intel_idle_freeze, }, 505 { 506 .name = "C9-HSW", 507 .desc = "MWAIT 0x50", 508 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 509 .exit_latency = 600, 510 .target_residency = 1800, 511 .enter = &intel_idle, 512 .enter_freeze = intel_idle_freeze, }, 513 { 514 .name = "C10-HSW", 515 .desc = "MWAIT 0x60", 516 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 517 .exit_latency = 2600, 518 .target_residency = 7700, 519 .enter = &intel_idle, 520 .enter_freeze = intel_idle_freeze, }, 521 { 522 .enter = NULL } 523 }; 524 static struct cpuidle_state bdw_cstates[] = { 525 { 526 .name = "C1-BDW", 527 .desc = "MWAIT 0x00", 528 .flags = MWAIT2flg(0x00), 529 .exit_latency = 2, 530 .target_residency = 2, 531 .enter = &intel_idle, 532 .enter_freeze = intel_idle_freeze, }, 533 { 534 .name = "C1E-BDW", 535 .desc = "MWAIT 0x01", 536 .flags = MWAIT2flg(0x01), 537 .exit_latency = 10, 538 .target_residency = 20, 539 .enter = &intel_idle, 540 .enter_freeze = intel_idle_freeze, }, 541 { 542 .name = "C3-BDW", 543 .desc = "MWAIT 0x10", 544 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 545 .exit_latency = 40, 546 .target_residency = 100, 547 .enter = &intel_idle, 548 .enter_freeze = intel_idle_freeze, }, 549 { 550 .name = "C6-BDW", 551 .desc = "MWAIT 0x20", 552 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 553 
.exit_latency = 133, 554 .target_residency = 400, 555 .enter = &intel_idle, 556 .enter_freeze = intel_idle_freeze, }, 557 { 558 .name = "C7s-BDW", 559 .desc = "MWAIT 0x32", 560 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 561 .exit_latency = 166, 562 .target_residency = 500, 563 .enter = &intel_idle, 564 .enter_freeze = intel_idle_freeze, }, 565 { 566 .name = "C8-BDW", 567 .desc = "MWAIT 0x40", 568 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 569 .exit_latency = 300, 570 .target_residency = 900, 571 .enter = &intel_idle, 572 .enter_freeze = intel_idle_freeze, }, 573 { 574 .name = "C9-BDW", 575 .desc = "MWAIT 0x50", 576 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 577 .exit_latency = 600, 578 .target_residency = 1800, 579 .enter = &intel_idle, 580 .enter_freeze = intel_idle_freeze, }, 581 { 582 .name = "C10-BDW", 583 .desc = "MWAIT 0x60", 584 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 585 .exit_latency = 2600, 586 .target_residency = 7700, 587 .enter = &intel_idle, 588 .enter_freeze = intel_idle_freeze, }, 589 { 590 .enter = NULL } 591 }; 592 593 static struct cpuidle_state skl_cstates[] = { 594 { 595 .name = "C1-SKL", 596 .desc = "MWAIT 0x00", 597 .flags = MWAIT2flg(0x00), 598 .exit_latency = 2, 599 .target_residency = 2, 600 .enter = &intel_idle, 601 .enter_freeze = intel_idle_freeze, }, 602 { 603 .name = "C1E-SKL", 604 .desc = "MWAIT 0x01", 605 .flags = MWAIT2flg(0x01), 606 .exit_latency = 10, 607 .target_residency = 20, 608 .enter = &intel_idle, 609 .enter_freeze = intel_idle_freeze, }, 610 { 611 .name = "C3-SKL", 612 .desc = "MWAIT 0x10", 613 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 614 .exit_latency = 70, 615 .target_residency = 100, 616 .enter = &intel_idle, 617 .enter_freeze = intel_idle_freeze, }, 618 { 619 .name = "C6-SKL", 620 .desc = "MWAIT 0x20", 621 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 622 .exit_latency = 85, 623 .target_residency = 200, 624 .enter = &intel_idle, 625 .enter_freeze = 
intel_idle_freeze, }, 626 { 627 .name = "C7s-SKL", 628 .desc = "MWAIT 0x33", 629 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 630 .exit_latency = 124, 631 .target_residency = 800, 632 .enter = &intel_idle, 633 .enter_freeze = intel_idle_freeze, }, 634 { 635 .name = "C8-SKL", 636 .desc = "MWAIT 0x40", 637 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 638 .exit_latency = 200, 639 .target_residency = 800, 640 .enter = &intel_idle, 641 .enter_freeze = intel_idle_freeze, }, 642 { 643 .name = "C9-SKL", 644 .desc = "MWAIT 0x50", 645 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 646 .exit_latency = 480, 647 .target_residency = 5000, 648 .enter = &intel_idle, 649 .enter_freeze = intel_idle_freeze, }, 650 { 651 .name = "C10-SKL", 652 .desc = "MWAIT 0x60", 653 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 654 .exit_latency = 890, 655 .target_residency = 5000, 656 .enter = &intel_idle, 657 .enter_freeze = intel_idle_freeze, }, 658 { 659 .enter = NULL } 660 }; 661 662 static struct cpuidle_state skx_cstates[] = { 663 { 664 .name = "C1-SKX", 665 .desc = "MWAIT 0x00", 666 .flags = MWAIT2flg(0x00), 667 .exit_latency = 2, 668 .target_residency = 2, 669 .enter = &intel_idle, 670 .enter_freeze = intel_idle_freeze, }, 671 { 672 .name = "C1E-SKX", 673 .desc = "MWAIT 0x01", 674 .flags = MWAIT2flg(0x01), 675 .exit_latency = 10, 676 .target_residency = 20, 677 .enter = &intel_idle, 678 .enter_freeze = intel_idle_freeze, }, 679 { 680 .name = "C6-SKX", 681 .desc = "MWAIT 0x20", 682 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 683 .exit_latency = 133, 684 .target_residency = 600, 685 .enter = &intel_idle, 686 .enter_freeze = intel_idle_freeze, }, 687 { 688 .enter = NULL } 689 }; 690 691 static struct cpuidle_state atom_cstates[] = { 692 { 693 .name = "C1E-ATM", 694 .desc = "MWAIT 0x00", 695 .flags = MWAIT2flg(0x00), 696 .exit_latency = 10, 697 .target_residency = 20, 698 .enter = &intel_idle, 699 .enter_freeze = intel_idle_freeze, }, 700 { 701 .name = 
"C2-ATM", 702 .desc = "MWAIT 0x10", 703 .flags = MWAIT2flg(0x10), 704 .exit_latency = 20, 705 .target_residency = 80, 706 .enter = &intel_idle, 707 .enter_freeze = intel_idle_freeze, }, 708 { 709 .name = "C4-ATM", 710 .desc = "MWAIT 0x30", 711 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 712 .exit_latency = 100, 713 .target_residency = 400, 714 .enter = &intel_idle, 715 .enter_freeze = intel_idle_freeze, }, 716 { 717 .name = "C6-ATM", 718 .desc = "MWAIT 0x52", 719 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 720 .exit_latency = 140, 721 .target_residency = 560, 722 .enter = &intel_idle, 723 .enter_freeze = intel_idle_freeze, }, 724 { 725 .enter = NULL } 726 }; 727 static struct cpuidle_state avn_cstates[] = { 728 { 729 .name = "C1-AVN", 730 .desc = "MWAIT 0x00", 731 .flags = MWAIT2flg(0x00), 732 .exit_latency = 2, 733 .target_residency = 2, 734 .enter = &intel_idle, 735 .enter_freeze = intel_idle_freeze, }, 736 { 737 .name = "C6-AVN", 738 .desc = "MWAIT 0x51", 739 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 740 .exit_latency = 15, 741 .target_residency = 45, 742 .enter = &intel_idle, 743 .enter_freeze = intel_idle_freeze, }, 744 { 745 .enter = NULL } 746 }; 747 static struct cpuidle_state knl_cstates[] = { 748 { 749 .name = "C1-KNL", 750 .desc = "MWAIT 0x00", 751 .flags = MWAIT2flg(0x00), 752 .exit_latency = 1, 753 .target_residency = 2, 754 .enter = &intel_idle, 755 .enter_freeze = intel_idle_freeze }, 756 { 757 .name = "C6-KNL", 758 .desc = "MWAIT 0x10", 759 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 760 .exit_latency = 120, 761 .target_residency = 500, 762 .enter = &intel_idle, 763 .enter_freeze = intel_idle_freeze }, 764 { 765 .enter = NULL } 766 }; 767 768 static struct cpuidle_state bxt_cstates[] = { 769 { 770 .name = "C1-BXT", 771 .desc = "MWAIT 0x00", 772 .flags = MWAIT2flg(0x00), 773 .exit_latency = 2, 774 .target_residency = 2, 775 .enter = &intel_idle, 776 .enter_freeze = intel_idle_freeze, }, 777 { 778 .name = 
"C1E-BXT", 779 .desc = "MWAIT 0x01", 780 .flags = MWAIT2flg(0x01), 781 .exit_latency = 10, 782 .target_residency = 20, 783 .enter = &intel_idle, 784 .enter_freeze = intel_idle_freeze, }, 785 { 786 .name = "C6-BXT", 787 .desc = "MWAIT 0x20", 788 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 789 .exit_latency = 133, 790 .target_residency = 133, 791 .enter = &intel_idle, 792 .enter_freeze = intel_idle_freeze, }, 793 { 794 .name = "C7s-BXT", 795 .desc = "MWAIT 0x31", 796 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 797 .exit_latency = 155, 798 .target_residency = 155, 799 .enter = &intel_idle, 800 .enter_freeze = intel_idle_freeze, }, 801 { 802 .name = "C8-BXT", 803 .desc = "MWAIT 0x40", 804 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 805 .exit_latency = 1000, 806 .target_residency = 1000, 807 .enter = &intel_idle, 808 .enter_freeze = intel_idle_freeze, }, 809 { 810 .name = "C9-BXT", 811 .desc = "MWAIT 0x50", 812 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 813 .exit_latency = 2000, 814 .target_residency = 2000, 815 .enter = &intel_idle, 816 .enter_freeze = intel_idle_freeze, }, 817 { 818 .name = "C10-BXT", 819 .desc = "MWAIT 0x60", 820 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 821 .exit_latency = 10000, 822 .target_residency = 10000, 823 .enter = &intel_idle, 824 .enter_freeze = intel_idle_freeze, }, 825 { 826 .enter = NULL } 827 }; 828 829 static struct cpuidle_state dnv_cstates[] = { 830 { 831 .name = "C1-DNV", 832 .desc = "MWAIT 0x00", 833 .flags = MWAIT2flg(0x00), 834 .exit_latency = 2, 835 .target_residency = 2, 836 .enter = &intel_idle, 837 .enter_freeze = intel_idle_freeze, }, 838 { 839 .name = "C1E-DNV", 840 .desc = "MWAIT 0x01", 841 .flags = MWAIT2flg(0x01), 842 .exit_latency = 10, 843 .target_residency = 20, 844 .enter = &intel_idle, 845 .enter_freeze = intel_idle_freeze, }, 846 { 847 .name = "C6-DNV", 848 .desc = "MWAIT 0x20", 849 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 850 .exit_latency = 50, 851 
.target_residency = 500, 852 .enter = &intel_idle, 853 .enter_freeze = intel_idle_freeze, }, 854 { 855 .enter = NULL } 856 }; 857 858 /** 859 * intel_idle 860 * @dev: cpuidle_device 861 * @drv: cpuidle driver 862 * @index: index of cpuidle state 863 * 864 * Must be called under local_irq_disable(). 865 */ 866 static int intel_idle(struct cpuidle_device *dev, 867 struct cpuidle_driver *drv, int index) 868 { 869 unsigned long ecx = 1; /* break on interrupt flag */ 870 struct cpuidle_state *state = &drv->states[index]; 871 unsigned long eax = flg2MWAIT(state->flags); 872 unsigned int cstate; 873 int cpu = smp_processor_id(); 874 875 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; 876 877 /* 878 * leave_mm() to avoid costly and often unnecessary wakeups 879 * for flushing the user TLB's associated with the active mm. 880 */ 881 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) 882 leave_mm(cpu); 883 884 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 885 tick_broadcast_enter(); 886 887 mwait_idle_with_hints(eax, ecx); 888 889 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 890 tick_broadcast_exit(); 891 892 return index; 893 } 894 895 /** 896 * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle 897 * @dev: cpuidle_device 898 * @drv: cpuidle driver 899 * @index: state index 900 */ 901 static void intel_idle_freeze(struct cpuidle_device *dev, 902 struct cpuidle_driver *drv, int index) 903 { 904 unsigned long ecx = 1; /* break on interrupt flag */ 905 unsigned long eax = flg2MWAIT(drv->states[index].flags); 906 907 mwait_idle_with_hints(eax, ecx); 908 } 909 910 static void __setup_broadcast_timer(void *arg) 911 { 912 unsigned long on = (unsigned long)arg; 913 914 if (on) 915 tick_broadcast_enable(); 916 else 917 tick_broadcast_disable(); 918 } 919 920 static int cpu_hotplug_notify(struct notifier_block *n, 921 unsigned long action, void *hcpu) 922 { 923 int hotcpu = (unsigned long)hcpu; 924 struct cpuidle_device *dev; 925 
926 switch (action & ~CPU_TASKS_FROZEN) { 927 case CPU_ONLINE: 928 929 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 930 smp_call_function_single(hotcpu, __setup_broadcast_timer, 931 (void *)true, 1); 932 933 /* 934 * Some systems can hotplug a cpu at runtime after 935 * the kernel has booted, we have to initialize the 936 * driver in this case 937 */ 938 dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu); 939 if (dev->registered) 940 break; 941 942 if (intel_idle_cpu_init(hotcpu)) 943 return NOTIFY_BAD; 944 945 break; 946 } 947 return NOTIFY_OK; 948 } 949 950 static struct notifier_block cpu_hotplug_notifier = { 951 .notifier_call = cpu_hotplug_notify, 952 }; 953 954 static void auto_demotion_disable(void *dummy) 955 { 956 unsigned long long msr_bits; 957 958 rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); 959 msr_bits &= ~(icpu->auto_demotion_disable_flags); 960 wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); 961 } 962 static void c1e_promotion_disable(void *dummy) 963 { 964 unsigned long long msr_bits; 965 966 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 967 msr_bits &= ~0x2; 968 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 969 } 970 971 static const struct idle_cpu idle_cpu_nehalem = { 972 .state_table = nehalem_cstates, 973 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 974 .disable_promotion_to_c1e = true, 975 }; 976 977 static const struct idle_cpu idle_cpu_atom = { 978 .state_table = atom_cstates, 979 }; 980 981 static const struct idle_cpu idle_cpu_lincroft = { 982 .state_table = atom_cstates, 983 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 984 }; 985 986 static const struct idle_cpu idle_cpu_snb = { 987 .state_table = snb_cstates, 988 .disable_promotion_to_c1e = true, 989 }; 990 991 static const struct idle_cpu idle_cpu_byt = { 992 .state_table = byt_cstates, 993 .disable_promotion_to_c1e = true, 994 .byt_auto_demotion_disable_flag = true, 995 }; 996 997 static const struct idle_cpu idle_cpu_cht = { 998 
.state_table = cht_cstates, 999 .disable_promotion_to_c1e = true, 1000 .byt_auto_demotion_disable_flag = true, 1001 }; 1002 1003 static const struct idle_cpu idle_cpu_ivb = { 1004 .state_table = ivb_cstates, 1005 .disable_promotion_to_c1e = true, 1006 }; 1007 1008 static const struct idle_cpu idle_cpu_ivt = { 1009 .state_table = ivt_cstates, 1010 .disable_promotion_to_c1e = true, 1011 }; 1012 1013 static const struct idle_cpu idle_cpu_hsw = { 1014 .state_table = hsw_cstates, 1015 .disable_promotion_to_c1e = true, 1016 }; 1017 1018 static const struct idle_cpu idle_cpu_bdw = { 1019 .state_table = bdw_cstates, 1020 .disable_promotion_to_c1e = true, 1021 }; 1022 1023 static const struct idle_cpu idle_cpu_skl = { 1024 .state_table = skl_cstates, 1025 .disable_promotion_to_c1e = true, 1026 }; 1027 1028 static const struct idle_cpu idle_cpu_skx = { 1029 .state_table = skx_cstates, 1030 .disable_promotion_to_c1e = true, 1031 }; 1032 1033 static const struct idle_cpu idle_cpu_avn = { 1034 .state_table = avn_cstates, 1035 .disable_promotion_to_c1e = true, 1036 }; 1037 1038 static const struct idle_cpu idle_cpu_knl = { 1039 .state_table = knl_cstates, 1040 }; 1041 1042 static const struct idle_cpu idle_cpu_bxt = { 1043 .state_table = bxt_cstates, 1044 .disable_promotion_to_c1e = true, 1045 }; 1046 1047 static const struct idle_cpu idle_cpu_dnv = { 1048 .state_table = dnv_cstates, 1049 .disable_promotion_to_c1e = true, 1050 }; 1051 1052 #define ICPU(model, cpu) \ 1053 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } 1054 1055 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1056 ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), 1057 ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), 1058 ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), 1059 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), 1060 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), 1061 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), 1062 ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), 1063 
ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), 1064 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), 1065 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), 1066 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), 1067 ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), 1068 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), 1069 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), 1070 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), 1071 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), 1072 ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), 1073 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), 1074 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), 1075 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), 1076 ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), 1077 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), 1078 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), 1079 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), 1080 ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), 1081 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), 1082 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), 1083 ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), 1084 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), 1085 ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), 1086 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), 1087 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), 1088 ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), 1089 {} 1090 }; 1091 1092 /* 1093 * intel_idle_probe() 1094 */ 1095 static int __init intel_idle_probe(void) 1096 { 1097 unsigned int eax, ebx, ecx; 1098 const struct x86_cpu_id *id; 1099 1100 if (max_cstate == 0) { 1101 pr_debug(PREFIX "disabled\n"); 1102 return -EPERM; 1103 } 1104 1105 id = x86_match_cpu(intel_idle_ids); 1106 if (!id) { 1107 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1108 boot_cpu_data.x86 == 6) 1109 pr_debug(PREFIX "does not run on family %d model %d\n", 1110 boot_cpu_data.x86, boot_cpu_data.x86_model); 1111 return -ENODEV; 1112 } 1113 1114 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1115 return -ENODEV; 1116 1117 
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1118 1119 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1120 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1121 !mwait_substates) 1122 return -ENODEV; 1123 1124 pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); 1125 1126 icpu = (const struct idle_cpu *)id->driver_data; 1127 cpuidle_state_table = icpu->state_table; 1128 1129 pr_debug(PREFIX "v" INTEL_IDLE_VERSION 1130 " model 0x%X\n", boot_cpu_data.x86_model); 1131 1132 return 0; 1133 } 1134 1135 /* 1136 * intel_idle_cpuidle_devices_uninit() 1137 * Unregisters the cpuidle devices. 1138 */ 1139 static void intel_idle_cpuidle_devices_uninit(void) 1140 { 1141 int i; 1142 struct cpuidle_device *dev; 1143 1144 for_each_online_cpu(i) { 1145 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); 1146 cpuidle_unregister_device(dev); 1147 } 1148 } 1149 1150 /* 1151 * ivt_idle_state_table_update(void) 1152 * 1153 * Tune IVT multi-socket targets 1154 * Assumption: num_sockets == (max_package_num + 1) 1155 */ 1156 static void ivt_idle_state_table_update(void) 1157 { 1158 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1159 int cpu, package_num, num_sockets = 1; 1160 1161 for_each_online_cpu(cpu) { 1162 package_num = topology_physical_package_id(cpu); 1163 if (package_num + 1 > num_sockets) { 1164 num_sockets = package_num + 1; 1165 1166 if (num_sockets > 4) { 1167 cpuidle_state_table = ivt_cstates_8s; 1168 return; 1169 } 1170 } 1171 } 1172 1173 if (num_sockets > 2) 1174 cpuidle_state_table = ivt_cstates_4s; 1175 1176 /* else, 1 and 2 socket systems use default ivt_cstates */ 1177 } 1178 1179 /* 1180 * Translate IRTL (Interrupt Response Time Limit) MSR to usec 1181 */ 1182 1183 static unsigned int irtl_ns_units[] = { 1184 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 1185 1186 static unsigned long long irtl_2_usec(unsigned long long irtl) 1187 { 1188 unsigned long long ns; 1189 1190 if (!irtl) 1191 return 0; 1192 1193 ns = irtl_ns_units[(irtl >> 10) & 
0x7]; 1194 1195 return div64_u64((irtl & 0x3FF) * ns, 1000); 1196 } 1197 /* 1198 * bxt_idle_state_table_update(void) 1199 * 1200 * On BXT, we trust the IRTL to show the definitive maximum latency 1201 * We use the same value for target_residency. 1202 */ 1203 static void bxt_idle_state_table_update(void) 1204 { 1205 unsigned long long msr; 1206 unsigned int usec; 1207 1208 rdmsrl(MSR_PKGC6_IRTL, msr); 1209 usec = irtl_2_usec(msr); 1210 if (usec) { 1211 bxt_cstates[2].exit_latency = usec; 1212 bxt_cstates[2].target_residency = usec; 1213 } 1214 1215 rdmsrl(MSR_PKGC7_IRTL, msr); 1216 usec = irtl_2_usec(msr); 1217 if (usec) { 1218 bxt_cstates[3].exit_latency = usec; 1219 bxt_cstates[3].target_residency = usec; 1220 } 1221 1222 rdmsrl(MSR_PKGC8_IRTL, msr); 1223 usec = irtl_2_usec(msr); 1224 if (usec) { 1225 bxt_cstates[4].exit_latency = usec; 1226 bxt_cstates[4].target_residency = usec; 1227 } 1228 1229 rdmsrl(MSR_PKGC9_IRTL, msr); 1230 usec = irtl_2_usec(msr); 1231 if (usec) { 1232 bxt_cstates[5].exit_latency = usec; 1233 bxt_cstates[5].target_residency = usec; 1234 } 1235 1236 rdmsrl(MSR_PKGC10_IRTL, msr); 1237 usec = irtl_2_usec(msr); 1238 if (usec) { 1239 bxt_cstates[6].exit_latency = usec; 1240 bxt_cstates[6].target_residency = usec; 1241 } 1242 1243 } 1244 /* 1245 * sklh_idle_state_table_update(void) 1246 * 1247 * On SKL-H (model 0x5e) disable C8 and C9 if: 1248 * C10 is enabled and SGX disabled 1249 */ 1250 static void sklh_idle_state_table_update(void) 1251 { 1252 unsigned long long msr; 1253 unsigned int eax, ebx, ecx, edx; 1254 1255 1256 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1257 if (max_cstate <= 7) 1258 return; 1259 1260 /* if PC10 not present in CPUID.MWAIT.EDX */ 1261 if ((mwait_substates & (0xF << 28)) == 0) 1262 return; 1263 1264 rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); 1265 1266 /* PC10 is not enabled in PKG C-state limit */ 1267 if ((msr & 0xF) != 8) 1268 return; 1269 1270 ecx = 0; 1271 cpuid(7, &eax, &ebx, &ecx, 
&edx); 1272 1273 /* if SGX is present */ 1274 if (ebx & (1 << 2)) { 1275 1276 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1277 1278 /* if SGX is enabled */ 1279 if (msr & (1 << 18)) 1280 return; 1281 } 1282 1283 skl_cstates[5].disabled = 1; /* C8-SKL */ 1284 skl_cstates[6].disabled = 1; /* C9-SKL */ 1285 } 1286 /* 1287 * intel_idle_state_table_update() 1288 * 1289 * Update the default state_table for this CPU-id 1290 */ 1291 1292 static void intel_idle_state_table_update(void) 1293 { 1294 switch (boot_cpu_data.x86_model) { 1295 1296 case INTEL_FAM6_IVYBRIDGE_X: 1297 ivt_idle_state_table_update(); 1298 break; 1299 case INTEL_FAM6_ATOM_GOLDMONT: 1300 bxt_idle_state_table_update(); 1301 break; 1302 case INTEL_FAM6_SKYLAKE_DESKTOP: 1303 sklh_idle_state_table_update(); 1304 break; 1305 } 1306 } 1307 1308 /* 1309 * intel_idle_cpuidle_driver_init() 1310 * allocate, initialize cpuidle_states 1311 */ 1312 static void __init intel_idle_cpuidle_driver_init(void) 1313 { 1314 int cstate; 1315 struct cpuidle_driver *drv = &intel_idle_driver; 1316 1317 intel_idle_state_table_update(); 1318 1319 drv->state_count = 1; 1320 1321 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1322 int num_substates, mwait_hint, mwait_cstate; 1323 1324 if ((cpuidle_state_table[cstate].enter == NULL) && 1325 (cpuidle_state_table[cstate].enter_freeze == NULL)) 1326 break; 1327 1328 if (cstate + 1 > max_cstate) { 1329 printk(PREFIX "max_cstate %d reached\n", 1330 max_cstate); 1331 break; 1332 } 1333 1334 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1335 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); 1336 1337 /* number of sub-states for this state in CPUID.MWAIT */ 1338 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) 1339 & MWAIT_SUBSTATE_MASK; 1340 1341 /* if NO sub-states for this state in CPUID, skip it */ 1342 if (num_substates == 0) 1343 continue; 1344 1345 /* if state marked as disabled, skip it */ 1346 if (cpuidle_state_table[cstate].disabled != 0) { 1347 
pr_debug(PREFIX "state %s is disabled", 1348 cpuidle_state_table[cstate].name); 1349 continue; 1350 } 1351 1352 1353 if (((mwait_cstate + 1) > 2) && 1354 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1355 mark_tsc_unstable("TSC halts in idle" 1356 " states deeper than C2"); 1357 1358 drv->states[drv->state_count] = /* structure copy */ 1359 cpuidle_state_table[cstate]; 1360 1361 drv->state_count += 1; 1362 } 1363 1364 if (icpu->byt_auto_demotion_disable_flag) { 1365 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1366 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1367 } 1368 } 1369 1370 1371 /* 1372 * intel_idle_cpu_init() 1373 * allocate, initialize, register cpuidle_devices 1374 * @cpu: cpu/core to initialize 1375 */ 1376 static int intel_idle_cpu_init(int cpu) 1377 { 1378 struct cpuidle_device *dev; 1379 1380 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1381 1382 dev->cpu = cpu; 1383 1384 if (cpuidle_register_device(dev)) { 1385 pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); 1386 return -EIO; 1387 } 1388 1389 if (icpu->auto_demotion_disable_flags) 1390 smp_call_function_single(cpu, auto_demotion_disable, NULL, 1); 1391 1392 if (icpu->disable_promotion_to_c1e) 1393 smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1); 1394 1395 return 0; 1396 } 1397 1398 static int __init intel_idle_init(void) 1399 { 1400 int retval, i; 1401 1402 /* Do not load intel_idle at all for now if idle= is passed */ 1403 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1404 return -ENODEV; 1405 1406 retval = intel_idle_probe(); 1407 if (retval) 1408 return retval; 1409 1410 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1411 if (intel_idle_cpuidle_devices == NULL) 1412 return -ENOMEM; 1413 1414 intel_idle_cpuidle_driver_init(); 1415 retval = cpuidle_register_driver(&intel_idle_driver); 1416 if (retval) { 1417 struct cpuidle_driver *drv = cpuidle_get_driver(); 1418 printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", 1419 drv ? 
drv->name : "none"); 1420 free_percpu(intel_idle_cpuidle_devices); 1421 return retval; 1422 } 1423 1424 cpu_notifier_register_begin(); 1425 1426 for_each_online_cpu(i) { 1427 retval = intel_idle_cpu_init(i); 1428 if (retval) { 1429 intel_idle_cpuidle_devices_uninit(); 1430 cpu_notifier_register_done(); 1431 cpuidle_unregister_driver(&intel_idle_driver); 1432 free_percpu(intel_idle_cpuidle_devices); 1433 return retval; 1434 } 1435 } 1436 __register_cpu_notifier(&cpu_hotplug_notifier); 1437 1438 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1439 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1440 else 1441 on_each_cpu(__setup_broadcast_timer, (void *)true, 1); 1442 1443 cpu_notifier_register_done(); 1444 1445 pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", 1446 lapic_timer_reliable_states); 1447 1448 return 0; 1449 } 1450 device_initcall(intel_idle_init); 1451 1452 /* 1453 * We are not really modular, but we used to support that. Meaning we also 1454 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1455 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1456 * is the easiest way (currently) to continue doing that. 1457 */ 1458 module_param(max_cstate, int, 0444); 1459