/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
48 * 49 */ 50 51 /* un-comment DEBUG to enable pr_debug() statements */ 52 #define DEBUG 53 54 #include <linux/kernel.h> 55 #include <linux/cpuidle.h> 56 #include <linux/tick.h> 57 #include <trace/events/power.h> 58 #include <linux/sched.h> 59 #include <linux/notifier.h> 60 #include <linux/cpu.h> 61 #include <linux/moduleparam.h> 62 #include <asm/cpu_device_id.h> 63 #include <asm/intel-family.h> 64 #include <asm/mwait.h> 65 #include <asm/msr.h> 66 67 #define INTEL_IDLE_VERSION "0.4.1" 68 #define PREFIX "intel_idle: " 69 70 static struct cpuidle_driver intel_idle_driver = { 71 .name = "intel_idle", 72 .owner = THIS_MODULE, 73 }; 74 /* intel_idle.max_cstate=0 disables driver */ 75 static int max_cstate = CPUIDLE_STATE_MAX - 1; 76 77 static unsigned int mwait_substates; 78 79 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF 80 /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ 81 static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ 82 83 struct idle_cpu { 84 struct cpuidle_state *state_table; 85 86 /* 87 * Hardware C-state auto-demotion may not always be optimal. 88 * Indicate which enable bits to clear here. 89 */ 90 unsigned long auto_demotion_disable_flags; 91 bool byt_auto_demotion_disable_flag; 92 bool disable_promotion_to_c1e; 93 }; 94 95 static const struct idle_cpu *icpu; 96 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 97 static int intel_idle(struct cpuidle_device *dev, 98 struct cpuidle_driver *drv, int index); 99 static void intel_idle_freeze(struct cpuidle_device *dev, 100 struct cpuidle_driver *drv, int index); 101 static struct cpuidle_state *cpuidle_state_table; 102 103 /* 104 * Set this flag for states where the HW flushes the TLB for us 105 * and so we don't need cross-calls to keep it consistent. 106 * If this flag is set, SW flushes the TLB, so even if the 107 * HW doesn't do the flushing, this flag is safe to use. 
108 */ 109 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 110 111 /* 112 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 113 * the C-state (top nibble) and sub-state (bottom nibble) 114 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 115 * 116 * We store the hint at the top of our "flags" for each state. 117 */ 118 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 119 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 120 121 /* 122 * States are indexed by the cstate number, 123 * which is also the index into the MWAIT hint array. 124 * Thus C0 is a dummy. 125 */ 126 static struct cpuidle_state nehalem_cstates[] = { 127 { 128 .name = "C1", 129 .desc = "MWAIT 0x00", 130 .flags = MWAIT2flg(0x00), 131 .exit_latency = 3, 132 .target_residency = 6, 133 .enter = &intel_idle, 134 .enter_freeze = intel_idle_freeze, }, 135 { 136 .name = "C1E", 137 .desc = "MWAIT 0x01", 138 .flags = MWAIT2flg(0x01), 139 .exit_latency = 10, 140 .target_residency = 20, 141 .enter = &intel_idle, 142 .enter_freeze = intel_idle_freeze, }, 143 { 144 .name = "C3", 145 .desc = "MWAIT 0x10", 146 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 147 .exit_latency = 20, 148 .target_residency = 80, 149 .enter = &intel_idle, 150 .enter_freeze = intel_idle_freeze, }, 151 { 152 .name = "C6", 153 .desc = "MWAIT 0x20", 154 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 155 .exit_latency = 200, 156 .target_residency = 800, 157 .enter = &intel_idle, 158 .enter_freeze = intel_idle_freeze, }, 159 { 160 .enter = NULL } 161 }; 162 163 static struct cpuidle_state snb_cstates[] = { 164 { 165 .name = "C1", 166 .desc = "MWAIT 0x00", 167 .flags = MWAIT2flg(0x00), 168 .exit_latency = 2, 169 .target_residency = 2, 170 .enter = &intel_idle, 171 .enter_freeze = intel_idle_freeze, }, 172 { 173 .name = "C1E", 174 .desc = "MWAIT 0x01", 175 .flags = MWAIT2flg(0x01), 176 .exit_latency = 10, 177 .target_residency = 20, 178 .enter = &intel_idle, 179 .enter_freeze = intel_idle_freeze, }, 180 { 181 .name = "C3", 182 .desc = 
"MWAIT 0x10", 183 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 184 .exit_latency = 80, 185 .target_residency = 211, 186 .enter = &intel_idle, 187 .enter_freeze = intel_idle_freeze, }, 188 { 189 .name = "C6", 190 .desc = "MWAIT 0x20", 191 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 192 .exit_latency = 104, 193 .target_residency = 345, 194 .enter = &intel_idle, 195 .enter_freeze = intel_idle_freeze, }, 196 { 197 .name = "C7", 198 .desc = "MWAIT 0x30", 199 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 200 .exit_latency = 109, 201 .target_residency = 345, 202 .enter = &intel_idle, 203 .enter_freeze = intel_idle_freeze, }, 204 { 205 .enter = NULL } 206 }; 207 208 static struct cpuidle_state byt_cstates[] = { 209 { 210 .name = "C1", 211 .desc = "MWAIT 0x00", 212 .flags = MWAIT2flg(0x00), 213 .exit_latency = 1, 214 .target_residency = 1, 215 .enter = &intel_idle, 216 .enter_freeze = intel_idle_freeze, }, 217 { 218 .name = "C6N", 219 .desc = "MWAIT 0x58", 220 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 221 .exit_latency = 300, 222 .target_residency = 275, 223 .enter = &intel_idle, 224 .enter_freeze = intel_idle_freeze, }, 225 { 226 .name = "C6S", 227 .desc = "MWAIT 0x52", 228 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 229 .exit_latency = 500, 230 .target_residency = 560, 231 .enter = &intel_idle, 232 .enter_freeze = intel_idle_freeze, }, 233 { 234 .name = "C7", 235 .desc = "MWAIT 0x60", 236 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 237 .exit_latency = 1200, 238 .target_residency = 4000, 239 .enter = &intel_idle, 240 .enter_freeze = intel_idle_freeze, }, 241 { 242 .name = "C7S", 243 .desc = "MWAIT 0x64", 244 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 245 .exit_latency = 10000, 246 .target_residency = 20000, 247 .enter = &intel_idle, 248 .enter_freeze = intel_idle_freeze, }, 249 { 250 .enter = NULL } 251 }; 252 253 static struct cpuidle_state cht_cstates[] = { 254 { 255 .name = "C1", 256 .desc = "MWAIT 0x00", 
257 .flags = MWAIT2flg(0x00), 258 .exit_latency = 1, 259 .target_residency = 1, 260 .enter = &intel_idle, 261 .enter_freeze = intel_idle_freeze, }, 262 { 263 .name = "C6N", 264 .desc = "MWAIT 0x58", 265 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 266 .exit_latency = 80, 267 .target_residency = 275, 268 .enter = &intel_idle, 269 .enter_freeze = intel_idle_freeze, }, 270 { 271 .name = "C6S", 272 .desc = "MWAIT 0x52", 273 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 274 .exit_latency = 200, 275 .target_residency = 560, 276 .enter = &intel_idle, 277 .enter_freeze = intel_idle_freeze, }, 278 { 279 .name = "C7", 280 .desc = "MWAIT 0x60", 281 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 282 .exit_latency = 1200, 283 .target_residency = 4000, 284 .enter = &intel_idle, 285 .enter_freeze = intel_idle_freeze, }, 286 { 287 .name = "C7S", 288 .desc = "MWAIT 0x64", 289 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 290 .exit_latency = 10000, 291 .target_residency = 20000, 292 .enter = &intel_idle, 293 .enter_freeze = intel_idle_freeze, }, 294 { 295 .enter = NULL } 296 }; 297 298 static struct cpuidle_state ivb_cstates[] = { 299 { 300 .name = "C1", 301 .desc = "MWAIT 0x00", 302 .flags = MWAIT2flg(0x00), 303 .exit_latency = 1, 304 .target_residency = 1, 305 .enter = &intel_idle, 306 .enter_freeze = intel_idle_freeze, }, 307 { 308 .name = "C1E", 309 .desc = "MWAIT 0x01", 310 .flags = MWAIT2flg(0x01), 311 .exit_latency = 10, 312 .target_residency = 20, 313 .enter = &intel_idle, 314 .enter_freeze = intel_idle_freeze, }, 315 { 316 .name = "C3", 317 .desc = "MWAIT 0x10", 318 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 319 .exit_latency = 59, 320 .target_residency = 156, 321 .enter = &intel_idle, 322 .enter_freeze = intel_idle_freeze, }, 323 { 324 .name = "C6", 325 .desc = "MWAIT 0x20", 326 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 327 .exit_latency = 80, 328 .target_residency = 300, 329 .enter = &intel_idle, 330 .enter_freeze = 
intel_idle_freeze, }, 331 { 332 .name = "C7", 333 .desc = "MWAIT 0x30", 334 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 335 .exit_latency = 87, 336 .target_residency = 300, 337 .enter = &intel_idle, 338 .enter_freeze = intel_idle_freeze, }, 339 { 340 .enter = NULL } 341 }; 342 343 static struct cpuidle_state ivt_cstates[] = { 344 { 345 .name = "C1", 346 .desc = "MWAIT 0x00", 347 .flags = MWAIT2flg(0x00), 348 .exit_latency = 1, 349 .target_residency = 1, 350 .enter = &intel_idle, 351 .enter_freeze = intel_idle_freeze, }, 352 { 353 .name = "C1E", 354 .desc = "MWAIT 0x01", 355 .flags = MWAIT2flg(0x01), 356 .exit_latency = 10, 357 .target_residency = 80, 358 .enter = &intel_idle, 359 .enter_freeze = intel_idle_freeze, }, 360 { 361 .name = "C3", 362 .desc = "MWAIT 0x10", 363 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 364 .exit_latency = 59, 365 .target_residency = 156, 366 .enter = &intel_idle, 367 .enter_freeze = intel_idle_freeze, }, 368 { 369 .name = "C6", 370 .desc = "MWAIT 0x20", 371 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 372 .exit_latency = 82, 373 .target_residency = 300, 374 .enter = &intel_idle, 375 .enter_freeze = intel_idle_freeze, }, 376 { 377 .enter = NULL } 378 }; 379 380 static struct cpuidle_state ivt_cstates_4s[] = { 381 { 382 .name = "C1", 383 .desc = "MWAIT 0x00", 384 .flags = MWAIT2flg(0x00), 385 .exit_latency = 1, 386 .target_residency = 1, 387 .enter = &intel_idle, 388 .enter_freeze = intel_idle_freeze, }, 389 { 390 .name = "C1E", 391 .desc = "MWAIT 0x01", 392 .flags = MWAIT2flg(0x01), 393 .exit_latency = 10, 394 .target_residency = 250, 395 .enter = &intel_idle, 396 .enter_freeze = intel_idle_freeze, }, 397 { 398 .name = "C3", 399 .desc = "MWAIT 0x10", 400 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 401 .exit_latency = 59, 402 .target_residency = 300, 403 .enter = &intel_idle, 404 .enter_freeze = intel_idle_freeze, }, 405 { 406 .name = "C6", 407 .desc = "MWAIT 0x20", 408 .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, 409 .exit_latency = 84, 410 .target_residency = 400, 411 .enter = &intel_idle, 412 .enter_freeze = intel_idle_freeze, }, 413 { 414 .enter = NULL } 415 }; 416 417 static struct cpuidle_state ivt_cstates_8s[] = { 418 { 419 .name = "C1", 420 .desc = "MWAIT 0x00", 421 .flags = MWAIT2flg(0x00), 422 .exit_latency = 1, 423 .target_residency = 1, 424 .enter = &intel_idle, 425 .enter_freeze = intel_idle_freeze, }, 426 { 427 .name = "C1E", 428 .desc = "MWAIT 0x01", 429 .flags = MWAIT2flg(0x01), 430 .exit_latency = 10, 431 .target_residency = 500, 432 .enter = &intel_idle, 433 .enter_freeze = intel_idle_freeze, }, 434 { 435 .name = "C3", 436 .desc = "MWAIT 0x10", 437 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 438 .exit_latency = 59, 439 .target_residency = 600, 440 .enter = &intel_idle, 441 .enter_freeze = intel_idle_freeze, }, 442 { 443 .name = "C6", 444 .desc = "MWAIT 0x20", 445 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 446 .exit_latency = 88, 447 .target_residency = 700, 448 .enter = &intel_idle, 449 .enter_freeze = intel_idle_freeze, }, 450 { 451 .enter = NULL } 452 }; 453 454 static struct cpuidle_state hsw_cstates[] = { 455 { 456 .name = "C1", 457 .desc = "MWAIT 0x00", 458 .flags = MWAIT2flg(0x00), 459 .exit_latency = 2, 460 .target_residency = 2, 461 .enter = &intel_idle, 462 .enter_freeze = intel_idle_freeze, }, 463 { 464 .name = "C1E", 465 .desc = "MWAIT 0x01", 466 .flags = MWAIT2flg(0x01), 467 .exit_latency = 10, 468 .target_residency = 20, 469 .enter = &intel_idle, 470 .enter_freeze = intel_idle_freeze, }, 471 { 472 .name = "C3", 473 .desc = "MWAIT 0x10", 474 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 475 .exit_latency = 33, 476 .target_residency = 100, 477 .enter = &intel_idle, 478 .enter_freeze = intel_idle_freeze, }, 479 { 480 .name = "C6", 481 .desc = "MWAIT 0x20", 482 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 483 .exit_latency = 133, 484 .target_residency = 400, 485 .enter = &intel_idle, 486 
.enter_freeze = intel_idle_freeze, }, 487 { 488 .name = "C7s", 489 .desc = "MWAIT 0x32", 490 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 491 .exit_latency = 166, 492 .target_residency = 500, 493 .enter = &intel_idle, 494 .enter_freeze = intel_idle_freeze, }, 495 { 496 .name = "C8", 497 .desc = "MWAIT 0x40", 498 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 499 .exit_latency = 300, 500 .target_residency = 900, 501 .enter = &intel_idle, 502 .enter_freeze = intel_idle_freeze, }, 503 { 504 .name = "C9", 505 .desc = "MWAIT 0x50", 506 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 507 .exit_latency = 600, 508 .target_residency = 1800, 509 .enter = &intel_idle, 510 .enter_freeze = intel_idle_freeze, }, 511 { 512 .name = "C10", 513 .desc = "MWAIT 0x60", 514 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 515 .exit_latency = 2600, 516 .target_residency = 7700, 517 .enter = &intel_idle, 518 .enter_freeze = intel_idle_freeze, }, 519 { 520 .enter = NULL } 521 }; 522 static struct cpuidle_state bdw_cstates[] = { 523 { 524 .name = "C1", 525 .desc = "MWAIT 0x00", 526 .flags = MWAIT2flg(0x00), 527 .exit_latency = 2, 528 .target_residency = 2, 529 .enter = &intel_idle, 530 .enter_freeze = intel_idle_freeze, }, 531 { 532 .name = "C1E", 533 .desc = "MWAIT 0x01", 534 .flags = MWAIT2flg(0x01), 535 .exit_latency = 10, 536 .target_residency = 20, 537 .enter = &intel_idle, 538 .enter_freeze = intel_idle_freeze, }, 539 { 540 .name = "C3", 541 .desc = "MWAIT 0x10", 542 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 543 .exit_latency = 40, 544 .target_residency = 100, 545 .enter = &intel_idle, 546 .enter_freeze = intel_idle_freeze, }, 547 { 548 .name = "C6", 549 .desc = "MWAIT 0x20", 550 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 551 .exit_latency = 133, 552 .target_residency = 400, 553 .enter = &intel_idle, 554 .enter_freeze = intel_idle_freeze, }, 555 { 556 .name = "C7s", 557 .desc = "MWAIT 0x32", 558 .flags = MWAIT2flg(0x32) | 
CPUIDLE_FLAG_TLB_FLUSHED, 559 .exit_latency = 166, 560 .target_residency = 500, 561 .enter = &intel_idle, 562 .enter_freeze = intel_idle_freeze, }, 563 { 564 .name = "C8", 565 .desc = "MWAIT 0x40", 566 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 567 .exit_latency = 300, 568 .target_residency = 900, 569 .enter = &intel_idle, 570 .enter_freeze = intel_idle_freeze, }, 571 { 572 .name = "C9", 573 .desc = "MWAIT 0x50", 574 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 575 .exit_latency = 600, 576 .target_residency = 1800, 577 .enter = &intel_idle, 578 .enter_freeze = intel_idle_freeze, }, 579 { 580 .name = "C10", 581 .desc = "MWAIT 0x60", 582 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 583 .exit_latency = 2600, 584 .target_residency = 7700, 585 .enter = &intel_idle, 586 .enter_freeze = intel_idle_freeze, }, 587 { 588 .enter = NULL } 589 }; 590 591 static struct cpuidle_state skl_cstates[] = { 592 { 593 .name = "C1", 594 .desc = "MWAIT 0x00", 595 .flags = MWAIT2flg(0x00), 596 .exit_latency = 2, 597 .target_residency = 2, 598 .enter = &intel_idle, 599 .enter_freeze = intel_idle_freeze, }, 600 { 601 .name = "C1E", 602 .desc = "MWAIT 0x01", 603 .flags = MWAIT2flg(0x01), 604 .exit_latency = 10, 605 .target_residency = 20, 606 .enter = &intel_idle, 607 .enter_freeze = intel_idle_freeze, }, 608 { 609 .name = "C3", 610 .desc = "MWAIT 0x10", 611 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 612 .exit_latency = 70, 613 .target_residency = 100, 614 .enter = &intel_idle, 615 .enter_freeze = intel_idle_freeze, }, 616 { 617 .name = "C6", 618 .desc = "MWAIT 0x20", 619 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 620 .exit_latency = 85, 621 .target_residency = 200, 622 .enter = &intel_idle, 623 .enter_freeze = intel_idle_freeze, }, 624 { 625 .name = "C7s", 626 .desc = "MWAIT 0x33", 627 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 628 .exit_latency = 124, 629 .target_residency = 800, 630 .enter = &intel_idle, 631 .enter_freeze = 
intel_idle_freeze, }, 632 { 633 .name = "C8", 634 .desc = "MWAIT 0x40", 635 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 636 .exit_latency = 200, 637 .target_residency = 800, 638 .enter = &intel_idle, 639 .enter_freeze = intel_idle_freeze, }, 640 { 641 .name = "C9", 642 .desc = "MWAIT 0x50", 643 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 644 .exit_latency = 480, 645 .target_residency = 5000, 646 .enter = &intel_idle, 647 .enter_freeze = intel_idle_freeze, }, 648 { 649 .name = "C10", 650 .desc = "MWAIT 0x60", 651 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 652 .exit_latency = 890, 653 .target_residency = 5000, 654 .enter = &intel_idle, 655 .enter_freeze = intel_idle_freeze, }, 656 { 657 .enter = NULL } 658 }; 659 660 static struct cpuidle_state skx_cstates[] = { 661 { 662 .name = "C1", 663 .desc = "MWAIT 0x00", 664 .flags = MWAIT2flg(0x00), 665 .exit_latency = 2, 666 .target_residency = 2, 667 .enter = &intel_idle, 668 .enter_freeze = intel_idle_freeze, }, 669 { 670 .name = "C1E", 671 .desc = "MWAIT 0x01", 672 .flags = MWAIT2flg(0x01), 673 .exit_latency = 10, 674 .target_residency = 20, 675 .enter = &intel_idle, 676 .enter_freeze = intel_idle_freeze, }, 677 { 678 .name = "C6", 679 .desc = "MWAIT 0x20", 680 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 681 .exit_latency = 133, 682 .target_residency = 600, 683 .enter = &intel_idle, 684 .enter_freeze = intel_idle_freeze, }, 685 { 686 .enter = NULL } 687 }; 688 689 static struct cpuidle_state atom_cstates[] = { 690 { 691 .name = "C1E", 692 .desc = "MWAIT 0x00", 693 .flags = MWAIT2flg(0x00), 694 .exit_latency = 10, 695 .target_residency = 20, 696 .enter = &intel_idle, 697 .enter_freeze = intel_idle_freeze, }, 698 { 699 .name = "C2", 700 .desc = "MWAIT 0x10", 701 .flags = MWAIT2flg(0x10), 702 .exit_latency = 20, 703 .target_residency = 80, 704 .enter = &intel_idle, 705 .enter_freeze = intel_idle_freeze, }, 706 { 707 .name = "C4", 708 .desc = "MWAIT 0x30", 709 .flags = MWAIT2flg(0x30) | 
CPUIDLE_FLAG_TLB_FLUSHED, 710 .exit_latency = 100, 711 .target_residency = 400, 712 .enter = &intel_idle, 713 .enter_freeze = intel_idle_freeze, }, 714 { 715 .name = "C6", 716 .desc = "MWAIT 0x52", 717 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 718 .exit_latency = 140, 719 .target_residency = 560, 720 .enter = &intel_idle, 721 .enter_freeze = intel_idle_freeze, }, 722 { 723 .enter = NULL } 724 }; 725 static struct cpuidle_state tangier_cstates[] = { 726 { 727 .name = "C1", 728 .desc = "MWAIT 0x00", 729 .flags = MWAIT2flg(0x00), 730 .exit_latency = 1, 731 .target_residency = 4, 732 .enter = &intel_idle, 733 .enter_freeze = intel_idle_freeze, }, 734 { 735 .name = "C4", 736 .desc = "MWAIT 0x30", 737 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 738 .exit_latency = 100, 739 .target_residency = 400, 740 .enter = &intel_idle, 741 .enter_freeze = intel_idle_freeze, }, 742 { 743 .name = "C6", 744 .desc = "MWAIT 0x52", 745 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 746 .exit_latency = 140, 747 .target_residency = 560, 748 .enter = &intel_idle, 749 .enter_freeze = intel_idle_freeze, }, 750 { 751 .name = "C7", 752 .desc = "MWAIT 0x60", 753 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 754 .exit_latency = 1200, 755 .target_residency = 4000, 756 .enter = &intel_idle, 757 .enter_freeze = intel_idle_freeze, }, 758 { 759 .name = "C9", 760 .desc = "MWAIT 0x64", 761 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 762 .exit_latency = 10000, 763 .target_residency = 20000, 764 .enter = &intel_idle, 765 .enter_freeze = intel_idle_freeze, }, 766 { 767 .enter = NULL } 768 }; 769 static struct cpuidle_state avn_cstates[] = { 770 { 771 .name = "C1", 772 .desc = "MWAIT 0x00", 773 .flags = MWAIT2flg(0x00), 774 .exit_latency = 2, 775 .target_residency = 2, 776 .enter = &intel_idle, 777 .enter_freeze = intel_idle_freeze, }, 778 { 779 .name = "C6", 780 .desc = "MWAIT 0x51", 781 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 782 .exit_latency = 15, 783 
.target_residency = 45, 784 .enter = &intel_idle, 785 .enter_freeze = intel_idle_freeze, }, 786 { 787 .enter = NULL } 788 }; 789 static struct cpuidle_state knl_cstates[] = { 790 { 791 .name = "C1", 792 .desc = "MWAIT 0x00", 793 .flags = MWAIT2flg(0x00), 794 .exit_latency = 1, 795 .target_residency = 2, 796 .enter = &intel_idle, 797 .enter_freeze = intel_idle_freeze }, 798 { 799 .name = "C6", 800 .desc = "MWAIT 0x10", 801 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 802 .exit_latency = 120, 803 .target_residency = 500, 804 .enter = &intel_idle, 805 .enter_freeze = intel_idle_freeze }, 806 { 807 .enter = NULL } 808 }; 809 810 static struct cpuidle_state bxt_cstates[] = { 811 { 812 .name = "C1", 813 .desc = "MWAIT 0x00", 814 .flags = MWAIT2flg(0x00), 815 .exit_latency = 2, 816 .target_residency = 2, 817 .enter = &intel_idle, 818 .enter_freeze = intel_idle_freeze, }, 819 { 820 .name = "C1E", 821 .desc = "MWAIT 0x01", 822 .flags = MWAIT2flg(0x01), 823 .exit_latency = 10, 824 .target_residency = 20, 825 .enter = &intel_idle, 826 .enter_freeze = intel_idle_freeze, }, 827 { 828 .name = "C6", 829 .desc = "MWAIT 0x20", 830 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 831 .exit_latency = 133, 832 .target_residency = 133, 833 .enter = &intel_idle, 834 .enter_freeze = intel_idle_freeze, }, 835 { 836 .name = "C7s", 837 .desc = "MWAIT 0x31", 838 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 839 .exit_latency = 155, 840 .target_residency = 155, 841 .enter = &intel_idle, 842 .enter_freeze = intel_idle_freeze, }, 843 { 844 .name = "C8", 845 .desc = "MWAIT 0x40", 846 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 847 .exit_latency = 1000, 848 .target_residency = 1000, 849 .enter = &intel_idle, 850 .enter_freeze = intel_idle_freeze, }, 851 { 852 .name = "C9", 853 .desc = "MWAIT 0x50", 854 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 855 .exit_latency = 2000, 856 .target_residency = 2000, 857 .enter = &intel_idle, 858 .enter_freeze = 
intel_idle_freeze, }, 859 { 860 .name = "C10", 861 .desc = "MWAIT 0x60", 862 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 863 .exit_latency = 10000, 864 .target_residency = 10000, 865 .enter = &intel_idle, 866 .enter_freeze = intel_idle_freeze, }, 867 { 868 .enter = NULL } 869 }; 870 871 static struct cpuidle_state dnv_cstates[] = { 872 { 873 .name = "C1", 874 .desc = "MWAIT 0x00", 875 .flags = MWAIT2flg(0x00), 876 .exit_latency = 2, 877 .target_residency = 2, 878 .enter = &intel_idle, 879 .enter_freeze = intel_idle_freeze, }, 880 { 881 .name = "C1E", 882 .desc = "MWAIT 0x01", 883 .flags = MWAIT2flg(0x01), 884 .exit_latency = 10, 885 .target_residency = 20, 886 .enter = &intel_idle, 887 .enter_freeze = intel_idle_freeze, }, 888 { 889 .name = "C6", 890 .desc = "MWAIT 0x20", 891 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 892 .exit_latency = 50, 893 .target_residency = 500, 894 .enter = &intel_idle, 895 .enter_freeze = intel_idle_freeze, }, 896 { 897 .enter = NULL } 898 }; 899 900 /** 901 * intel_idle 902 * @dev: cpuidle_device 903 * @drv: cpuidle driver 904 * @index: index of cpuidle state 905 * 906 * Must be called under local_irq_disable(). 907 */ 908 static __cpuidle int intel_idle(struct cpuidle_device *dev, 909 struct cpuidle_driver *drv, int index) 910 { 911 unsigned long ecx = 1; /* break on interrupt flag */ 912 struct cpuidle_state *state = &drv->states[index]; 913 unsigned long eax = flg2MWAIT(state->flags); 914 unsigned int cstate; 915 int cpu = smp_processor_id(); 916 917 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; 918 919 /* 920 * leave_mm() to avoid costly and often unnecessary wakeups 921 * for flushing the user TLB's associated with the active mm. 
922 */ 923 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) 924 leave_mm(cpu); 925 926 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 927 tick_broadcast_enter(); 928 929 mwait_idle_with_hints(eax, ecx); 930 931 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 932 tick_broadcast_exit(); 933 934 return index; 935 } 936 937 /** 938 * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle 939 * @dev: cpuidle_device 940 * @drv: cpuidle driver 941 * @index: state index 942 */ 943 static void intel_idle_freeze(struct cpuidle_device *dev, 944 struct cpuidle_driver *drv, int index) 945 { 946 unsigned long ecx = 1; /* break on interrupt flag */ 947 unsigned long eax = flg2MWAIT(drv->states[index].flags); 948 949 mwait_idle_with_hints(eax, ecx); 950 } 951 952 static void __setup_broadcast_timer(bool on) 953 { 954 if (on) 955 tick_broadcast_enable(); 956 else 957 tick_broadcast_disable(); 958 } 959 960 static void auto_demotion_disable(void) 961 { 962 unsigned long long msr_bits; 963 964 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 965 msr_bits &= ~(icpu->auto_demotion_disable_flags); 966 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 967 } 968 static void c1e_promotion_disable(void) 969 { 970 unsigned long long msr_bits; 971 972 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 973 msr_bits &= ~0x2; 974 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 975 } 976 977 static const struct idle_cpu idle_cpu_nehalem = { 978 .state_table = nehalem_cstates, 979 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 980 .disable_promotion_to_c1e = true, 981 }; 982 983 static const struct idle_cpu idle_cpu_atom = { 984 .state_table = atom_cstates, 985 }; 986 987 static const struct idle_cpu idle_cpu_tangier = { 988 .state_table = tangier_cstates, 989 }; 990 991 static const struct idle_cpu idle_cpu_lincroft = { 992 .state_table = atom_cstates, 993 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 994 }; 995 996 static const struct idle_cpu idle_cpu_snb = { 
997 .state_table = snb_cstates, 998 .disable_promotion_to_c1e = true, 999 }; 1000 1001 static const struct idle_cpu idle_cpu_byt = { 1002 .state_table = byt_cstates, 1003 .disable_promotion_to_c1e = true, 1004 .byt_auto_demotion_disable_flag = true, 1005 }; 1006 1007 static const struct idle_cpu idle_cpu_cht = { 1008 .state_table = cht_cstates, 1009 .disable_promotion_to_c1e = true, 1010 .byt_auto_demotion_disable_flag = true, 1011 }; 1012 1013 static const struct idle_cpu idle_cpu_ivb = { 1014 .state_table = ivb_cstates, 1015 .disable_promotion_to_c1e = true, 1016 }; 1017 1018 static const struct idle_cpu idle_cpu_ivt = { 1019 .state_table = ivt_cstates, 1020 .disable_promotion_to_c1e = true, 1021 }; 1022 1023 static const struct idle_cpu idle_cpu_hsw = { 1024 .state_table = hsw_cstates, 1025 .disable_promotion_to_c1e = true, 1026 }; 1027 1028 static const struct idle_cpu idle_cpu_bdw = { 1029 .state_table = bdw_cstates, 1030 .disable_promotion_to_c1e = true, 1031 }; 1032 1033 static const struct idle_cpu idle_cpu_skl = { 1034 .state_table = skl_cstates, 1035 .disable_promotion_to_c1e = true, 1036 }; 1037 1038 static const struct idle_cpu idle_cpu_skx = { 1039 .state_table = skx_cstates, 1040 .disable_promotion_to_c1e = true, 1041 }; 1042 1043 static const struct idle_cpu idle_cpu_avn = { 1044 .state_table = avn_cstates, 1045 .disable_promotion_to_c1e = true, 1046 }; 1047 1048 static const struct idle_cpu idle_cpu_knl = { 1049 .state_table = knl_cstates, 1050 }; 1051 1052 static const struct idle_cpu idle_cpu_bxt = { 1053 .state_table = bxt_cstates, 1054 .disable_promotion_to_c1e = true, 1055 }; 1056 1057 static const struct idle_cpu idle_cpu_dnv = { 1058 .state_table = dnv_cstates, 1059 .disable_promotion_to_c1e = true, 1060 }; 1061 1062 #define ICPU(model, cpu) \ 1063 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } 1064 1065 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1066 ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), 
1067 ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), 1068 ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), 1069 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), 1070 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), 1071 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), 1072 ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), 1073 ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), 1074 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), 1075 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), 1076 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), 1077 ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), 1078 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), 1079 ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), 1080 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), 1081 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), 1082 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), 1083 ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), 1084 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), 1085 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), 1086 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), 1087 ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), 1088 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), 1089 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), 1090 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), 1091 ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), 1092 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), 1093 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), 1094 ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), 1095 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), 1096 ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), 1097 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), 1098 ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), 1099 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), 1100 ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), 1101 ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), 1102 {} 1103 }; 1104 1105 /* 1106 * intel_idle_probe() 1107 */ 1108 static int __init intel_idle_probe(void) 1109 { 1110 unsigned int eax, ebx, ecx; 1111 const struct x86_cpu_id *id; 1112 1113 if 
(max_cstate == 0) { 1114 pr_debug(PREFIX "disabled\n"); 1115 return -EPERM; 1116 } 1117 1118 id = x86_match_cpu(intel_idle_ids); 1119 if (!id) { 1120 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1121 boot_cpu_data.x86 == 6) 1122 pr_debug(PREFIX "does not run on family %d model %d\n", 1123 boot_cpu_data.x86, boot_cpu_data.x86_model); 1124 return -ENODEV; 1125 } 1126 1127 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1128 return -ENODEV; 1129 1130 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1131 1132 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1133 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1134 !mwait_substates) 1135 return -ENODEV; 1136 1137 pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); 1138 1139 icpu = (const struct idle_cpu *)id->driver_data; 1140 cpuidle_state_table = icpu->state_table; 1141 1142 pr_debug(PREFIX "v" INTEL_IDLE_VERSION 1143 " model 0x%X\n", boot_cpu_data.x86_model); 1144 1145 return 0; 1146 } 1147 1148 /* 1149 * intel_idle_cpuidle_devices_uninit() 1150 * Unregisters the cpuidle devices. 
1151 */ 1152 static void intel_idle_cpuidle_devices_uninit(void) 1153 { 1154 int i; 1155 struct cpuidle_device *dev; 1156 1157 for_each_online_cpu(i) { 1158 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); 1159 cpuidle_unregister_device(dev); 1160 } 1161 } 1162 1163 /* 1164 * ivt_idle_state_table_update(void) 1165 * 1166 * Tune IVT multi-socket targets 1167 * Assumption: num_sockets == (max_package_num + 1) 1168 */ 1169 static void ivt_idle_state_table_update(void) 1170 { 1171 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1172 int cpu, package_num, num_sockets = 1; 1173 1174 for_each_online_cpu(cpu) { 1175 package_num = topology_physical_package_id(cpu); 1176 if (package_num + 1 > num_sockets) { 1177 num_sockets = package_num + 1; 1178 1179 if (num_sockets > 4) { 1180 cpuidle_state_table = ivt_cstates_8s; 1181 return; 1182 } 1183 } 1184 } 1185 1186 if (num_sockets > 2) 1187 cpuidle_state_table = ivt_cstates_4s; 1188 1189 /* else, 1 and 2 socket systems use default ivt_cstates */ 1190 } 1191 1192 /* 1193 * Translate IRTL (Interrupt Response Time Limit) MSR to usec 1194 */ 1195 1196 static unsigned int irtl_ns_units[] = { 1197 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 1198 1199 static unsigned long long irtl_2_usec(unsigned long long irtl) 1200 { 1201 unsigned long long ns; 1202 1203 if (!irtl) 1204 return 0; 1205 1206 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1207 1208 return div64_u64((irtl & 0x3FF) * ns, 1000); 1209 } 1210 /* 1211 * bxt_idle_state_table_update(void) 1212 * 1213 * On BXT, we trust the IRTL to show the definitive maximum latency 1214 * We use the same value for target_residency. 
1215 */ 1216 static void bxt_idle_state_table_update(void) 1217 { 1218 unsigned long long msr; 1219 unsigned int usec; 1220 1221 rdmsrl(MSR_PKGC6_IRTL, msr); 1222 usec = irtl_2_usec(msr); 1223 if (usec) { 1224 bxt_cstates[2].exit_latency = usec; 1225 bxt_cstates[2].target_residency = usec; 1226 } 1227 1228 rdmsrl(MSR_PKGC7_IRTL, msr); 1229 usec = irtl_2_usec(msr); 1230 if (usec) { 1231 bxt_cstates[3].exit_latency = usec; 1232 bxt_cstates[3].target_residency = usec; 1233 } 1234 1235 rdmsrl(MSR_PKGC8_IRTL, msr); 1236 usec = irtl_2_usec(msr); 1237 if (usec) { 1238 bxt_cstates[4].exit_latency = usec; 1239 bxt_cstates[4].target_residency = usec; 1240 } 1241 1242 rdmsrl(MSR_PKGC9_IRTL, msr); 1243 usec = irtl_2_usec(msr); 1244 if (usec) { 1245 bxt_cstates[5].exit_latency = usec; 1246 bxt_cstates[5].target_residency = usec; 1247 } 1248 1249 rdmsrl(MSR_PKGC10_IRTL, msr); 1250 usec = irtl_2_usec(msr); 1251 if (usec) { 1252 bxt_cstates[6].exit_latency = usec; 1253 bxt_cstates[6].target_residency = usec; 1254 } 1255 1256 } 1257 /* 1258 * sklh_idle_state_table_update(void) 1259 * 1260 * On SKL-H (model 0x5e) disable C8 and C9 if: 1261 * C10 is enabled and SGX disabled 1262 */ 1263 static void sklh_idle_state_table_update(void) 1264 { 1265 unsigned long long msr; 1266 unsigned int eax, ebx, ecx, edx; 1267 1268 1269 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1270 if (max_cstate <= 7) 1271 return; 1272 1273 /* if PC10 not present in CPUID.MWAIT.EDX */ 1274 if ((mwait_substates & (0xF << 28)) == 0) 1275 return; 1276 1277 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1278 1279 /* PC10 is not enabled in PKG C-state limit */ 1280 if ((msr & 0xF) != 8) 1281 return; 1282 1283 ecx = 0; 1284 cpuid(7, &eax, &ebx, &ecx, &edx); 1285 1286 /* if SGX is present */ 1287 if (ebx & (1 << 2)) { 1288 1289 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1290 1291 /* if SGX is enabled */ 1292 if (msr & (1 << 18)) 1293 return; 1294 } 1295 1296 skl_cstates[5].disabled = 1; /* C8-SKL */ 
1297 skl_cstates[6].disabled = 1; /* C9-SKL */ 1298 } 1299 /* 1300 * intel_idle_state_table_update() 1301 * 1302 * Update the default state_table for this CPU-id 1303 */ 1304 1305 static void intel_idle_state_table_update(void) 1306 { 1307 switch (boot_cpu_data.x86_model) { 1308 1309 case INTEL_FAM6_IVYBRIDGE_X: 1310 ivt_idle_state_table_update(); 1311 break; 1312 case INTEL_FAM6_ATOM_GOLDMONT: 1313 case INTEL_FAM6_ATOM_GEMINI_LAKE: 1314 bxt_idle_state_table_update(); 1315 break; 1316 case INTEL_FAM6_SKYLAKE_DESKTOP: 1317 sklh_idle_state_table_update(); 1318 break; 1319 } 1320 } 1321 1322 /* 1323 * intel_idle_cpuidle_driver_init() 1324 * allocate, initialize cpuidle_states 1325 */ 1326 static void __init intel_idle_cpuidle_driver_init(void) 1327 { 1328 int cstate; 1329 struct cpuidle_driver *drv = &intel_idle_driver; 1330 1331 intel_idle_state_table_update(); 1332 1333 drv->state_count = 1; 1334 1335 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1336 int num_substates, mwait_hint, mwait_cstate; 1337 1338 if ((cpuidle_state_table[cstate].enter == NULL) && 1339 (cpuidle_state_table[cstate].enter_freeze == NULL)) 1340 break; 1341 1342 if (cstate + 1 > max_cstate) { 1343 printk(PREFIX "max_cstate %d reached\n", 1344 max_cstate); 1345 break; 1346 } 1347 1348 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1349 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); 1350 1351 /* number of sub-states for this state in CPUID.MWAIT */ 1352 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) 1353 & MWAIT_SUBSTATE_MASK; 1354 1355 /* if NO sub-states for this state in CPUID, skip it */ 1356 if (num_substates == 0) 1357 continue; 1358 1359 /* if state marked as disabled, skip it */ 1360 if (cpuidle_state_table[cstate].disabled != 0) { 1361 pr_debug(PREFIX "state %s is disabled", 1362 cpuidle_state_table[cstate].name); 1363 continue; 1364 } 1365 1366 1367 if (((mwait_cstate + 1) > 2) && 1368 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1369 
mark_tsc_unstable("TSC halts in idle" 1370 " states deeper than C2"); 1371 1372 drv->states[drv->state_count] = /* structure copy */ 1373 cpuidle_state_table[cstate]; 1374 1375 drv->state_count += 1; 1376 } 1377 1378 if (icpu->byt_auto_demotion_disable_flag) { 1379 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1380 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1381 } 1382 } 1383 1384 1385 /* 1386 * intel_idle_cpu_init() 1387 * allocate, initialize, register cpuidle_devices 1388 * @cpu: cpu/core to initialize 1389 */ 1390 static int intel_idle_cpu_init(unsigned int cpu) 1391 { 1392 struct cpuidle_device *dev; 1393 1394 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1395 dev->cpu = cpu; 1396 1397 if (cpuidle_register_device(dev)) { 1398 pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); 1399 return -EIO; 1400 } 1401 1402 if (icpu->auto_demotion_disable_flags) 1403 auto_demotion_disable(); 1404 1405 if (icpu->disable_promotion_to_c1e) 1406 c1e_promotion_disable(); 1407 1408 return 0; 1409 } 1410 1411 static int intel_idle_cpu_online(unsigned int cpu) 1412 { 1413 struct cpuidle_device *dev; 1414 1415 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 1416 __setup_broadcast_timer(true); 1417 1418 /* 1419 * Some systems can hotplug a cpu at runtime after 1420 * the kernel has booted, we have to initialize the 1421 * driver in this case 1422 */ 1423 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1424 if (!dev->registered) 1425 return intel_idle_cpu_init(cpu); 1426 1427 return 0; 1428 } 1429 1430 static int __init intel_idle_init(void) 1431 { 1432 int retval; 1433 1434 /* Do not load intel_idle at all for now if idle= is passed */ 1435 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1436 return -ENODEV; 1437 1438 retval = intel_idle_probe(); 1439 if (retval) 1440 return retval; 1441 1442 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1443 if (intel_idle_cpuidle_devices == NULL) 1444 return -ENOMEM; 1445 1446 
intel_idle_cpuidle_driver_init(); 1447 retval = cpuidle_register_driver(&intel_idle_driver); 1448 if (retval) { 1449 struct cpuidle_driver *drv = cpuidle_get_driver(); 1450 printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", 1451 drv ? drv->name : "none"); 1452 goto init_driver_fail; 1453 } 1454 1455 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1456 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1457 1458 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1459 intel_idle_cpu_online, NULL); 1460 if (retval < 0) 1461 goto hp_setup_fail; 1462 1463 pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", 1464 lapic_timer_reliable_states); 1465 1466 return 0; 1467 1468 hp_setup_fail: 1469 intel_idle_cpuidle_devices_uninit(); 1470 cpuidle_unregister_driver(&intel_idle_driver); 1471 init_driver_fail: 1472 free_percpu(intel_idle_cpuidle_devices); 1473 return retval; 1474 1475 } 1476 device_initcall(intel_idle_init); 1477 1478 /* 1479 * We are not really modular, but we used to support that. Meaning we also 1480 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1481 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1482 * is the easiest way (currently) to continue doing that. 1483 */ 1484 module_param(max_cstate, int, 0444); 1485