/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver. The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
48 * 49 */ 50 51 /* un-comment DEBUG to enable pr_debug() statements */ 52 #define DEBUG 53 54 #include <linux/kernel.h> 55 #include <linux/cpuidle.h> 56 #include <linux/tick.h> 57 #include <trace/events/power.h> 58 #include <linux/sched.h> 59 #include <linux/notifier.h> 60 #include <linux/cpu.h> 61 #include <linux/moduleparam.h> 62 #include <asm/cpu_device_id.h> 63 #include <asm/intel-family.h> 64 #include <asm/mwait.h> 65 #include <asm/msr.h> 66 67 #define INTEL_IDLE_VERSION "0.4.1" 68 #define PREFIX "intel_idle: " 69 70 static struct cpuidle_driver intel_idle_driver = { 71 .name = "intel_idle", 72 .owner = THIS_MODULE, 73 }; 74 /* intel_idle.max_cstate=0 disables driver */ 75 static int max_cstate = CPUIDLE_STATE_MAX - 1; 76 77 static unsigned int mwait_substates; 78 79 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF 80 /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ 81 static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ 82 83 struct idle_cpu { 84 struct cpuidle_state *state_table; 85 86 /* 87 * Hardware C-state auto-demotion may not always be optimal. 88 * Indicate which enable bits to clear here. 89 */ 90 unsigned long auto_demotion_disable_flags; 91 bool byt_auto_demotion_disable_flag; 92 bool disable_promotion_to_c1e; 93 }; 94 95 static const struct idle_cpu *icpu; 96 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 97 static int intel_idle(struct cpuidle_device *dev, 98 struct cpuidle_driver *drv, int index); 99 static void intel_idle_freeze(struct cpuidle_device *dev, 100 struct cpuidle_driver *drv, int index); 101 static struct cpuidle_state *cpuidle_state_table; 102 103 /* 104 * Set this flag for states where the HW flushes the TLB for us 105 * and so we don't need cross-calls to keep it consistent. 106 * If this flag is set, SW flushes the TLB, so even if the 107 * HW doesn't do the flushing, this flag is safe to use. 
108 */ 109 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 110 111 /* 112 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 113 * the C-state (top nibble) and sub-state (bottom nibble) 114 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 115 * 116 * We store the hint at the top of our "flags" for each state. 117 */ 118 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 119 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 120 121 /* 122 * States are indexed by the cstate number, 123 * which is also the index into the MWAIT hint array. 124 * Thus C0 is a dummy. 125 */ 126 static struct cpuidle_state nehalem_cstates[] = { 127 { 128 .name = "C1", 129 .desc = "MWAIT 0x00", 130 .flags = MWAIT2flg(0x00), 131 .exit_latency = 3, 132 .target_residency = 6, 133 .enter = &intel_idle, 134 .enter_freeze = intel_idle_freeze, }, 135 { 136 .name = "C1E", 137 .desc = "MWAIT 0x01", 138 .flags = MWAIT2flg(0x01), 139 .exit_latency = 10, 140 .target_residency = 20, 141 .enter = &intel_idle, 142 .enter_freeze = intel_idle_freeze, }, 143 { 144 .name = "C3", 145 .desc = "MWAIT 0x10", 146 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 147 .exit_latency = 20, 148 .target_residency = 80, 149 .enter = &intel_idle, 150 .enter_freeze = intel_idle_freeze, }, 151 { 152 .name = "C6", 153 .desc = "MWAIT 0x20", 154 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 155 .exit_latency = 200, 156 .target_residency = 800, 157 .enter = &intel_idle, 158 .enter_freeze = intel_idle_freeze, }, 159 { 160 .enter = NULL } 161 }; 162 163 static struct cpuidle_state snb_cstates[] = { 164 { 165 .name = "C1", 166 .desc = "MWAIT 0x00", 167 .flags = MWAIT2flg(0x00), 168 .exit_latency = 2, 169 .target_residency = 2, 170 .enter = &intel_idle, 171 .enter_freeze = intel_idle_freeze, }, 172 { 173 .name = "C1E", 174 .desc = "MWAIT 0x01", 175 .flags = MWAIT2flg(0x01), 176 .exit_latency = 10, 177 .target_residency = 20, 178 .enter = &intel_idle, 179 .enter_freeze = intel_idle_freeze, }, 180 { 181 .name = "C3", 182 .desc = 
"MWAIT 0x10", 183 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 184 .exit_latency = 80, 185 .target_residency = 211, 186 .enter = &intel_idle, 187 .enter_freeze = intel_idle_freeze, }, 188 { 189 .name = "C6", 190 .desc = "MWAIT 0x20", 191 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 192 .exit_latency = 104, 193 .target_residency = 345, 194 .enter = &intel_idle, 195 .enter_freeze = intel_idle_freeze, }, 196 { 197 .name = "C7", 198 .desc = "MWAIT 0x30", 199 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 200 .exit_latency = 109, 201 .target_residency = 345, 202 .enter = &intel_idle, 203 .enter_freeze = intel_idle_freeze, }, 204 { 205 .enter = NULL } 206 }; 207 208 static struct cpuidle_state byt_cstates[] = { 209 { 210 .name = "C1", 211 .desc = "MWAIT 0x00", 212 .flags = MWAIT2flg(0x00), 213 .exit_latency = 1, 214 .target_residency = 1, 215 .enter = &intel_idle, 216 .enter_freeze = intel_idle_freeze, }, 217 { 218 .name = "C6N", 219 .desc = "MWAIT 0x58", 220 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 221 .exit_latency = 300, 222 .target_residency = 275, 223 .enter = &intel_idle, 224 .enter_freeze = intel_idle_freeze, }, 225 { 226 .name = "C6S", 227 .desc = "MWAIT 0x52", 228 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 229 .exit_latency = 500, 230 .target_residency = 560, 231 .enter = &intel_idle, 232 .enter_freeze = intel_idle_freeze, }, 233 { 234 .name = "C7", 235 .desc = "MWAIT 0x60", 236 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 237 .exit_latency = 1200, 238 .target_residency = 4000, 239 .enter = &intel_idle, 240 .enter_freeze = intel_idle_freeze, }, 241 { 242 .name = "C7S", 243 .desc = "MWAIT 0x64", 244 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 245 .exit_latency = 10000, 246 .target_residency = 20000, 247 .enter = &intel_idle, 248 .enter_freeze = intel_idle_freeze, }, 249 { 250 .enter = NULL } 251 }; 252 253 static struct cpuidle_state cht_cstates[] = { 254 { 255 .name = "C1", 256 .desc = "MWAIT 0x00", 
257 .flags = MWAIT2flg(0x00), 258 .exit_latency = 1, 259 .target_residency = 1, 260 .enter = &intel_idle, 261 .enter_freeze = intel_idle_freeze, }, 262 { 263 .name = "C6N", 264 .desc = "MWAIT 0x58", 265 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 266 .exit_latency = 80, 267 .target_residency = 275, 268 .enter = &intel_idle, 269 .enter_freeze = intel_idle_freeze, }, 270 { 271 .name = "C6S", 272 .desc = "MWAIT 0x52", 273 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 274 .exit_latency = 200, 275 .target_residency = 560, 276 .enter = &intel_idle, 277 .enter_freeze = intel_idle_freeze, }, 278 { 279 .name = "C7", 280 .desc = "MWAIT 0x60", 281 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 282 .exit_latency = 1200, 283 .target_residency = 4000, 284 .enter = &intel_idle, 285 .enter_freeze = intel_idle_freeze, }, 286 { 287 .name = "C7S", 288 .desc = "MWAIT 0x64", 289 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 290 .exit_latency = 10000, 291 .target_residency = 20000, 292 .enter = &intel_idle, 293 .enter_freeze = intel_idle_freeze, }, 294 { 295 .enter = NULL } 296 }; 297 298 static struct cpuidle_state ivb_cstates[] = { 299 { 300 .name = "C1", 301 .desc = "MWAIT 0x00", 302 .flags = MWAIT2flg(0x00), 303 .exit_latency = 1, 304 .target_residency = 1, 305 .enter = &intel_idle, 306 .enter_freeze = intel_idle_freeze, }, 307 { 308 .name = "C1E", 309 .desc = "MWAIT 0x01", 310 .flags = MWAIT2flg(0x01), 311 .exit_latency = 10, 312 .target_residency = 20, 313 .enter = &intel_idle, 314 .enter_freeze = intel_idle_freeze, }, 315 { 316 .name = "C3", 317 .desc = "MWAIT 0x10", 318 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 319 .exit_latency = 59, 320 .target_residency = 156, 321 .enter = &intel_idle, 322 .enter_freeze = intel_idle_freeze, }, 323 { 324 .name = "C6", 325 .desc = "MWAIT 0x20", 326 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 327 .exit_latency = 80, 328 .target_residency = 300, 329 .enter = &intel_idle, 330 .enter_freeze = 
intel_idle_freeze, }, 331 { 332 .name = "C7", 333 .desc = "MWAIT 0x30", 334 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 335 .exit_latency = 87, 336 .target_residency = 300, 337 .enter = &intel_idle, 338 .enter_freeze = intel_idle_freeze, }, 339 { 340 .enter = NULL } 341 }; 342 343 static struct cpuidle_state ivt_cstates[] = { 344 { 345 .name = "C1", 346 .desc = "MWAIT 0x00", 347 .flags = MWAIT2flg(0x00), 348 .exit_latency = 1, 349 .target_residency = 1, 350 .enter = &intel_idle, 351 .enter_freeze = intel_idle_freeze, }, 352 { 353 .name = "C1E", 354 .desc = "MWAIT 0x01", 355 .flags = MWAIT2flg(0x01), 356 .exit_latency = 10, 357 .target_residency = 80, 358 .enter = &intel_idle, 359 .enter_freeze = intel_idle_freeze, }, 360 { 361 .name = "C3", 362 .desc = "MWAIT 0x10", 363 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 364 .exit_latency = 59, 365 .target_residency = 156, 366 .enter = &intel_idle, 367 .enter_freeze = intel_idle_freeze, }, 368 { 369 .name = "C6", 370 .desc = "MWAIT 0x20", 371 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 372 .exit_latency = 82, 373 .target_residency = 300, 374 .enter = &intel_idle, 375 .enter_freeze = intel_idle_freeze, }, 376 { 377 .enter = NULL } 378 }; 379 380 static struct cpuidle_state ivt_cstates_4s[] = { 381 { 382 .name = "C1", 383 .desc = "MWAIT 0x00", 384 .flags = MWAIT2flg(0x00), 385 .exit_latency = 1, 386 .target_residency = 1, 387 .enter = &intel_idle, 388 .enter_freeze = intel_idle_freeze, }, 389 { 390 .name = "C1E", 391 .desc = "MWAIT 0x01", 392 .flags = MWAIT2flg(0x01), 393 .exit_latency = 10, 394 .target_residency = 250, 395 .enter = &intel_idle, 396 .enter_freeze = intel_idle_freeze, }, 397 { 398 .name = "C3", 399 .desc = "MWAIT 0x10", 400 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 401 .exit_latency = 59, 402 .target_residency = 300, 403 .enter = &intel_idle, 404 .enter_freeze = intel_idle_freeze, }, 405 { 406 .name = "C6", 407 .desc = "MWAIT 0x20", 408 .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, 409 .exit_latency = 84, 410 .target_residency = 400, 411 .enter = &intel_idle, 412 .enter_freeze = intel_idle_freeze, }, 413 { 414 .enter = NULL } 415 }; 416 417 static struct cpuidle_state ivt_cstates_8s[] = { 418 { 419 .name = "C1", 420 .desc = "MWAIT 0x00", 421 .flags = MWAIT2flg(0x00), 422 .exit_latency = 1, 423 .target_residency = 1, 424 .enter = &intel_idle, 425 .enter_freeze = intel_idle_freeze, }, 426 { 427 .name = "C1E", 428 .desc = "MWAIT 0x01", 429 .flags = MWAIT2flg(0x01), 430 .exit_latency = 10, 431 .target_residency = 500, 432 .enter = &intel_idle, 433 .enter_freeze = intel_idle_freeze, }, 434 { 435 .name = "C3", 436 .desc = "MWAIT 0x10", 437 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 438 .exit_latency = 59, 439 .target_residency = 600, 440 .enter = &intel_idle, 441 .enter_freeze = intel_idle_freeze, }, 442 { 443 .name = "C6", 444 .desc = "MWAIT 0x20", 445 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 446 .exit_latency = 88, 447 .target_residency = 700, 448 .enter = &intel_idle, 449 .enter_freeze = intel_idle_freeze, }, 450 { 451 .enter = NULL } 452 }; 453 454 static struct cpuidle_state hsw_cstates[] = { 455 { 456 .name = "C1", 457 .desc = "MWAIT 0x00", 458 .flags = MWAIT2flg(0x00), 459 .exit_latency = 2, 460 .target_residency = 2, 461 .enter = &intel_idle, 462 .enter_freeze = intel_idle_freeze, }, 463 { 464 .name = "C1E", 465 .desc = "MWAIT 0x01", 466 .flags = MWAIT2flg(0x01), 467 .exit_latency = 10, 468 .target_residency = 20, 469 .enter = &intel_idle, 470 .enter_freeze = intel_idle_freeze, }, 471 { 472 .name = "C3", 473 .desc = "MWAIT 0x10", 474 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 475 .exit_latency = 33, 476 .target_residency = 100, 477 .enter = &intel_idle, 478 .enter_freeze = intel_idle_freeze, }, 479 { 480 .name = "C6", 481 .desc = "MWAIT 0x20", 482 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 483 .exit_latency = 133, 484 .target_residency = 400, 485 .enter = &intel_idle, 486 
.enter_freeze = intel_idle_freeze, }, 487 { 488 .name = "C7s", 489 .desc = "MWAIT 0x32", 490 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 491 .exit_latency = 166, 492 .target_residency = 500, 493 .enter = &intel_idle, 494 .enter_freeze = intel_idle_freeze, }, 495 { 496 .name = "C8", 497 .desc = "MWAIT 0x40", 498 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 499 .exit_latency = 300, 500 .target_residency = 900, 501 .enter = &intel_idle, 502 .enter_freeze = intel_idle_freeze, }, 503 { 504 .name = "C9", 505 .desc = "MWAIT 0x50", 506 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 507 .exit_latency = 600, 508 .target_residency = 1800, 509 .enter = &intel_idle, 510 .enter_freeze = intel_idle_freeze, }, 511 { 512 .name = "C10", 513 .desc = "MWAIT 0x60", 514 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 515 .exit_latency = 2600, 516 .target_residency = 7700, 517 .enter = &intel_idle, 518 .enter_freeze = intel_idle_freeze, }, 519 { 520 .enter = NULL } 521 }; 522 static struct cpuidle_state bdw_cstates[] = { 523 { 524 .name = "C1", 525 .desc = "MWAIT 0x00", 526 .flags = MWAIT2flg(0x00), 527 .exit_latency = 2, 528 .target_residency = 2, 529 .enter = &intel_idle, 530 .enter_freeze = intel_idle_freeze, }, 531 { 532 .name = "C1E", 533 .desc = "MWAIT 0x01", 534 .flags = MWAIT2flg(0x01), 535 .exit_latency = 10, 536 .target_residency = 20, 537 .enter = &intel_idle, 538 .enter_freeze = intel_idle_freeze, }, 539 { 540 .name = "C3", 541 .desc = "MWAIT 0x10", 542 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 543 .exit_latency = 40, 544 .target_residency = 100, 545 .enter = &intel_idle, 546 .enter_freeze = intel_idle_freeze, }, 547 { 548 .name = "C6", 549 .desc = "MWAIT 0x20", 550 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 551 .exit_latency = 133, 552 .target_residency = 400, 553 .enter = &intel_idle, 554 .enter_freeze = intel_idle_freeze, }, 555 { 556 .name = "C7s", 557 .desc = "MWAIT 0x32", 558 .flags = MWAIT2flg(0x32) | 
CPUIDLE_FLAG_TLB_FLUSHED, 559 .exit_latency = 166, 560 .target_residency = 500, 561 .enter = &intel_idle, 562 .enter_freeze = intel_idle_freeze, }, 563 { 564 .name = "C8", 565 .desc = "MWAIT 0x40", 566 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 567 .exit_latency = 300, 568 .target_residency = 900, 569 .enter = &intel_idle, 570 .enter_freeze = intel_idle_freeze, }, 571 { 572 .name = "C9", 573 .desc = "MWAIT 0x50", 574 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 575 .exit_latency = 600, 576 .target_residency = 1800, 577 .enter = &intel_idle, 578 .enter_freeze = intel_idle_freeze, }, 579 { 580 .name = "C10", 581 .desc = "MWAIT 0x60", 582 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 583 .exit_latency = 2600, 584 .target_residency = 7700, 585 .enter = &intel_idle, 586 .enter_freeze = intel_idle_freeze, }, 587 { 588 .enter = NULL } 589 }; 590 591 static struct cpuidle_state skl_cstates[] = { 592 { 593 .name = "C1", 594 .desc = "MWAIT 0x00", 595 .flags = MWAIT2flg(0x00), 596 .exit_latency = 2, 597 .target_residency = 2, 598 .enter = &intel_idle, 599 .enter_freeze = intel_idle_freeze, }, 600 { 601 .name = "C1E", 602 .desc = "MWAIT 0x01", 603 .flags = MWAIT2flg(0x01), 604 .exit_latency = 10, 605 .target_residency = 20, 606 .enter = &intel_idle, 607 .enter_freeze = intel_idle_freeze, }, 608 { 609 .name = "C3", 610 .desc = "MWAIT 0x10", 611 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 612 .exit_latency = 70, 613 .target_residency = 100, 614 .enter = &intel_idle, 615 .enter_freeze = intel_idle_freeze, }, 616 { 617 .name = "C6", 618 .desc = "MWAIT 0x20", 619 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 620 .exit_latency = 85, 621 .target_residency = 200, 622 .enter = &intel_idle, 623 .enter_freeze = intel_idle_freeze, }, 624 { 625 .name = "C7s", 626 .desc = "MWAIT 0x33", 627 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 628 .exit_latency = 124, 629 .target_residency = 800, 630 .enter = &intel_idle, 631 .enter_freeze = 
intel_idle_freeze, }, 632 { 633 .name = "C8", 634 .desc = "MWAIT 0x40", 635 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 636 .exit_latency = 200, 637 .target_residency = 800, 638 .enter = &intel_idle, 639 .enter_freeze = intel_idle_freeze, }, 640 { 641 .name = "C9", 642 .desc = "MWAIT 0x50", 643 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 644 .exit_latency = 480, 645 .target_residency = 5000, 646 .enter = &intel_idle, 647 .enter_freeze = intel_idle_freeze, }, 648 { 649 .name = "C10", 650 .desc = "MWAIT 0x60", 651 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 652 .exit_latency = 890, 653 .target_residency = 5000, 654 .enter = &intel_idle, 655 .enter_freeze = intel_idle_freeze, }, 656 { 657 .enter = NULL } 658 }; 659 660 static struct cpuidle_state skx_cstates[] = { 661 { 662 .name = "C1", 663 .desc = "MWAIT 0x00", 664 .flags = MWAIT2flg(0x00), 665 .exit_latency = 2, 666 .target_residency = 2, 667 .enter = &intel_idle, 668 .enter_freeze = intel_idle_freeze, }, 669 { 670 .name = "C1E", 671 .desc = "MWAIT 0x01", 672 .flags = MWAIT2flg(0x01), 673 .exit_latency = 10, 674 .target_residency = 20, 675 .enter = &intel_idle, 676 .enter_freeze = intel_idle_freeze, }, 677 { 678 .name = "C6", 679 .desc = "MWAIT 0x20", 680 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 681 .exit_latency = 133, 682 .target_residency = 600, 683 .enter = &intel_idle, 684 .enter_freeze = intel_idle_freeze, }, 685 { 686 .enter = NULL } 687 }; 688 689 static struct cpuidle_state atom_cstates[] = { 690 { 691 .name = "C1E", 692 .desc = "MWAIT 0x00", 693 .flags = MWAIT2flg(0x00), 694 .exit_latency = 10, 695 .target_residency = 20, 696 .enter = &intel_idle, 697 .enter_freeze = intel_idle_freeze, }, 698 { 699 .name = "C2", 700 .desc = "MWAIT 0x10", 701 .flags = MWAIT2flg(0x10), 702 .exit_latency = 20, 703 .target_residency = 80, 704 .enter = &intel_idle, 705 .enter_freeze = intel_idle_freeze, }, 706 { 707 .name = "C4", 708 .desc = "MWAIT 0x30", 709 .flags = MWAIT2flg(0x30) | 
CPUIDLE_FLAG_TLB_FLUSHED, 710 .exit_latency = 100, 711 .target_residency = 400, 712 .enter = &intel_idle, 713 .enter_freeze = intel_idle_freeze, }, 714 { 715 .name = "C6", 716 .desc = "MWAIT 0x52", 717 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 718 .exit_latency = 140, 719 .target_residency = 560, 720 .enter = &intel_idle, 721 .enter_freeze = intel_idle_freeze, }, 722 { 723 .enter = NULL } 724 }; 725 static struct cpuidle_state tangier_cstates[] = { 726 { 727 .name = "C1", 728 .desc = "MWAIT 0x00", 729 .flags = MWAIT2flg(0x00), 730 .exit_latency = 1, 731 .target_residency = 4, 732 .enter = &intel_idle, 733 .enter_freeze = intel_idle_freeze, }, 734 { 735 .name = "C4", 736 .desc = "MWAIT 0x30", 737 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 738 .exit_latency = 100, 739 .target_residency = 400, 740 .enter = &intel_idle, 741 .enter_freeze = intel_idle_freeze, }, 742 { 743 .name = "C6", 744 .desc = "MWAIT 0x52", 745 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 746 .exit_latency = 140, 747 .target_residency = 560, 748 .enter = &intel_idle, 749 .enter_freeze = intel_idle_freeze, }, 750 { 751 .name = "C7", 752 .desc = "MWAIT 0x60", 753 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 754 .exit_latency = 1200, 755 .target_residency = 4000, 756 .enter = &intel_idle, 757 .enter_freeze = intel_idle_freeze, }, 758 { 759 .name = "C9", 760 .desc = "MWAIT 0x64", 761 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 762 .exit_latency = 10000, 763 .target_residency = 20000, 764 .enter = &intel_idle, 765 .enter_freeze = intel_idle_freeze, }, 766 { 767 .enter = NULL } 768 }; 769 static struct cpuidle_state avn_cstates[] = { 770 { 771 .name = "C1", 772 .desc = "MWAIT 0x00", 773 .flags = MWAIT2flg(0x00), 774 .exit_latency = 2, 775 .target_residency = 2, 776 .enter = &intel_idle, 777 .enter_freeze = intel_idle_freeze, }, 778 { 779 .name = "C6", 780 .desc = "MWAIT 0x51", 781 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 782 .exit_latency = 15, 783 
.target_residency = 45, 784 .enter = &intel_idle, 785 .enter_freeze = intel_idle_freeze, }, 786 { 787 .enter = NULL } 788 }; 789 static struct cpuidle_state knl_cstates[] = { 790 { 791 .name = "C1", 792 .desc = "MWAIT 0x00", 793 .flags = MWAIT2flg(0x00), 794 .exit_latency = 1, 795 .target_residency = 2, 796 .enter = &intel_idle, 797 .enter_freeze = intel_idle_freeze }, 798 { 799 .name = "C6", 800 .desc = "MWAIT 0x10", 801 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 802 .exit_latency = 120, 803 .target_residency = 500, 804 .enter = &intel_idle, 805 .enter_freeze = intel_idle_freeze }, 806 { 807 .enter = NULL } 808 }; 809 810 static struct cpuidle_state bxt_cstates[] = { 811 { 812 .name = "C1", 813 .desc = "MWAIT 0x00", 814 .flags = MWAIT2flg(0x00), 815 .exit_latency = 2, 816 .target_residency = 2, 817 .enter = &intel_idle, 818 .enter_freeze = intel_idle_freeze, }, 819 { 820 .name = "C1E", 821 .desc = "MWAIT 0x01", 822 .flags = MWAIT2flg(0x01), 823 .exit_latency = 10, 824 .target_residency = 20, 825 .enter = &intel_idle, 826 .enter_freeze = intel_idle_freeze, }, 827 { 828 .name = "C6", 829 .desc = "MWAIT 0x20", 830 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 831 .exit_latency = 133, 832 .target_residency = 133, 833 .enter = &intel_idle, 834 .enter_freeze = intel_idle_freeze, }, 835 { 836 .name = "C7s", 837 .desc = "MWAIT 0x31", 838 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 839 .exit_latency = 155, 840 .target_residency = 155, 841 .enter = &intel_idle, 842 .enter_freeze = intel_idle_freeze, }, 843 { 844 .name = "C8", 845 .desc = "MWAIT 0x40", 846 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 847 .exit_latency = 1000, 848 .target_residency = 1000, 849 .enter = &intel_idle, 850 .enter_freeze = intel_idle_freeze, }, 851 { 852 .name = "C9", 853 .desc = "MWAIT 0x50", 854 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 855 .exit_latency = 2000, 856 .target_residency = 2000, 857 .enter = &intel_idle, 858 .enter_freeze = 
intel_idle_freeze, }, 859 { 860 .name = "C10", 861 .desc = "MWAIT 0x60", 862 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 863 .exit_latency = 10000, 864 .target_residency = 10000, 865 .enter = &intel_idle, 866 .enter_freeze = intel_idle_freeze, }, 867 { 868 .enter = NULL } 869 }; 870 871 static struct cpuidle_state dnv_cstates[] = { 872 { 873 .name = "C1", 874 .desc = "MWAIT 0x00", 875 .flags = MWAIT2flg(0x00), 876 .exit_latency = 2, 877 .target_residency = 2, 878 .enter = &intel_idle, 879 .enter_freeze = intel_idle_freeze, }, 880 { 881 .name = "C1E", 882 .desc = "MWAIT 0x01", 883 .flags = MWAIT2flg(0x01), 884 .exit_latency = 10, 885 .target_residency = 20, 886 .enter = &intel_idle, 887 .enter_freeze = intel_idle_freeze, }, 888 { 889 .name = "C6", 890 .desc = "MWAIT 0x20", 891 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 892 .exit_latency = 50, 893 .target_residency = 500, 894 .enter = &intel_idle, 895 .enter_freeze = intel_idle_freeze, }, 896 { 897 .enter = NULL } 898 }; 899 900 /** 901 * intel_idle 902 * @dev: cpuidle_device 903 * @drv: cpuidle driver 904 * @index: index of cpuidle state 905 * 906 * Must be called under local_irq_disable(). 907 */ 908 static __cpuidle int intel_idle(struct cpuidle_device *dev, 909 struct cpuidle_driver *drv, int index) 910 { 911 unsigned long ecx = 1; /* break on interrupt flag */ 912 struct cpuidle_state *state = &drv->states[index]; 913 unsigned long eax = flg2MWAIT(state->flags); 914 unsigned int cstate; 915 int cpu = smp_processor_id(); 916 917 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; 918 919 /* 920 * leave_mm() to avoid costly and often unnecessary wakeups 921 * for flushing the user TLB's associated with the active mm. 
922 */ 923 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) 924 leave_mm(cpu); 925 926 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 927 tick_broadcast_enter(); 928 929 mwait_idle_with_hints(eax, ecx); 930 931 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 932 tick_broadcast_exit(); 933 934 return index; 935 } 936 937 /** 938 * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle 939 * @dev: cpuidle_device 940 * @drv: cpuidle driver 941 * @index: state index 942 */ 943 static void intel_idle_freeze(struct cpuidle_device *dev, 944 struct cpuidle_driver *drv, int index) 945 { 946 unsigned long ecx = 1; /* break on interrupt flag */ 947 unsigned long eax = flg2MWAIT(drv->states[index].flags); 948 949 mwait_idle_with_hints(eax, ecx); 950 } 951 952 static void __setup_broadcast_timer(bool on) 953 { 954 if (on) 955 tick_broadcast_enable(); 956 else 957 tick_broadcast_disable(); 958 } 959 960 static void auto_demotion_disable(void) 961 { 962 unsigned long long msr_bits; 963 964 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 965 msr_bits &= ~(icpu->auto_demotion_disable_flags); 966 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 967 } 968 static void c1e_promotion_disable(void) 969 { 970 unsigned long long msr_bits; 971 972 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 973 msr_bits &= ~0x2; 974 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 975 } 976 977 static const struct idle_cpu idle_cpu_nehalem = { 978 .state_table = nehalem_cstates, 979 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 980 .disable_promotion_to_c1e = true, 981 }; 982 983 static const struct idle_cpu idle_cpu_atom = { 984 .state_table = atom_cstates, 985 }; 986 987 static const struct idle_cpu idle_cpu_tangier = { 988 .state_table = tangier_cstates, 989 }; 990 991 static const struct idle_cpu idle_cpu_lincroft = { 992 .state_table = atom_cstates, 993 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 994 }; 995 996 static const struct idle_cpu idle_cpu_snb = { 
997 .state_table = snb_cstates, 998 .disable_promotion_to_c1e = true, 999 }; 1000 1001 static const struct idle_cpu idle_cpu_byt = { 1002 .state_table = byt_cstates, 1003 .disable_promotion_to_c1e = true, 1004 .byt_auto_demotion_disable_flag = true, 1005 }; 1006 1007 static const struct idle_cpu idle_cpu_cht = { 1008 .state_table = cht_cstates, 1009 .disable_promotion_to_c1e = true, 1010 .byt_auto_demotion_disable_flag = true, 1011 }; 1012 1013 static const struct idle_cpu idle_cpu_ivb = { 1014 .state_table = ivb_cstates, 1015 .disable_promotion_to_c1e = true, 1016 }; 1017 1018 static const struct idle_cpu idle_cpu_ivt = { 1019 .state_table = ivt_cstates, 1020 .disable_promotion_to_c1e = true, 1021 }; 1022 1023 static const struct idle_cpu idle_cpu_hsw = { 1024 .state_table = hsw_cstates, 1025 .disable_promotion_to_c1e = true, 1026 }; 1027 1028 static const struct idle_cpu idle_cpu_bdw = { 1029 .state_table = bdw_cstates, 1030 .disable_promotion_to_c1e = true, 1031 }; 1032 1033 static const struct idle_cpu idle_cpu_skl = { 1034 .state_table = skl_cstates, 1035 .disable_promotion_to_c1e = true, 1036 }; 1037 1038 static const struct idle_cpu idle_cpu_skx = { 1039 .state_table = skx_cstates, 1040 .disable_promotion_to_c1e = true, 1041 }; 1042 1043 static const struct idle_cpu idle_cpu_avn = { 1044 .state_table = avn_cstates, 1045 .disable_promotion_to_c1e = true, 1046 }; 1047 1048 static const struct idle_cpu idle_cpu_knl = { 1049 .state_table = knl_cstates, 1050 }; 1051 1052 static const struct idle_cpu idle_cpu_bxt = { 1053 .state_table = bxt_cstates, 1054 .disable_promotion_to_c1e = true, 1055 }; 1056 1057 static const struct idle_cpu idle_cpu_dnv = { 1058 .state_table = dnv_cstates, 1059 .disable_promotion_to_c1e = true, 1060 }; 1061 1062 #define ICPU(model, cpu) \ 1063 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } 1064 1065 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1066 ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), 
1067 ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), 1068 ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), 1069 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), 1070 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), 1071 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), 1072 ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), 1073 ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), 1074 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), 1075 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), 1076 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), 1077 ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), 1078 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), 1079 ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), 1080 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), 1081 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), 1082 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), 1083 ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), 1084 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), 1085 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), 1086 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), 1087 ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), 1088 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), 1089 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), 1090 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), 1091 ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), 1092 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), 1093 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), 1094 ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), 1095 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), 1096 ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), 1097 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), 1098 ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), 1099 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), 1100 ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), 1101 {} 1102 }; 1103 1104 /* 1105 * intel_idle_probe() 1106 */ 1107 static int __init intel_idle_probe(void) 1108 { 1109 unsigned int eax, ebx, ecx; 1110 const struct x86_cpu_id *id; 1111 1112 if (max_cstate == 0) { 1113 pr_debug(PREFIX "disabled\n"); 
1114 return -EPERM; 1115 } 1116 1117 id = x86_match_cpu(intel_idle_ids); 1118 if (!id) { 1119 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1120 boot_cpu_data.x86 == 6) 1121 pr_debug(PREFIX "does not run on family %d model %d\n", 1122 boot_cpu_data.x86, boot_cpu_data.x86_model); 1123 return -ENODEV; 1124 } 1125 1126 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1127 return -ENODEV; 1128 1129 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1130 1131 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1132 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1133 !mwait_substates) 1134 return -ENODEV; 1135 1136 pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); 1137 1138 icpu = (const struct idle_cpu *)id->driver_data; 1139 cpuidle_state_table = icpu->state_table; 1140 1141 pr_debug(PREFIX "v" INTEL_IDLE_VERSION 1142 " model 0x%X\n", boot_cpu_data.x86_model); 1143 1144 return 0; 1145 } 1146 1147 /* 1148 * intel_idle_cpuidle_devices_uninit() 1149 * Unregisters the cpuidle devices. 
1150 */ 1151 static void intel_idle_cpuidle_devices_uninit(void) 1152 { 1153 int i; 1154 struct cpuidle_device *dev; 1155 1156 for_each_online_cpu(i) { 1157 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); 1158 cpuidle_unregister_device(dev); 1159 } 1160 } 1161 1162 /* 1163 * ivt_idle_state_table_update(void) 1164 * 1165 * Tune IVT multi-socket targets 1166 * Assumption: num_sockets == (max_package_num + 1) 1167 */ 1168 static void ivt_idle_state_table_update(void) 1169 { 1170 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1171 int cpu, package_num, num_sockets = 1; 1172 1173 for_each_online_cpu(cpu) { 1174 package_num = topology_physical_package_id(cpu); 1175 if (package_num + 1 > num_sockets) { 1176 num_sockets = package_num + 1; 1177 1178 if (num_sockets > 4) { 1179 cpuidle_state_table = ivt_cstates_8s; 1180 return; 1181 } 1182 } 1183 } 1184 1185 if (num_sockets > 2) 1186 cpuidle_state_table = ivt_cstates_4s; 1187 1188 /* else, 1 and 2 socket systems use default ivt_cstates */ 1189 } 1190 1191 /* 1192 * Translate IRTL (Interrupt Response Time Limit) MSR to usec 1193 */ 1194 1195 static unsigned int irtl_ns_units[] = { 1196 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 1197 1198 static unsigned long long irtl_2_usec(unsigned long long irtl) 1199 { 1200 unsigned long long ns; 1201 1202 if (!irtl) 1203 return 0; 1204 1205 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1206 1207 return div64_u64((irtl & 0x3FF) * ns, 1000); 1208 } 1209 /* 1210 * bxt_idle_state_table_update(void) 1211 * 1212 * On BXT, we trust the IRTL to show the definitive maximum latency 1213 * We use the same value for target_residency. 
1214 */ 1215 static void bxt_idle_state_table_update(void) 1216 { 1217 unsigned long long msr; 1218 unsigned int usec; 1219 1220 rdmsrl(MSR_PKGC6_IRTL, msr); 1221 usec = irtl_2_usec(msr); 1222 if (usec) { 1223 bxt_cstates[2].exit_latency = usec; 1224 bxt_cstates[2].target_residency = usec; 1225 } 1226 1227 rdmsrl(MSR_PKGC7_IRTL, msr); 1228 usec = irtl_2_usec(msr); 1229 if (usec) { 1230 bxt_cstates[3].exit_latency = usec; 1231 bxt_cstates[3].target_residency = usec; 1232 } 1233 1234 rdmsrl(MSR_PKGC8_IRTL, msr); 1235 usec = irtl_2_usec(msr); 1236 if (usec) { 1237 bxt_cstates[4].exit_latency = usec; 1238 bxt_cstates[4].target_residency = usec; 1239 } 1240 1241 rdmsrl(MSR_PKGC9_IRTL, msr); 1242 usec = irtl_2_usec(msr); 1243 if (usec) { 1244 bxt_cstates[5].exit_latency = usec; 1245 bxt_cstates[5].target_residency = usec; 1246 } 1247 1248 rdmsrl(MSR_PKGC10_IRTL, msr); 1249 usec = irtl_2_usec(msr); 1250 if (usec) { 1251 bxt_cstates[6].exit_latency = usec; 1252 bxt_cstates[6].target_residency = usec; 1253 } 1254 1255 } 1256 /* 1257 * sklh_idle_state_table_update(void) 1258 * 1259 * On SKL-H (model 0x5e) disable C8 and C9 if: 1260 * C10 is enabled and SGX disabled 1261 */ 1262 static void sklh_idle_state_table_update(void) 1263 { 1264 unsigned long long msr; 1265 unsigned int eax, ebx, ecx, edx; 1266 1267 1268 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1269 if (max_cstate <= 7) 1270 return; 1271 1272 /* if PC10 not present in CPUID.MWAIT.EDX */ 1273 if ((mwait_substates & (0xF << 28)) == 0) 1274 return; 1275 1276 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1277 1278 /* PC10 is not enabled in PKG C-state limit */ 1279 if ((msr & 0xF) != 8) 1280 return; 1281 1282 ecx = 0; 1283 cpuid(7, &eax, &ebx, &ecx, &edx); 1284 1285 /* if SGX is present */ 1286 if (ebx & (1 << 2)) { 1287 1288 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1289 1290 /* if SGX is enabled */ 1291 if (msr & (1 << 18)) 1292 return; 1293 } 1294 1295 skl_cstates[5].disabled = 1; /* C8-SKL */ 
1296 skl_cstates[6].disabled = 1; /* C9-SKL */ 1297 } 1298 /* 1299 * intel_idle_state_table_update() 1300 * 1301 * Update the default state_table for this CPU-id 1302 */ 1303 1304 static void intel_idle_state_table_update(void) 1305 { 1306 switch (boot_cpu_data.x86_model) { 1307 1308 case INTEL_FAM6_IVYBRIDGE_X: 1309 ivt_idle_state_table_update(); 1310 break; 1311 case INTEL_FAM6_ATOM_GOLDMONT: 1312 bxt_idle_state_table_update(); 1313 break; 1314 case INTEL_FAM6_SKYLAKE_DESKTOP: 1315 sklh_idle_state_table_update(); 1316 break; 1317 } 1318 } 1319 1320 /* 1321 * intel_idle_cpuidle_driver_init() 1322 * allocate, initialize cpuidle_states 1323 */ 1324 static void __init intel_idle_cpuidle_driver_init(void) 1325 { 1326 int cstate; 1327 struct cpuidle_driver *drv = &intel_idle_driver; 1328 1329 intel_idle_state_table_update(); 1330 1331 drv->state_count = 1; 1332 1333 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1334 int num_substates, mwait_hint, mwait_cstate; 1335 1336 if ((cpuidle_state_table[cstate].enter == NULL) && 1337 (cpuidle_state_table[cstate].enter_freeze == NULL)) 1338 break; 1339 1340 if (cstate + 1 > max_cstate) { 1341 printk(PREFIX "max_cstate %d reached\n", 1342 max_cstate); 1343 break; 1344 } 1345 1346 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1347 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); 1348 1349 /* number of sub-states for this state in CPUID.MWAIT */ 1350 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) 1351 & MWAIT_SUBSTATE_MASK; 1352 1353 /* if NO sub-states for this state in CPUID, skip it */ 1354 if (num_substates == 0) 1355 continue; 1356 1357 /* if state marked as disabled, skip it */ 1358 if (cpuidle_state_table[cstate].disabled != 0) { 1359 pr_debug(PREFIX "state %s is disabled", 1360 cpuidle_state_table[cstate].name); 1361 continue; 1362 } 1363 1364 1365 if (((mwait_cstate + 1) > 2) && 1366 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1367 mark_tsc_unstable("TSC halts in idle" 1368 " states deeper 
than C2"); 1369 1370 drv->states[drv->state_count] = /* structure copy */ 1371 cpuidle_state_table[cstate]; 1372 1373 drv->state_count += 1; 1374 } 1375 1376 if (icpu->byt_auto_demotion_disable_flag) { 1377 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1378 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1379 } 1380 } 1381 1382 1383 /* 1384 * intel_idle_cpu_init() 1385 * allocate, initialize, register cpuidle_devices 1386 * @cpu: cpu/core to initialize 1387 */ 1388 static int intel_idle_cpu_init(unsigned int cpu) 1389 { 1390 struct cpuidle_device *dev; 1391 1392 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1393 dev->cpu = cpu; 1394 1395 if (cpuidle_register_device(dev)) { 1396 pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); 1397 return -EIO; 1398 } 1399 1400 if (icpu->auto_demotion_disable_flags) 1401 auto_demotion_disable(); 1402 1403 if (icpu->disable_promotion_to_c1e) 1404 c1e_promotion_disable(); 1405 1406 return 0; 1407 } 1408 1409 static int intel_idle_cpu_online(unsigned int cpu) 1410 { 1411 struct cpuidle_device *dev; 1412 1413 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 1414 __setup_broadcast_timer(true); 1415 1416 /* 1417 * Some systems can hotplug a cpu at runtime after 1418 * the kernel has booted, we have to initialize the 1419 * driver in this case 1420 */ 1421 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1422 if (!dev->registered) 1423 return intel_idle_cpu_init(cpu); 1424 1425 return 0; 1426 } 1427 1428 static int __init intel_idle_init(void) 1429 { 1430 int retval; 1431 1432 /* Do not load intel_idle at all for now if idle= is passed */ 1433 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1434 return -ENODEV; 1435 1436 retval = intel_idle_probe(); 1437 if (retval) 1438 return retval; 1439 1440 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1441 if (intel_idle_cpuidle_devices == NULL) 1442 return -ENOMEM; 1443 1444 intel_idle_cpuidle_driver_init(); 1445 retval = 
cpuidle_register_driver(&intel_idle_driver); 1446 if (retval) { 1447 struct cpuidle_driver *drv = cpuidle_get_driver(); 1448 printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", 1449 drv ? drv->name : "none"); 1450 goto init_driver_fail; 1451 } 1452 1453 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1454 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1455 1456 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1457 intel_idle_cpu_online, NULL); 1458 if (retval < 0) 1459 goto hp_setup_fail; 1460 1461 pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", 1462 lapic_timer_reliable_states); 1463 1464 return 0; 1465 1466 hp_setup_fail: 1467 intel_idle_cpuidle_devices_uninit(); 1468 cpuidle_unregister_driver(&intel_idle_driver); 1469 init_driver_fail: 1470 free_percpu(intel_idle_cpuidle_devices); 1471 return retval; 1472 1473 } 1474 device_initcall(intel_idle_init); 1475 1476 /* 1477 * We are not really modular, but we used to support that. Meaning we also 1478 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1479 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1480 * is the easiest way (currently) to continue doing that. 1481 */ 1482 module_param(max_cstate, int, 0444); 1483