/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
#define DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.4.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;

static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */

struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
};

static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
			struct cpuidle_driver *drv, int index);
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index);
static struct cpuidle_state *cpuidle_state_table;

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
static struct cpuidle_state nehalem_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state snb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state byt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state cht_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates_4s[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates_8s[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state hsw_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
static struct cpuidle_state bdw_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state skl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state skx_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state atom_cstates[] = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
static struct cpuidle_state tangier_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
static struct cpuidle_state avn_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
static struct cpuidle_state knl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

static struct cpuidle_state bxt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state dnv_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/**
 * intel_idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: index of cpuidle state
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned int cstate;
	bool uninitialized_var(tick);
	int cpu = smp_processor_id();

	/*
	 * leave_mm() to avoid costly and often unnecessary wakeups
	 * for flushing the user TLB's associated with the active mm.
	 */
	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(cpu);

	if (!static_cpu_has(X86_FEATURE_ARAT)) {
		cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
				MWAIT_CSTATE_MASK) + 1;
		tick = false;
		if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
			tick = true;
			tick_broadcast_enter();
		}
	}

	mwait_idle_with_hints(eax, ecx);

	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
		tick_broadcast_exit();

	return index;
}

/**
 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: state index
 */
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	unsigned long eax = flg2MWAIT(drv->states[index].flags);

	mwait_idle_with_hints(eax, ecx);
}

static void __setup_broadcast_timer(bool on)
{
	if (on)
		tick_broadcast_enable();
	else
		tick_broadcast_disable();
}

static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~(icpu->auto_demotion_disable_flags);
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

static const struct idle_cpu idle_cpu_nehalem = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_atom = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_byt = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsw = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdw = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skl = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_avn = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_knl = {
	.state_table = knl_cstates,
};

static const struct idle_cpu idle_cpu_bxt = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft),
	INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb),
	INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb),
	INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier),
	INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht),
	INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb),
	INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt),
	INTEL_CPU_FAM6(HASWELL_CORE, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_ULT, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_GT3E, idle_cpu_hsw),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_X, idle_cpu_avn),
	INTEL_CPU_FAM6(BROADWELL_CORE, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_GT3E, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_XEON_D, idle_cpu_bdw),
	INTEL_CPU_FAM6(SKYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx),
	INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl),
	INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl),
	INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_X, idle_cpu_dnv),
	INTEL_CPU_FAM6(ATOM_TREMONT_X, idle_cpu_dnv),
	{}
};

/*
 * intel_idle_probe()
 */
static int __init intel_idle_probe(void)
{
	unsigned int eax, ebx, ecx;
	const struct x86_cpu_id *id;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (!id) {
		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
		    boot_cpu_data.x86 == 6)
			pr_debug("does not run on family %d model %d\n",
				 boot_cpu_data.x86, boot_cpu_data.x86_model);
		return -ENODEV;
	}

	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
		pr_debug("Please enable MWAIT in BIOS SETUP\n");
		return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	cpuidle_state_table = icpu->state_table;

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	return 0;
}

/*
 * intel_idle_cpuidle_devices_uninit()
 * Unregisters the cpuidle devices.
 */
static void intel_idle_cpuidle_devices_uninit(void)
{
	int i;
	struct cpuidle_device *dev;

	for_each_online_cpu(i) {
		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
		cpuidle_unregister_device(dev);
	}
}

/*
 * ivt_idle_state_table_update(void)
 *
 * Tune IVT multi-socket targets
 * Assumption: num_sockets == (max_package_num + 1)
 */
static void ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

static unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
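
/*
 * Worked example with a hypothetical MSR value: an IRTL reading of 0x883
 * has time-unit index 2 in bits 12:10 (1024 ns per unit) and a time value
 * of 0x83 == 131 units in bits 9:0, so irtl_2_usec() returns
 * 131 * 1024 / 1000 = 134 usec.
 */
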
/*
 * bxt_idle_state_table_update(void)
 *
 * On BXT, we trust the IRTL to show the definitive maximum latency
 * We use the same value for target_residency.
 */
static void bxt_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int usec;

	rdmsrl(MSR_PKGC6_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[2].exit_latency = usec;
		bxt_cstates[2].target_residency = usec;
	}

	rdmsrl(MSR_PKGC7_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[3].exit_latency = usec;
		bxt_cstates[3].target_residency = usec;
	}

	rdmsrl(MSR_PKGC8_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[4].exit_latency = usec;
		bxt_cstates[4].target_residency = usec;
	}

	rdmsrl(MSR_PKGC9_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[5].exit_latency = usec;
		bxt_cstates[5].target_residency = usec;
	}

	rdmsrl(MSR_PKGC10_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[6].exit_latency = usec;
		bxt_cstates[6].target_residency = usec;
	}

}
/*
 * sklh_idle_state_table_update(void)
 *
 * On SKL-H (model 0x5e) disable C8 and C9 if:
 * C10 is enabled and SGX disabled
 */
static void sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;

	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].disabled = 1;	/* C8-SKL */
	skl_cstates[6].disabled = 1;	/* C9-SKL */
}
/*
 * intel_idle_state_table_update()
 *
 * Update the default state_table for this CPU-id
 */

static void intel_idle_state_table_update(void)
{
	switch (boot_cpu_data.x86_model) {

	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_DESKTOP:
		sklh_idle_state_table_update();
		break;
	}
}

/*
 * intel_idle_cpuidle_driver_init()
 * allocate, initialize cpuidle_states
 */
static void __init intel_idle_cpuidle_driver_init(void)
{
	int cstate;
	struct cpuidle_driver *drv = &intel_idle_driver;

	intel_idle_state_table_update();

	cpuidle_poll_state_init(drv);
	drv->state_count = 1;

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		int num_substates, mwait_hint, mwait_cstate;

		if ((cpuidle_state_table[cstate].enter == NULL) &&
		    (cpuidle_state_table[cstate].enter_s2idle == NULL))
			break;

		if (cstate + 1 > max_cstate) {
			pr_info("max_cstate %d reached\n", max_cstate);
			break;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);

		/* number of sub-states for this state in CPUID.MWAIT */
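		/*
		 * CPUID leaf 5 (MWAIT) EDX packs a 4-bit sub-state count per
		 * C-state: bits 3:0 for C0, bits 7:4 for C1, and so on, hence
		 * the (mwait_cstate + 1) * 4 shift below.
		 */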
		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
					& MWAIT_SUBSTATE_MASK;

		/* if NO sub-states for this state in CPUID, skip it */
		if (num_substates == 0)
			continue;

		/* if state marked as disabled, skip it */
		if (cpuidle_state_table[cstate].disabled != 0) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		if (((mwait_cstate + 1) > 2) &&
			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			mark_tsc_unstable("TSC halts in idle"
					" states deeper than C2");

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[cstate];

		drv->state_count += 1;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/*
 * intel_idle_cpu_init()
 * allocate, initialize, register cpuidle_devices
 * @cpu: cpu/core to initialize
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (icpu->auto_demotion_disable_flags)
		auto_demotion_disable();

	if (icpu->disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}

static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
		__setup_broadcast_timer(true);

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

static int __init intel_idle_init(void)
{
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	retval = intel_idle_probe();
	if (retval)
		return retval;

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (intel_idle_cpuidle_devices == NULL)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init();
	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("lapic_timer_reliable_states 0x%x\n",
		 lapic_timer_reliable_states);

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);