/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver. The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have the same idle states as the boot CPU.
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states.
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the LAPIC timer workaround.
 * Have not seen issues with suspend, but may need the same workaround here.
 *
 */

/* un-comment DEBUG to enable pr_debug() statements */
#define DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mwait.h>
#include <asm/msr.h>

#define INTEL_IDLE_VERSION "0.4.1"

static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;

static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */

struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
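	 * (These flags name auto-demotion enable bits in
	 * MSR_PKG_CST_CONFIG_CONTROL; auto_demotion_disable() below
	 * clears them so the hardware does not demote a requested
	 * C-state to a shallower one on its own.)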
	 */
	unsigned long auto_demotion_disable_flags;
	bool byt_auto_demotion_disable_flag;
	bool disable_promotion_to_c1e;
};

static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
			struct cpuidle_driver *drv, int index);
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index);
static struct cpuidle_state *cpuidle_state_table;

/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)

/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
static struct cpuidle_state nehalem_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state snb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state byt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state cht_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivb_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state ivt_cstates[] = {
	{
"C1", 347 .desc = "MWAIT 0x00", 348 .flags = MWAIT2flg(0x00), 349 .exit_latency = 1, 350 .target_residency = 1, 351 .enter = &intel_idle, 352 .enter_s2idle = intel_idle_s2idle, }, 353 { 354 .name = "C1E", 355 .desc = "MWAIT 0x01", 356 .flags = MWAIT2flg(0x01), 357 .exit_latency = 10, 358 .target_residency = 80, 359 .enter = &intel_idle, 360 .enter_s2idle = intel_idle_s2idle, }, 361 { 362 .name = "C3", 363 .desc = "MWAIT 0x10", 364 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 365 .exit_latency = 59, 366 .target_residency = 156, 367 .enter = &intel_idle, 368 .enter_s2idle = intel_idle_s2idle, }, 369 { 370 .name = "C6", 371 .desc = "MWAIT 0x20", 372 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 373 .exit_latency = 82, 374 .target_residency = 300, 375 .enter = &intel_idle, 376 .enter_s2idle = intel_idle_s2idle, }, 377 { 378 .enter = NULL } 379 }; 380 381 static struct cpuidle_state ivt_cstates_4s[] = { 382 { 383 .name = "C1", 384 .desc = "MWAIT 0x00", 385 .flags = MWAIT2flg(0x00), 386 .exit_latency = 1, 387 .target_residency = 1, 388 .enter = &intel_idle, 389 .enter_s2idle = intel_idle_s2idle, }, 390 { 391 .name = "C1E", 392 .desc = "MWAIT 0x01", 393 .flags = MWAIT2flg(0x01), 394 .exit_latency = 10, 395 .target_residency = 250, 396 .enter = &intel_idle, 397 .enter_s2idle = intel_idle_s2idle, }, 398 { 399 .name = "C3", 400 .desc = "MWAIT 0x10", 401 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 402 .exit_latency = 59, 403 .target_residency = 300, 404 .enter = &intel_idle, 405 .enter_s2idle = intel_idle_s2idle, }, 406 { 407 .name = "C6", 408 .desc = "MWAIT 0x20", 409 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 410 .exit_latency = 84, 411 .target_residency = 400, 412 .enter = &intel_idle, 413 .enter_s2idle = intel_idle_s2idle, }, 414 { 415 .enter = NULL } 416 }; 417 418 static struct cpuidle_state ivt_cstates_8s[] = { 419 { 420 .name = "C1", 421 .desc = "MWAIT 0x00", 422 .flags = MWAIT2flg(0x00), 423 .exit_latency = 1, 424 .target_residency = 1, 425 .enter = &intel_idle, 426 .enter_s2idle = intel_idle_s2idle, }, 427 { 428 .name = "C1E", 429 .desc = "MWAIT 0x01", 430 .flags = MWAIT2flg(0x01), 431 .exit_latency = 10, 432 .target_residency = 500, 433 .enter = &intel_idle, 434 .enter_s2idle = intel_idle_s2idle, }, 435 { 436 .name = "C3", 437 .desc = "MWAIT 0x10", 438 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 439 .exit_latency = 59, 440 .target_residency = 600, 441 .enter = &intel_idle, 442 .enter_s2idle = intel_idle_s2idle, }, 443 { 444 .name = "C6", 445 .desc = "MWAIT 0x20", 446 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 447 .exit_latency = 88, 448 .target_residency = 700, 449 .enter = &intel_idle, 450 .enter_s2idle = intel_idle_s2idle, }, 451 { 452 .enter = NULL } 453 }; 454 455 static struct cpuidle_state hsw_cstates[] = { 456 { 457 .name = "C1", 458 .desc = "MWAIT 0x00", 459 .flags = MWAIT2flg(0x00), 460 .exit_latency = 2, 461 .target_residency = 2, 462 .enter = &intel_idle, 463 .enter_s2idle = intel_idle_s2idle, }, 464 { 465 .name = "C1E", 466 .desc = "MWAIT 0x01", 467 .flags = MWAIT2flg(0x01), 468 .exit_latency = 10, 469 .target_residency = 20, 470 .enter = &intel_idle, 471 .enter_s2idle = intel_idle_s2idle, }, 472 { 473 .name = "C3", 474 .desc = "MWAIT 0x10", 475 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 476 .exit_latency = 33, 477 .target_residency = 100, 478 .enter = &intel_idle, 479 .enter_s2idle = intel_idle_s2idle, }, 480 { 481 .name = "C6", 482 .desc = "MWAIT 0x20", 483 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 484 
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state bdw_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state skl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state skx_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state atom_cstates[] = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state tangier_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state avn_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state knl_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};

static struct cpuidle_state bxt_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

static struct cpuidle_state dnv_cstates[] = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};

/**
 * intel_idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: index of cpuidle state
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);
	unsigned int cstate;
	bool uninitialized_var(tick);
	int cpu = smp_processor_id();

	/*
	 * leave_mm() to avoid costly and often unnecessary wakeups
	 * for flushing the user TLBs associated with the active mm.
	 */
	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(cpu);

	if (!static_cpu_has(X86_FEATURE_ARAT)) {
		cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
				MWAIT_CSTATE_MASK) + 1;
		tick = false;
		if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
			tick = true;
			tick_broadcast_enter();
		}
	}

	mwait_idle_with_hints(eax, ecx);

	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
		tick_broadcast_exit();

	return index;
}

/**
 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
 * @dev: cpuidle_device
 * @drv: cpuidle driver
 * @index: state index
 */
static void intel_idle_s2idle(struct cpuidle_device *dev,
			      struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	unsigned long eax = flg2MWAIT(drv->states[index].flags);

	mwait_idle_with_hints(eax, ecx);
}

static void __setup_broadcast_timer(bool on)
{
	if (on)
		tick_broadcast_enable();
	else
		tick_broadcast_disable();
}

static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~(icpu->auto_demotion_disable_flags);
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

static const struct idle_cpu idle_cpu_nehalem = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_atom = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_byt = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsw = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdw = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skl = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_avn = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_knl = {
	.state_table = knl_cstates,
};

static const struct idle_cpu idle_cpu_bxt = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft),
	INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem),
	INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb),
	INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb),
	INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier),
	INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht),
	INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb),
	INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt),
	INTEL_CPU_FAM6(HASWELL_CORE, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_ULT, idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_GT3E, idle_cpu_hsw),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_X, idle_cpu_avn),
	INTEL_CPU_FAM6(BROADWELL_CORE, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_GT3E, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_XEON_D, idle_cpu_bdw),
	INTEL_CPU_FAM6(SKYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_MOBILE, idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_DESKTOP, idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx),
	INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl),
	INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl),
	INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_X, idle_cpu_dnv),
	{}
};

/*
 * intel_idle_probe()
 */
static int __init intel_idle_probe(void)
{
	unsigned int eax, ebx, ecx;
	const struct x86_cpu_id *id;

	if (max_cstate == 0) {
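		/* intel_idle.max_cstate=0 on the kernel command line disables this driver */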
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (!id) {
		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
		    boot_cpu_data.x86 == 6)
			pr_debug("does not run on family %d model %d\n",
				 boot_cpu_data.x86, boot_cpu_data.x86_model);
		return -ENODEV;
	}

	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
		pr_debug("Please enable MWAIT in BIOS SETUP\n");
		return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	cpuidle_state_table = icpu->state_table;

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	return 0;
}

/*
 * intel_idle_cpuidle_devices_uninit()
 * Unregisters the cpuidle devices.
 */
static void intel_idle_cpuidle_devices_uninit(void)
{
	int i;
	struct cpuidle_device *dev;

	for_each_online_cpu(i) {
		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
		cpuidle_unregister_device(dev);
	}
}

/*
 * ivt_idle_state_table_update(void)
 *
 * Tune IVT multi-socket targets.
 * Assumption: num_sockets == (max_package_num + 1)
 */
static void ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}

/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 */

static unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}

/*
 * bxt_idle_state_table_update(void)
 *
 * On BXT, we trust the IRTL to show the definitive maximum latency.
 * We use the same value for target_residency.
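 *
 * (For example, an IRTL value of 0x883 selects the 1024 ns time unit
 * (bits 12:10 == 2) with a 0x83 (131) multiplier, which irtl_2_usec()
 * above converts to roughly 134 usec.)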
 */
static void bxt_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int usec;

	rdmsrl(MSR_PKGC6_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[2].exit_latency = usec;
		bxt_cstates[2].target_residency = usec;
	}

	rdmsrl(MSR_PKGC7_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[3].exit_latency = usec;
		bxt_cstates[3].target_residency = usec;
	}

	rdmsrl(MSR_PKGC8_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[4].exit_latency = usec;
		bxt_cstates[4].target_residency = usec;
	}

	rdmsrl(MSR_PKGC9_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[5].exit_latency = usec;
		bxt_cstates[5].target_residency = usec;
	}

	rdmsrl(MSR_PKGC10_IRTL, msr);
	usec = irtl_2_usec(msr);
	if (usec) {
		bxt_cstates[6].exit_latency = usec;
		bxt_cstates[6].target_residency = usec;
	}

}

/*
 * sklh_idle_state_table_update(void)
 *
 * On SKL-H (model 0x5e) disable C8 and C9 if:
 * C10 is enabled and SGX disabled
 */
static void sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;

	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);

		/* if SGX is enabled */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].disabled = 1;	/* C8-SKL */
	skl_cstates[6].disabled = 1;	/* C9-SKL */
}

/*
 * intel_idle_state_table_update()
 *
 * Update the default state_table for this CPU-id
 */

static void intel_idle_state_table_update(void)
{
	switch (boot_cpu_data.x86_model) {

	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_DESKTOP:
		sklh_idle_state_table_update();
		break;
	}
}

/*
 * intel_idle_cpuidle_driver_init()
 * allocate, initialize cpuidle_states
 */
static void __init intel_idle_cpuidle_driver_init(void)
{
	int cstate;
	struct cpuidle_driver *drv = &intel_idle_driver;

	intel_idle_state_table_update();

	cpuidle_poll_state_init(drv);
	drv->state_count = 1;

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		int num_substates, mwait_hint, mwait_cstate;

		if ((cpuidle_state_table[cstate].enter == NULL) &&
		    (cpuidle_state_table[cstate].enter_s2idle == NULL))
			break;

		if (cstate + 1 > max_cstate) {
			pr_info("max_cstate %d reached\n", max_cstate);
			break;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);

		/* number of sub-states for this state in CPUID.MWAIT */
		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
					& MWAIT_SUBSTATE_MASK;

		/* if NO sub-states for this state in CPUID, skip it */
		if (num_substates == 0)
			continue;

		/* if state marked as disabled, skip it */
		if (cpuidle_state_table[cstate].disabled != 0) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		if (((mwait_cstate + 1) > 2) &&
		    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
			mark_tsc_unstable("TSC halts in idle"
					" states deeper than C2");

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[cstate];

		drv->state_count += 1;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/*
 * intel_idle_cpu_init()
 * allocate, initialize, register cpuidle_devices
 * @cpu: cpu/core to initialize
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (icpu->auto_demotion_disable_flags)
		auto_demotion_disable();

	if (icpu->disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}

static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
		__setup_broadcast_timer(true);

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, so we have to initialize the
	 * driver in this case.
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

static int __init intel_idle_init(void)
{
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	retval = intel_idle_probe();
	if (retval)
		return retval;

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (intel_idle_cpuidle_devices == NULL)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init();
	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("lapic_timer_reliable_states 0x%x\n",
		 lapic_timer_reliable_states);

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that. Meaning we also
 * support "intel_idle.max_cstate=..."
 * at boot and also a read-only export of it at
 * /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);