/*
 * intel_idle.c - native hardware idle loop for modern Intel processors
 *
 * Copyright (c) 2013, Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * intel_idle is a cpuidle driver that loads on specific Intel processors
 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
 * make Linux more efficient on these processors, as intel_idle knows
 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
 */

/*
 * Design Assumptions
 *
 * All CPUs have same idle states as boot CPU
 *
 * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
 */

/*
 * Known limitations
 *
 * The driver currently initializes for_each_online_cpu() upon modprobe.
 * It is unaware of subsequent processors hot-added to the system.
 * This means that if you boot with maxcpus=n and later online
 * processors above n, those processors will use C1 only.
 *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
 * to avoid complications with the lapic timer workaround.
 * Have not seen issues with suspend, but may need same workaround here.
48 * 49 */ 50 51 /* un-comment DEBUG to enable pr_debug() statements */ 52 #define DEBUG 53 54 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 55 56 #include <linux/kernel.h> 57 #include <linux/cpuidle.h> 58 #include <linux/tick.h> 59 #include <trace/events/power.h> 60 #include <linux/sched.h> 61 #include <linux/notifier.h> 62 #include <linux/cpu.h> 63 #include <linux/moduleparam.h> 64 #include <asm/cpu_device_id.h> 65 #include <asm/intel-family.h> 66 #include <asm/mwait.h> 67 #include <asm/msr.h> 68 69 #define INTEL_IDLE_VERSION "0.4.1" 70 71 static struct cpuidle_driver intel_idle_driver = { 72 .name = "intel_idle", 73 .owner = THIS_MODULE, 74 }; 75 /* intel_idle.max_cstate=0 disables driver */ 76 static int max_cstate = CPUIDLE_STATE_MAX - 1; 77 78 static unsigned int mwait_substates; 79 80 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF 81 /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ 82 static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ 83 84 struct idle_cpu { 85 struct cpuidle_state *state_table; 86 87 /* 88 * Hardware C-state auto-demotion may not always be optimal. 89 * Indicate which enable bits to clear here. 90 */ 91 unsigned long auto_demotion_disable_flags; 92 bool byt_auto_demotion_disable_flag; 93 bool disable_promotion_to_c1e; 94 }; 95 96 static const struct idle_cpu *icpu; 97 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 98 static int intel_idle(struct cpuidle_device *dev, 99 struct cpuidle_driver *drv, int index); 100 static void intel_idle_s2idle(struct cpuidle_device *dev, 101 struct cpuidle_driver *drv, int index); 102 static struct cpuidle_state *cpuidle_state_table; 103 104 /* 105 * Set this flag for states where the HW flushes the TLB for us 106 * and so we don't need cross-calls to keep it consistent. 107 * If this flag is set, SW flushes the TLB, so even if the 108 * HW doesn't do the flushing, this flag is safe to use. 
109 */ 110 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 111 112 /* 113 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 114 * the C-state (top nibble) and sub-state (bottom nibble) 115 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 116 * 117 * We store the hint at the top of our "flags" for each state. 118 */ 119 #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 120 #define MWAIT2flg(eax) ((eax & 0xFF) << 24) 121 122 /* 123 * States are indexed by the cstate number, 124 * which is also the index into the MWAIT hint array. 125 * Thus C0 is a dummy. 126 */ 127 static struct cpuidle_state nehalem_cstates[] = { 128 { 129 .name = "C1", 130 .desc = "MWAIT 0x00", 131 .flags = MWAIT2flg(0x00), 132 .exit_latency = 3, 133 .target_residency = 6, 134 .enter = &intel_idle, 135 .enter_s2idle = intel_idle_s2idle, }, 136 { 137 .name = "C1E", 138 .desc = "MWAIT 0x01", 139 .flags = MWAIT2flg(0x01), 140 .exit_latency = 10, 141 .target_residency = 20, 142 .enter = &intel_idle, 143 .enter_s2idle = intel_idle_s2idle, }, 144 { 145 .name = "C3", 146 .desc = "MWAIT 0x10", 147 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 148 .exit_latency = 20, 149 .target_residency = 80, 150 .enter = &intel_idle, 151 .enter_s2idle = intel_idle_s2idle, }, 152 { 153 .name = "C6", 154 .desc = "MWAIT 0x20", 155 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 156 .exit_latency = 200, 157 .target_residency = 800, 158 .enter = &intel_idle, 159 .enter_s2idle = intel_idle_s2idle, }, 160 { 161 .enter = NULL } 162 }; 163 164 static struct cpuidle_state snb_cstates[] = { 165 { 166 .name = "C1", 167 .desc = "MWAIT 0x00", 168 .flags = MWAIT2flg(0x00), 169 .exit_latency = 2, 170 .target_residency = 2, 171 .enter = &intel_idle, 172 .enter_s2idle = intel_idle_s2idle, }, 173 { 174 .name = "C1E", 175 .desc = "MWAIT 0x01", 176 .flags = MWAIT2flg(0x01), 177 .exit_latency = 10, 178 .target_residency = 20, 179 .enter = &intel_idle, 180 .enter_s2idle = intel_idle_s2idle, }, 181 { 182 .name = "C3", 183 .desc = 
"MWAIT 0x10", 184 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 185 .exit_latency = 80, 186 .target_residency = 211, 187 .enter = &intel_idle, 188 .enter_s2idle = intel_idle_s2idle, }, 189 { 190 .name = "C6", 191 .desc = "MWAIT 0x20", 192 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 193 .exit_latency = 104, 194 .target_residency = 345, 195 .enter = &intel_idle, 196 .enter_s2idle = intel_idle_s2idle, }, 197 { 198 .name = "C7", 199 .desc = "MWAIT 0x30", 200 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 201 .exit_latency = 109, 202 .target_residency = 345, 203 .enter = &intel_idle, 204 .enter_s2idle = intel_idle_s2idle, }, 205 { 206 .enter = NULL } 207 }; 208 209 static struct cpuidle_state byt_cstates[] = { 210 { 211 .name = "C1", 212 .desc = "MWAIT 0x00", 213 .flags = MWAIT2flg(0x00), 214 .exit_latency = 1, 215 .target_residency = 1, 216 .enter = &intel_idle, 217 .enter_s2idle = intel_idle_s2idle, }, 218 { 219 .name = "C6N", 220 .desc = "MWAIT 0x58", 221 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 222 .exit_latency = 300, 223 .target_residency = 275, 224 .enter = &intel_idle, 225 .enter_s2idle = intel_idle_s2idle, }, 226 { 227 .name = "C6S", 228 .desc = "MWAIT 0x52", 229 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 230 .exit_latency = 500, 231 .target_residency = 560, 232 .enter = &intel_idle, 233 .enter_s2idle = intel_idle_s2idle, }, 234 { 235 .name = "C7", 236 .desc = "MWAIT 0x60", 237 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 238 .exit_latency = 1200, 239 .target_residency = 4000, 240 .enter = &intel_idle, 241 .enter_s2idle = intel_idle_s2idle, }, 242 { 243 .name = "C7S", 244 .desc = "MWAIT 0x64", 245 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 246 .exit_latency = 10000, 247 .target_residency = 20000, 248 .enter = &intel_idle, 249 .enter_s2idle = intel_idle_s2idle, }, 250 { 251 .enter = NULL } 252 }; 253 254 static struct cpuidle_state cht_cstates[] = { 255 { 256 .name = "C1", 257 .desc = "MWAIT 0x00", 
258 .flags = MWAIT2flg(0x00), 259 .exit_latency = 1, 260 .target_residency = 1, 261 .enter = &intel_idle, 262 .enter_s2idle = intel_idle_s2idle, }, 263 { 264 .name = "C6N", 265 .desc = "MWAIT 0x58", 266 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 267 .exit_latency = 80, 268 .target_residency = 275, 269 .enter = &intel_idle, 270 .enter_s2idle = intel_idle_s2idle, }, 271 { 272 .name = "C6S", 273 .desc = "MWAIT 0x52", 274 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 275 .exit_latency = 200, 276 .target_residency = 560, 277 .enter = &intel_idle, 278 .enter_s2idle = intel_idle_s2idle, }, 279 { 280 .name = "C7", 281 .desc = "MWAIT 0x60", 282 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 283 .exit_latency = 1200, 284 .target_residency = 4000, 285 .enter = &intel_idle, 286 .enter_s2idle = intel_idle_s2idle, }, 287 { 288 .name = "C7S", 289 .desc = "MWAIT 0x64", 290 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 291 .exit_latency = 10000, 292 .target_residency = 20000, 293 .enter = &intel_idle, 294 .enter_s2idle = intel_idle_s2idle, }, 295 { 296 .enter = NULL } 297 }; 298 299 static struct cpuidle_state ivb_cstates[] = { 300 { 301 .name = "C1", 302 .desc = "MWAIT 0x00", 303 .flags = MWAIT2flg(0x00), 304 .exit_latency = 1, 305 .target_residency = 1, 306 .enter = &intel_idle, 307 .enter_s2idle = intel_idle_s2idle, }, 308 { 309 .name = "C1E", 310 .desc = "MWAIT 0x01", 311 .flags = MWAIT2flg(0x01), 312 .exit_latency = 10, 313 .target_residency = 20, 314 .enter = &intel_idle, 315 .enter_s2idle = intel_idle_s2idle, }, 316 { 317 .name = "C3", 318 .desc = "MWAIT 0x10", 319 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 320 .exit_latency = 59, 321 .target_residency = 156, 322 .enter = &intel_idle, 323 .enter_s2idle = intel_idle_s2idle, }, 324 { 325 .name = "C6", 326 .desc = "MWAIT 0x20", 327 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 328 .exit_latency = 80, 329 .target_residency = 300, 330 .enter = &intel_idle, 331 .enter_s2idle = 
intel_idle_s2idle, }, 332 { 333 .name = "C7", 334 .desc = "MWAIT 0x30", 335 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 336 .exit_latency = 87, 337 .target_residency = 300, 338 .enter = &intel_idle, 339 .enter_s2idle = intel_idle_s2idle, }, 340 { 341 .enter = NULL } 342 }; 343 344 static struct cpuidle_state ivt_cstates[] = { 345 { 346 .name = "C1", 347 .desc = "MWAIT 0x00", 348 .flags = MWAIT2flg(0x00), 349 .exit_latency = 1, 350 .target_residency = 1, 351 .enter = &intel_idle, 352 .enter_s2idle = intel_idle_s2idle, }, 353 { 354 .name = "C1E", 355 .desc = "MWAIT 0x01", 356 .flags = MWAIT2flg(0x01), 357 .exit_latency = 10, 358 .target_residency = 80, 359 .enter = &intel_idle, 360 .enter_s2idle = intel_idle_s2idle, }, 361 { 362 .name = "C3", 363 .desc = "MWAIT 0x10", 364 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 365 .exit_latency = 59, 366 .target_residency = 156, 367 .enter = &intel_idle, 368 .enter_s2idle = intel_idle_s2idle, }, 369 { 370 .name = "C6", 371 .desc = "MWAIT 0x20", 372 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 373 .exit_latency = 82, 374 .target_residency = 300, 375 .enter = &intel_idle, 376 .enter_s2idle = intel_idle_s2idle, }, 377 { 378 .enter = NULL } 379 }; 380 381 static struct cpuidle_state ivt_cstates_4s[] = { 382 { 383 .name = "C1", 384 .desc = "MWAIT 0x00", 385 .flags = MWAIT2flg(0x00), 386 .exit_latency = 1, 387 .target_residency = 1, 388 .enter = &intel_idle, 389 .enter_s2idle = intel_idle_s2idle, }, 390 { 391 .name = "C1E", 392 .desc = "MWAIT 0x01", 393 .flags = MWAIT2flg(0x01), 394 .exit_latency = 10, 395 .target_residency = 250, 396 .enter = &intel_idle, 397 .enter_s2idle = intel_idle_s2idle, }, 398 { 399 .name = "C3", 400 .desc = "MWAIT 0x10", 401 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 402 .exit_latency = 59, 403 .target_residency = 300, 404 .enter = &intel_idle, 405 .enter_s2idle = intel_idle_s2idle, }, 406 { 407 .name = "C6", 408 .desc = "MWAIT 0x20", 409 .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, 410 .exit_latency = 84, 411 .target_residency = 400, 412 .enter = &intel_idle, 413 .enter_s2idle = intel_idle_s2idle, }, 414 { 415 .enter = NULL } 416 }; 417 418 static struct cpuidle_state ivt_cstates_8s[] = { 419 { 420 .name = "C1", 421 .desc = "MWAIT 0x00", 422 .flags = MWAIT2flg(0x00), 423 .exit_latency = 1, 424 .target_residency = 1, 425 .enter = &intel_idle, 426 .enter_s2idle = intel_idle_s2idle, }, 427 { 428 .name = "C1E", 429 .desc = "MWAIT 0x01", 430 .flags = MWAIT2flg(0x01), 431 .exit_latency = 10, 432 .target_residency = 500, 433 .enter = &intel_idle, 434 .enter_s2idle = intel_idle_s2idle, }, 435 { 436 .name = "C3", 437 .desc = "MWAIT 0x10", 438 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 439 .exit_latency = 59, 440 .target_residency = 600, 441 .enter = &intel_idle, 442 .enter_s2idle = intel_idle_s2idle, }, 443 { 444 .name = "C6", 445 .desc = "MWAIT 0x20", 446 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 447 .exit_latency = 88, 448 .target_residency = 700, 449 .enter = &intel_idle, 450 .enter_s2idle = intel_idle_s2idle, }, 451 { 452 .enter = NULL } 453 }; 454 455 static struct cpuidle_state hsw_cstates[] = { 456 { 457 .name = "C1", 458 .desc = "MWAIT 0x00", 459 .flags = MWAIT2flg(0x00), 460 .exit_latency = 2, 461 .target_residency = 2, 462 .enter = &intel_idle, 463 .enter_s2idle = intel_idle_s2idle, }, 464 { 465 .name = "C1E", 466 .desc = "MWAIT 0x01", 467 .flags = MWAIT2flg(0x01), 468 .exit_latency = 10, 469 .target_residency = 20, 470 .enter = &intel_idle, 471 .enter_s2idle = intel_idle_s2idle, }, 472 { 473 .name = "C3", 474 .desc = "MWAIT 0x10", 475 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 476 .exit_latency = 33, 477 .target_residency = 100, 478 .enter = &intel_idle, 479 .enter_s2idle = intel_idle_s2idle, }, 480 { 481 .name = "C6", 482 .desc = "MWAIT 0x20", 483 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 484 .exit_latency = 133, 485 .target_residency = 400, 486 .enter = &intel_idle, 487 
.enter_s2idle = intel_idle_s2idle, }, 488 { 489 .name = "C7s", 490 .desc = "MWAIT 0x32", 491 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 492 .exit_latency = 166, 493 .target_residency = 500, 494 .enter = &intel_idle, 495 .enter_s2idle = intel_idle_s2idle, }, 496 { 497 .name = "C8", 498 .desc = "MWAIT 0x40", 499 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 500 .exit_latency = 300, 501 .target_residency = 900, 502 .enter = &intel_idle, 503 .enter_s2idle = intel_idle_s2idle, }, 504 { 505 .name = "C9", 506 .desc = "MWAIT 0x50", 507 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 508 .exit_latency = 600, 509 .target_residency = 1800, 510 .enter = &intel_idle, 511 .enter_s2idle = intel_idle_s2idle, }, 512 { 513 .name = "C10", 514 .desc = "MWAIT 0x60", 515 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 516 .exit_latency = 2600, 517 .target_residency = 7700, 518 .enter = &intel_idle, 519 .enter_s2idle = intel_idle_s2idle, }, 520 { 521 .enter = NULL } 522 }; 523 static struct cpuidle_state bdw_cstates[] = { 524 { 525 .name = "C1", 526 .desc = "MWAIT 0x00", 527 .flags = MWAIT2flg(0x00), 528 .exit_latency = 2, 529 .target_residency = 2, 530 .enter = &intel_idle, 531 .enter_s2idle = intel_idle_s2idle, }, 532 { 533 .name = "C1E", 534 .desc = "MWAIT 0x01", 535 .flags = MWAIT2flg(0x01), 536 .exit_latency = 10, 537 .target_residency = 20, 538 .enter = &intel_idle, 539 .enter_s2idle = intel_idle_s2idle, }, 540 { 541 .name = "C3", 542 .desc = "MWAIT 0x10", 543 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 544 .exit_latency = 40, 545 .target_residency = 100, 546 .enter = &intel_idle, 547 .enter_s2idle = intel_idle_s2idle, }, 548 { 549 .name = "C6", 550 .desc = "MWAIT 0x20", 551 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 552 .exit_latency = 133, 553 .target_residency = 400, 554 .enter = &intel_idle, 555 .enter_s2idle = intel_idle_s2idle, }, 556 { 557 .name = "C7s", 558 .desc = "MWAIT 0x32", 559 .flags = MWAIT2flg(0x32) | 
CPUIDLE_FLAG_TLB_FLUSHED, 560 .exit_latency = 166, 561 .target_residency = 500, 562 .enter = &intel_idle, 563 .enter_s2idle = intel_idle_s2idle, }, 564 { 565 .name = "C8", 566 .desc = "MWAIT 0x40", 567 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 568 .exit_latency = 300, 569 .target_residency = 900, 570 .enter = &intel_idle, 571 .enter_s2idle = intel_idle_s2idle, }, 572 { 573 .name = "C9", 574 .desc = "MWAIT 0x50", 575 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 576 .exit_latency = 600, 577 .target_residency = 1800, 578 .enter = &intel_idle, 579 .enter_s2idle = intel_idle_s2idle, }, 580 { 581 .name = "C10", 582 .desc = "MWAIT 0x60", 583 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 584 .exit_latency = 2600, 585 .target_residency = 7700, 586 .enter = &intel_idle, 587 .enter_s2idle = intel_idle_s2idle, }, 588 { 589 .enter = NULL } 590 }; 591 592 static struct cpuidle_state skl_cstates[] = { 593 { 594 .name = "C1", 595 .desc = "MWAIT 0x00", 596 .flags = MWAIT2flg(0x00), 597 .exit_latency = 2, 598 .target_residency = 2, 599 .enter = &intel_idle, 600 .enter_s2idle = intel_idle_s2idle, }, 601 { 602 .name = "C1E", 603 .desc = "MWAIT 0x01", 604 .flags = MWAIT2flg(0x01), 605 .exit_latency = 10, 606 .target_residency = 20, 607 .enter = &intel_idle, 608 .enter_s2idle = intel_idle_s2idle, }, 609 { 610 .name = "C3", 611 .desc = "MWAIT 0x10", 612 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 613 .exit_latency = 70, 614 .target_residency = 100, 615 .enter = &intel_idle, 616 .enter_s2idle = intel_idle_s2idle, }, 617 { 618 .name = "C6", 619 .desc = "MWAIT 0x20", 620 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 621 .exit_latency = 85, 622 .target_residency = 200, 623 .enter = &intel_idle, 624 .enter_s2idle = intel_idle_s2idle, }, 625 { 626 .name = "C7s", 627 .desc = "MWAIT 0x33", 628 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, 629 .exit_latency = 124, 630 .target_residency = 800, 631 .enter = &intel_idle, 632 .enter_s2idle = 
intel_idle_s2idle, }, 633 { 634 .name = "C8", 635 .desc = "MWAIT 0x40", 636 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 637 .exit_latency = 200, 638 .target_residency = 800, 639 .enter = &intel_idle, 640 .enter_s2idle = intel_idle_s2idle, }, 641 { 642 .name = "C9", 643 .desc = "MWAIT 0x50", 644 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 645 .exit_latency = 480, 646 .target_residency = 5000, 647 .enter = &intel_idle, 648 .enter_s2idle = intel_idle_s2idle, }, 649 { 650 .name = "C10", 651 .desc = "MWAIT 0x60", 652 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 653 .exit_latency = 890, 654 .target_residency = 5000, 655 .enter = &intel_idle, 656 .enter_s2idle = intel_idle_s2idle, }, 657 { 658 .enter = NULL } 659 }; 660 661 static struct cpuidle_state skx_cstates[] = { 662 { 663 .name = "C1", 664 .desc = "MWAIT 0x00", 665 .flags = MWAIT2flg(0x00), 666 .exit_latency = 2, 667 .target_residency = 2, 668 .enter = &intel_idle, 669 .enter_s2idle = intel_idle_s2idle, }, 670 { 671 .name = "C1E", 672 .desc = "MWAIT 0x01", 673 .flags = MWAIT2flg(0x01), 674 .exit_latency = 10, 675 .target_residency = 20, 676 .enter = &intel_idle, 677 .enter_s2idle = intel_idle_s2idle, }, 678 { 679 .name = "C6", 680 .desc = "MWAIT 0x20", 681 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 682 .exit_latency = 133, 683 .target_residency = 600, 684 .enter = &intel_idle, 685 .enter_s2idle = intel_idle_s2idle, }, 686 { 687 .enter = NULL } 688 }; 689 690 static struct cpuidle_state atom_cstates[] = { 691 { 692 .name = "C1E", 693 .desc = "MWAIT 0x00", 694 .flags = MWAIT2flg(0x00), 695 .exit_latency = 10, 696 .target_residency = 20, 697 .enter = &intel_idle, 698 .enter_s2idle = intel_idle_s2idle, }, 699 { 700 .name = "C2", 701 .desc = "MWAIT 0x10", 702 .flags = MWAIT2flg(0x10), 703 .exit_latency = 20, 704 .target_residency = 80, 705 .enter = &intel_idle, 706 .enter_s2idle = intel_idle_s2idle, }, 707 { 708 .name = "C4", 709 .desc = "MWAIT 0x30", 710 .flags = MWAIT2flg(0x30) | 
CPUIDLE_FLAG_TLB_FLUSHED, 711 .exit_latency = 100, 712 .target_residency = 400, 713 .enter = &intel_idle, 714 .enter_s2idle = intel_idle_s2idle, }, 715 { 716 .name = "C6", 717 .desc = "MWAIT 0x52", 718 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 719 .exit_latency = 140, 720 .target_residency = 560, 721 .enter = &intel_idle, 722 .enter_s2idle = intel_idle_s2idle, }, 723 { 724 .enter = NULL } 725 }; 726 static struct cpuidle_state tangier_cstates[] = { 727 { 728 .name = "C1", 729 .desc = "MWAIT 0x00", 730 .flags = MWAIT2flg(0x00), 731 .exit_latency = 1, 732 .target_residency = 4, 733 .enter = &intel_idle, 734 .enter_s2idle = intel_idle_s2idle, }, 735 { 736 .name = "C4", 737 .desc = "MWAIT 0x30", 738 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 739 .exit_latency = 100, 740 .target_residency = 400, 741 .enter = &intel_idle, 742 .enter_s2idle = intel_idle_s2idle, }, 743 { 744 .name = "C6", 745 .desc = "MWAIT 0x52", 746 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 747 .exit_latency = 140, 748 .target_residency = 560, 749 .enter = &intel_idle, 750 .enter_s2idle = intel_idle_s2idle, }, 751 { 752 .name = "C7", 753 .desc = "MWAIT 0x60", 754 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 755 .exit_latency = 1200, 756 .target_residency = 4000, 757 .enter = &intel_idle, 758 .enter_s2idle = intel_idle_s2idle, }, 759 { 760 .name = "C9", 761 .desc = "MWAIT 0x64", 762 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 763 .exit_latency = 10000, 764 .target_residency = 20000, 765 .enter = &intel_idle, 766 .enter_s2idle = intel_idle_s2idle, }, 767 { 768 .enter = NULL } 769 }; 770 static struct cpuidle_state avn_cstates[] = { 771 { 772 .name = "C1", 773 .desc = "MWAIT 0x00", 774 .flags = MWAIT2flg(0x00), 775 .exit_latency = 2, 776 .target_residency = 2, 777 .enter = &intel_idle, 778 .enter_s2idle = intel_idle_s2idle, }, 779 { 780 .name = "C6", 781 .desc = "MWAIT 0x51", 782 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 783 .exit_latency = 15, 784 
.target_residency = 45, 785 .enter = &intel_idle, 786 .enter_s2idle = intel_idle_s2idle, }, 787 { 788 .enter = NULL } 789 }; 790 static struct cpuidle_state knl_cstates[] = { 791 { 792 .name = "C1", 793 .desc = "MWAIT 0x00", 794 .flags = MWAIT2flg(0x00), 795 .exit_latency = 1, 796 .target_residency = 2, 797 .enter = &intel_idle, 798 .enter_s2idle = intel_idle_s2idle }, 799 { 800 .name = "C6", 801 .desc = "MWAIT 0x10", 802 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 803 .exit_latency = 120, 804 .target_residency = 500, 805 .enter = &intel_idle, 806 .enter_s2idle = intel_idle_s2idle }, 807 { 808 .enter = NULL } 809 }; 810 811 static struct cpuidle_state bxt_cstates[] = { 812 { 813 .name = "C1", 814 .desc = "MWAIT 0x00", 815 .flags = MWAIT2flg(0x00), 816 .exit_latency = 2, 817 .target_residency = 2, 818 .enter = &intel_idle, 819 .enter_s2idle = intel_idle_s2idle, }, 820 { 821 .name = "C1E", 822 .desc = "MWAIT 0x01", 823 .flags = MWAIT2flg(0x01), 824 .exit_latency = 10, 825 .target_residency = 20, 826 .enter = &intel_idle, 827 .enter_s2idle = intel_idle_s2idle, }, 828 { 829 .name = "C6", 830 .desc = "MWAIT 0x20", 831 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 832 .exit_latency = 133, 833 .target_residency = 133, 834 .enter = &intel_idle, 835 .enter_s2idle = intel_idle_s2idle, }, 836 { 837 .name = "C7s", 838 .desc = "MWAIT 0x31", 839 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 840 .exit_latency = 155, 841 .target_residency = 155, 842 .enter = &intel_idle, 843 .enter_s2idle = intel_idle_s2idle, }, 844 { 845 .name = "C8", 846 .desc = "MWAIT 0x40", 847 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 848 .exit_latency = 1000, 849 .target_residency = 1000, 850 .enter = &intel_idle, 851 .enter_s2idle = intel_idle_s2idle, }, 852 { 853 .name = "C9", 854 .desc = "MWAIT 0x50", 855 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 856 .exit_latency = 2000, 857 .target_residency = 2000, 858 .enter = &intel_idle, 859 .enter_s2idle = 
intel_idle_s2idle, }, 860 { 861 .name = "C10", 862 .desc = "MWAIT 0x60", 863 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 864 .exit_latency = 10000, 865 .target_residency = 10000, 866 .enter = &intel_idle, 867 .enter_s2idle = intel_idle_s2idle, }, 868 { 869 .enter = NULL } 870 }; 871 872 static struct cpuidle_state dnv_cstates[] = { 873 { 874 .name = "C1", 875 .desc = "MWAIT 0x00", 876 .flags = MWAIT2flg(0x00), 877 .exit_latency = 2, 878 .target_residency = 2, 879 .enter = &intel_idle, 880 .enter_s2idle = intel_idle_s2idle, }, 881 { 882 .name = "C1E", 883 .desc = "MWAIT 0x01", 884 .flags = MWAIT2flg(0x01), 885 .exit_latency = 10, 886 .target_residency = 20, 887 .enter = &intel_idle, 888 .enter_s2idle = intel_idle_s2idle, }, 889 { 890 .name = "C6", 891 .desc = "MWAIT 0x20", 892 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 893 .exit_latency = 50, 894 .target_residency = 500, 895 .enter = &intel_idle, 896 .enter_s2idle = intel_idle_s2idle, }, 897 { 898 .enter = NULL } 899 }; 900 901 /** 902 * intel_idle 903 * @dev: cpuidle_device 904 * @drv: cpuidle driver 905 * @index: index of cpuidle state 906 * 907 * Must be called under local_irq_disable(). 908 */ 909 static __cpuidle int intel_idle(struct cpuidle_device *dev, 910 struct cpuidle_driver *drv, int index) 911 { 912 unsigned long ecx = 1; /* break on interrupt flag */ 913 struct cpuidle_state *state = &drv->states[index]; 914 unsigned long eax = flg2MWAIT(state->flags); 915 unsigned int cstate; 916 bool uninitialized_var(tick); 917 int cpu = smp_processor_id(); 918 919 /* 920 * leave_mm() to avoid costly and often unnecessary wakeups 921 * for flushing the user TLB's associated with the active mm. 
922 */ 923 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) 924 leave_mm(cpu); 925 926 if (!static_cpu_has(X86_FEATURE_ARAT)) { 927 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & 928 MWAIT_CSTATE_MASK) + 1; 929 tick = false; 930 if (!(lapic_timer_reliable_states & (1 << (cstate)))) { 931 tick = true; 932 tick_broadcast_enter(); 933 } 934 } 935 936 mwait_idle_with_hints(eax, ecx); 937 938 if (!static_cpu_has(X86_FEATURE_ARAT) && tick) 939 tick_broadcast_exit(); 940 941 return index; 942 } 943 944 /** 945 * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle 946 * @dev: cpuidle_device 947 * @drv: cpuidle driver 948 * @index: state index 949 */ 950 static void intel_idle_s2idle(struct cpuidle_device *dev, 951 struct cpuidle_driver *drv, int index) 952 { 953 unsigned long ecx = 1; /* break on interrupt flag */ 954 unsigned long eax = flg2MWAIT(drv->states[index].flags); 955 956 mwait_idle_with_hints(eax, ecx); 957 } 958 959 static void __setup_broadcast_timer(bool on) 960 { 961 if (on) 962 tick_broadcast_enable(); 963 else 964 tick_broadcast_disable(); 965 } 966 967 static void auto_demotion_disable(void) 968 { 969 unsigned long long msr_bits; 970 971 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 972 msr_bits &= ~(icpu->auto_demotion_disable_flags); 973 wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); 974 } 975 static void c1e_promotion_disable(void) 976 { 977 unsigned long long msr_bits; 978 979 rdmsrl(MSR_IA32_POWER_CTL, msr_bits); 980 msr_bits &= ~0x2; 981 wrmsrl(MSR_IA32_POWER_CTL, msr_bits); 982 } 983 984 static const struct idle_cpu idle_cpu_nehalem = { 985 .state_table = nehalem_cstates, 986 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 987 .disable_promotion_to_c1e = true, 988 }; 989 990 static const struct idle_cpu idle_cpu_atom = { 991 .state_table = atom_cstates, 992 }; 993 994 static const struct idle_cpu idle_cpu_tangier = { 995 .state_table = tangier_cstates, 996 }; 997 998 static const struct idle_cpu 
idle_cpu_lincroft = { 999 .state_table = atom_cstates, 1000 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1001 }; 1002 1003 static const struct idle_cpu idle_cpu_snb = { 1004 .state_table = snb_cstates, 1005 .disable_promotion_to_c1e = true, 1006 }; 1007 1008 static const struct idle_cpu idle_cpu_byt = { 1009 .state_table = byt_cstates, 1010 .disable_promotion_to_c1e = true, 1011 .byt_auto_demotion_disable_flag = true, 1012 }; 1013 1014 static const struct idle_cpu idle_cpu_cht = { 1015 .state_table = cht_cstates, 1016 .disable_promotion_to_c1e = true, 1017 .byt_auto_demotion_disable_flag = true, 1018 }; 1019 1020 static const struct idle_cpu idle_cpu_ivb = { 1021 .state_table = ivb_cstates, 1022 .disable_promotion_to_c1e = true, 1023 }; 1024 1025 static const struct idle_cpu idle_cpu_ivt = { 1026 .state_table = ivt_cstates, 1027 .disable_promotion_to_c1e = true, 1028 }; 1029 1030 static const struct idle_cpu idle_cpu_hsw = { 1031 .state_table = hsw_cstates, 1032 .disable_promotion_to_c1e = true, 1033 }; 1034 1035 static const struct idle_cpu idle_cpu_bdw = { 1036 .state_table = bdw_cstates, 1037 .disable_promotion_to_c1e = true, 1038 }; 1039 1040 static const struct idle_cpu idle_cpu_skl = { 1041 .state_table = skl_cstates, 1042 .disable_promotion_to_c1e = true, 1043 }; 1044 1045 static const struct idle_cpu idle_cpu_skx = { 1046 .state_table = skx_cstates, 1047 .disable_promotion_to_c1e = true, 1048 }; 1049 1050 static const struct idle_cpu idle_cpu_avn = { 1051 .state_table = avn_cstates, 1052 .disable_promotion_to_c1e = true, 1053 }; 1054 1055 static const struct idle_cpu idle_cpu_knl = { 1056 .state_table = knl_cstates, 1057 }; 1058 1059 static const struct idle_cpu idle_cpu_bxt = { 1060 .state_table = bxt_cstates, 1061 .disable_promotion_to_c1e = true, 1062 }; 1063 1064 static const struct idle_cpu idle_cpu_dnv = { 1065 .state_table = dnv_cstates, 1066 .disable_promotion_to_c1e = true, 1067 }; 1068 1069 #define ICPU(model, cpu) \ 1070 { 
X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu } 1071 1072 static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1073 ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), 1074 ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), 1075 ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), 1076 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), 1077 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), 1078 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), 1079 ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), 1080 ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), 1081 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), 1082 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), 1083 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), 1084 ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), 1085 ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), 1086 ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), 1087 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), 1088 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), 1089 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), 1090 ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), 1091 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), 1092 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), 1093 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), 1094 ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), 1095 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), 1096 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), 1097 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), 1098 ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), 1099 ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), 1100 ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), 1101 ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), 1102 ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), 1103 ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), 1104 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), 1105 ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), 1106 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), 1107 ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), 1108 ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), 1109 {} 
1110 }; 1111 1112 /* 1113 * intel_idle_probe() 1114 */ 1115 static int __init intel_idle_probe(void) 1116 { 1117 unsigned int eax, ebx, ecx; 1118 const struct x86_cpu_id *id; 1119 1120 if (max_cstate == 0) { 1121 pr_debug("disabled\n"); 1122 return -EPERM; 1123 } 1124 1125 id = x86_match_cpu(intel_idle_ids); 1126 if (!id) { 1127 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1128 boot_cpu_data.x86 == 6) 1129 pr_debug("does not run on family %d model %d\n", 1130 boot_cpu_data.x86, boot_cpu_data.x86_model); 1131 return -ENODEV; 1132 } 1133 1134 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 1135 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 1136 return -ENODEV; 1137 } 1138 1139 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1140 return -ENODEV; 1141 1142 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); 1143 1144 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || 1145 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || 1146 !mwait_substates) 1147 return -ENODEV; 1148 1149 pr_debug("MWAIT substates: 0x%x\n", mwait_substates); 1150 1151 icpu = (const struct idle_cpu *)id->driver_data; 1152 cpuidle_state_table = icpu->state_table; 1153 1154 pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 1155 boot_cpu_data.x86_model); 1156 1157 return 0; 1158 } 1159 1160 /* 1161 * intel_idle_cpuidle_devices_uninit() 1162 * Unregisters the cpuidle devices. 
1163 */ 1164 static void intel_idle_cpuidle_devices_uninit(void) 1165 { 1166 int i; 1167 struct cpuidle_device *dev; 1168 1169 for_each_online_cpu(i) { 1170 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); 1171 cpuidle_unregister_device(dev); 1172 } 1173 } 1174 1175 /* 1176 * ivt_idle_state_table_update(void) 1177 * 1178 * Tune IVT multi-socket targets 1179 * Assumption: num_sockets == (max_package_num + 1) 1180 */ 1181 static void ivt_idle_state_table_update(void) 1182 { 1183 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1184 int cpu, package_num, num_sockets = 1; 1185 1186 for_each_online_cpu(cpu) { 1187 package_num = topology_physical_package_id(cpu); 1188 if (package_num + 1 > num_sockets) { 1189 num_sockets = package_num + 1; 1190 1191 if (num_sockets > 4) { 1192 cpuidle_state_table = ivt_cstates_8s; 1193 return; 1194 } 1195 } 1196 } 1197 1198 if (num_sockets > 2) 1199 cpuidle_state_table = ivt_cstates_4s; 1200 1201 /* else, 1 and 2 socket systems use default ivt_cstates */ 1202 } 1203 1204 /* 1205 * Translate IRTL (Interrupt Response Time Limit) MSR to usec 1206 */ 1207 1208 static unsigned int irtl_ns_units[] = { 1209 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 1210 1211 static unsigned long long irtl_2_usec(unsigned long long irtl) 1212 { 1213 unsigned long long ns; 1214 1215 if (!irtl) 1216 return 0; 1217 1218 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1219 1220 return div64_u64((irtl & 0x3FF) * ns, 1000); 1221 } 1222 /* 1223 * bxt_idle_state_table_update(void) 1224 * 1225 * On BXT, we trust the IRTL to show the definitive maximum latency 1226 * We use the same value for target_residency. 
1227 */ 1228 static void bxt_idle_state_table_update(void) 1229 { 1230 unsigned long long msr; 1231 unsigned int usec; 1232 1233 rdmsrl(MSR_PKGC6_IRTL, msr); 1234 usec = irtl_2_usec(msr); 1235 if (usec) { 1236 bxt_cstates[2].exit_latency = usec; 1237 bxt_cstates[2].target_residency = usec; 1238 } 1239 1240 rdmsrl(MSR_PKGC7_IRTL, msr); 1241 usec = irtl_2_usec(msr); 1242 if (usec) { 1243 bxt_cstates[3].exit_latency = usec; 1244 bxt_cstates[3].target_residency = usec; 1245 } 1246 1247 rdmsrl(MSR_PKGC8_IRTL, msr); 1248 usec = irtl_2_usec(msr); 1249 if (usec) { 1250 bxt_cstates[4].exit_latency = usec; 1251 bxt_cstates[4].target_residency = usec; 1252 } 1253 1254 rdmsrl(MSR_PKGC9_IRTL, msr); 1255 usec = irtl_2_usec(msr); 1256 if (usec) { 1257 bxt_cstates[5].exit_latency = usec; 1258 bxt_cstates[5].target_residency = usec; 1259 } 1260 1261 rdmsrl(MSR_PKGC10_IRTL, msr); 1262 usec = irtl_2_usec(msr); 1263 if (usec) { 1264 bxt_cstates[6].exit_latency = usec; 1265 bxt_cstates[6].target_residency = usec; 1266 } 1267 1268 } 1269 /* 1270 * sklh_idle_state_table_update(void) 1271 * 1272 * On SKL-H (model 0x5e) disable C8 and C9 if: 1273 * C10 is enabled and SGX disabled 1274 */ 1275 static void sklh_idle_state_table_update(void) 1276 { 1277 unsigned long long msr; 1278 unsigned int eax, ebx, ecx, edx; 1279 1280 1281 /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */ 1282 if (max_cstate <= 7) 1283 return; 1284 1285 /* if PC10 not present in CPUID.MWAIT.EDX */ 1286 if ((mwait_substates & (0xF << 28)) == 0) 1287 return; 1288 1289 rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); 1290 1291 /* PC10 is not enabled in PKG C-state limit */ 1292 if ((msr & 0xF) != 8) 1293 return; 1294 1295 ecx = 0; 1296 cpuid(7, &eax, &ebx, &ecx, &edx); 1297 1298 /* if SGX is present */ 1299 if (ebx & (1 << 2)) { 1300 1301 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1302 1303 /* if SGX is enabled */ 1304 if (msr & (1 << 18)) 1305 return; 1306 } 1307 1308 skl_cstates[5].disabled = 1; /* C8-SKL */ 
1309 skl_cstates[6].disabled = 1; /* C9-SKL */ 1310 } 1311 /* 1312 * intel_idle_state_table_update() 1313 * 1314 * Update the default state_table for this CPU-id 1315 */ 1316 1317 static void intel_idle_state_table_update(void) 1318 { 1319 switch (boot_cpu_data.x86_model) { 1320 1321 case INTEL_FAM6_IVYBRIDGE_X: 1322 ivt_idle_state_table_update(); 1323 break; 1324 case INTEL_FAM6_ATOM_GOLDMONT: 1325 case INTEL_FAM6_ATOM_GEMINI_LAKE: 1326 bxt_idle_state_table_update(); 1327 break; 1328 case INTEL_FAM6_SKYLAKE_DESKTOP: 1329 sklh_idle_state_table_update(); 1330 break; 1331 } 1332 } 1333 1334 /* 1335 * intel_idle_cpuidle_driver_init() 1336 * allocate, initialize cpuidle_states 1337 */ 1338 static void __init intel_idle_cpuidle_driver_init(void) 1339 { 1340 int cstate; 1341 struct cpuidle_driver *drv = &intel_idle_driver; 1342 1343 intel_idle_state_table_update(); 1344 1345 cpuidle_poll_state_init(drv); 1346 drv->state_count = 1; 1347 1348 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 1349 int num_substates, mwait_hint, mwait_cstate; 1350 1351 if ((cpuidle_state_table[cstate].enter == NULL) && 1352 (cpuidle_state_table[cstate].enter_s2idle == NULL)) 1353 break; 1354 1355 if (cstate + 1 > max_cstate) { 1356 pr_info("max_cstate %d reached\n", max_cstate); 1357 break; 1358 } 1359 1360 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 1361 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); 1362 1363 /* number of sub-states for this state in CPUID.MWAIT */ 1364 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) 1365 & MWAIT_SUBSTATE_MASK; 1366 1367 /* if NO sub-states for this state in CPUID, skip it */ 1368 if (num_substates == 0) 1369 continue; 1370 1371 /* if state marked as disabled, skip it */ 1372 if (cpuidle_state_table[cstate].disabled != 0) { 1373 pr_debug("state %s is disabled\n", 1374 cpuidle_state_table[cstate].name); 1375 continue; 1376 } 1377 1378 1379 if (((mwait_cstate + 1) > 2) && 1380 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1381 
mark_tsc_unstable("TSC halts in idle" 1382 " states deeper than C2"); 1383 1384 drv->states[drv->state_count] = /* structure copy */ 1385 cpuidle_state_table[cstate]; 1386 1387 drv->state_count += 1; 1388 } 1389 1390 if (icpu->byt_auto_demotion_disable_flag) { 1391 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0); 1392 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0); 1393 } 1394 } 1395 1396 1397 /* 1398 * intel_idle_cpu_init() 1399 * allocate, initialize, register cpuidle_devices 1400 * @cpu: cpu/core to initialize 1401 */ 1402 static int intel_idle_cpu_init(unsigned int cpu) 1403 { 1404 struct cpuidle_device *dev; 1405 1406 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1407 dev->cpu = cpu; 1408 1409 if (cpuidle_register_device(dev)) { 1410 pr_debug("cpuidle_register_device %d failed!\n", cpu); 1411 return -EIO; 1412 } 1413 1414 if (icpu->auto_demotion_disable_flags) 1415 auto_demotion_disable(); 1416 1417 if (icpu->disable_promotion_to_c1e) 1418 c1e_promotion_disable(); 1419 1420 return 0; 1421 } 1422 1423 static int intel_idle_cpu_online(unsigned int cpu) 1424 { 1425 struct cpuidle_device *dev; 1426 1427 if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) 1428 __setup_broadcast_timer(true); 1429 1430 /* 1431 * Some systems can hotplug a cpu at runtime after 1432 * the kernel has booted, we have to initialize the 1433 * driver in this case 1434 */ 1435 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu); 1436 if (!dev->registered) 1437 return intel_idle_cpu_init(cpu); 1438 1439 return 0; 1440 } 1441 1442 static int __init intel_idle_init(void) 1443 { 1444 int retval; 1445 1446 /* Do not load intel_idle at all for now if idle= is passed */ 1447 if (boot_option_idle_override != IDLE_NO_OVERRIDE) 1448 return -ENODEV; 1449 1450 retval = intel_idle_probe(); 1451 if (retval) 1452 return retval; 1453 1454 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 1455 if (intel_idle_cpuidle_devices == NULL) 1456 return -ENOMEM; 1457 1458 
intel_idle_cpuidle_driver_init(); 1459 retval = cpuidle_register_driver(&intel_idle_driver); 1460 if (retval) { 1461 struct cpuidle_driver *drv = cpuidle_get_driver(); 1462 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 1463 drv ? drv->name : "none"); 1464 goto init_driver_fail; 1465 } 1466 1467 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ 1468 lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; 1469 1470 retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 1471 intel_idle_cpu_online, NULL); 1472 if (retval < 0) 1473 goto hp_setup_fail; 1474 1475 pr_debug("lapic_timer_reliable_states 0x%x\n", 1476 lapic_timer_reliable_states); 1477 1478 return 0; 1479 1480 hp_setup_fail: 1481 intel_idle_cpuidle_devices_uninit(); 1482 cpuidle_unregister_driver(&intel_idle_driver); 1483 init_driver_fail: 1484 free_percpu(intel_idle_cpuidle_devices); 1485 return retval; 1486 1487 } 1488 device_initcall(intel_idle_init); 1489 1490 /* 1491 * We are not really modular, but we used to support that. Meaning we also 1492 * support "intel_idle.max_cstate=..." at boot and also a read-only export of 1493 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param 1494 * is the easiest way (currently) to continue doing that. 1495 */ 1496 module_param(max_cstate, int, 0444); 1497