xref: /openbmc/linux/drivers/idle/intel_idle.c (revision 911b8eac)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8  */
9 
10 /*
11  * intel_idle is a cpuidle driver that loads on specific Intel processors
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16 
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
 *	for preventing entry into deep C-states
24  */
25 
26 /*
27  * Known limitations
28  *
 * ACPI has a .suspend hack to turn off deep C-states during suspend
30  * to avoid complications with the lapic timer workaround.
31  * Have not seen issues with suspend, but may need same workaround here.
32  *
33  */
34 
/* un-comment DEBUG to enable pr_debug() statements */
/* #define DEBUG */
37 
38 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39 
40 #include <linux/acpi.h>
41 #include <linux/kernel.h>
42 #include <linux/cpuidle.h>
43 #include <linux/tick.h>
44 #include <trace/events/power.h>
45 #include <linux/sched.h>
46 #include <linux/notifier.h>
47 #include <linux/cpu.h>
48 #include <linux/moduleparam.h>
49 #include <asm/cpu_device_id.h>
50 #include <asm/intel-family.h>
51 #include <asm/mwait.h>
52 #include <asm/msr.h>
53 
54 #define INTEL_IDLE_VERSION "0.5.1"
55 
/* The cpuidle driver instance registered with the cpuidle core. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bitmask of state indices to start disabled; NOTE(review): presumably a
 * module parameter — registration is not visible in this chunk, confirm. */
static unsigned int disabled_states_mask;

/* Per-CPU cpuidle device objects, allocated at init (alloc not shown here). */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/* Copied from the matched idle_cpu entry; MSR bits to clear so that
 * hardware C-state auto-demotion is disabled (see struct idle_cpu). */
static unsigned long auto_demotion_disable_flags;
/* Copied from the matched idle_cpu entry (see struct idle_cpu). */
static bool disable_promotion_to_c1e;
68 
/*
 * Per-CPU-model configuration selected via the x86 CPU-ID match table.
 */
struct idle_cpu {
	/* C-state table to register for this CPU model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Bay Trail/Cherry Trail-specific auto-demotion disable quirk. */
	bool byt_auto_demotion_disable_flag;
	/* Prevent hardware promotion of C1 requests to C1E. */
	bool disable_promotion_to_c1e;
	/* Also consult ACPI (_CST) data; NOTE(review): consumer logic is
	 * outside this chunk — confirm exact semantics there. */
	bool use_acpi;
};
81 
/* Matched per-model configuration; only used during initialization. */
static const struct idle_cpu *icpu __initdata;
/* C-state table being set up at init time. */
static struct cpuidle_state *cpuidle_state_table __initdata;

/* MWAIT sub-state availability bitmap; NOTE(review): presumably filled from
 * CPUID leaf 5 — the code populating it is not visible in this chunk. */
static unsigned int mwait_substates __initdata;

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
91 
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
/* Parenthesize the macro argument so expressions with operators of lower
 * precedence than '&' (e.g. '|') expand correctly. */
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
101 
102 /**
103  * intel_idle - Ask the processor to enter the given idle state.
104  * @dev: cpuidle device of the target CPU.
105  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
106  * @index: Target idle state index.
107  *
108  * Use the MWAIT instruction to notify the processor that the CPU represented by
109  * @dev is idle and it can try to enter the idle state corresponding to @index.
110  *
111  * If the local APIC timer is not known to be reliable in the target idle state,
112  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
113  *
114  * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to
115  * flushing user TLBs.
116  *
117  * Must be called under local_irq_disable().
118  */
119 static __cpuidle int intel_idle(struct cpuidle_device *dev,
120 				struct cpuidle_driver *drv, int index)
121 {
122 	struct cpuidle_state *state = &drv->states[index];
123 	unsigned long eax = flg2MWAIT(state->flags);
124 	unsigned long ecx = 1; /* break on interrupt flag */
125 	bool tick;
126 
127 	if (!static_cpu_has(X86_FEATURE_ARAT)) {
128 		/*
129 		 * Switch over to one-shot tick broadcast if the target C-state
130 		 * is deeper than C1.
131 		 */
132 		if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
133 			tick = true;
134 			tick_broadcast_enter();
135 		} else {
136 			tick = false;
137 		}
138 	}
139 
140 	mwait_idle_with_hints(eax, ecx);
141 
142 	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
143 		tick_broadcast_exit();
144 
145 	return index;
146 }
147 
148 /**
149  * intel_idle_s2idle - Ask the processor to enter the given idle state.
150  * @dev: cpuidle device of the target CPU.
151  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
152  * @index: Target idle state index.
153  *
154  * Use the MWAIT instruction to notify the processor that the CPU represented by
155  * @dev is idle and it can try to enter the idle state corresponding to @index.
156  *
157  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
158  * scheduler tick and suspended scheduler clock on the target CPU.
159  */
160 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
161 				       struct cpuidle_driver *drv, int index)
162 {
163 	unsigned long eax = flg2MWAIT(drv->states[index].flags);
164 	unsigned long ecx = 1; /* break on interrupt flag */
165 
166 	mwait_idle_with_hints(eax, ecx);
167 
168 	return 0;
169 }
170 
171 /*
172  * States are indexed by the cstate number,
173  * which is also the index into the MWAIT hint array.
174  * Thus C0 is a dummy.
175  */
176 static struct cpuidle_state nehalem_cstates[] __initdata = {
177 	{
178 		.name = "C1",
179 		.desc = "MWAIT 0x00",
180 		.flags = MWAIT2flg(0x00),
181 		.exit_latency = 3,
182 		.target_residency = 6,
183 		.enter = &intel_idle,
184 		.enter_s2idle = intel_idle_s2idle, },
185 	{
186 		.name = "C1E",
187 		.desc = "MWAIT 0x01",
188 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
189 		.exit_latency = 10,
190 		.target_residency = 20,
191 		.enter = &intel_idle,
192 		.enter_s2idle = intel_idle_s2idle, },
193 	{
194 		.name = "C3",
195 		.desc = "MWAIT 0x10",
196 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
197 		.exit_latency = 20,
198 		.target_residency = 80,
199 		.enter = &intel_idle,
200 		.enter_s2idle = intel_idle_s2idle, },
201 	{
202 		.name = "C6",
203 		.desc = "MWAIT 0x20",
204 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
205 		.exit_latency = 200,
206 		.target_residency = 800,
207 		.enter = &intel_idle,
208 		.enter_s2idle = intel_idle_s2idle, },
209 	{
210 		.enter = NULL }
211 };
212 
213 static struct cpuidle_state snb_cstates[] __initdata = {
214 	{
215 		.name = "C1",
216 		.desc = "MWAIT 0x00",
217 		.flags = MWAIT2flg(0x00),
218 		.exit_latency = 2,
219 		.target_residency = 2,
220 		.enter = &intel_idle,
221 		.enter_s2idle = intel_idle_s2idle, },
222 	{
223 		.name = "C1E",
224 		.desc = "MWAIT 0x01",
225 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
226 		.exit_latency = 10,
227 		.target_residency = 20,
228 		.enter = &intel_idle,
229 		.enter_s2idle = intel_idle_s2idle, },
230 	{
231 		.name = "C3",
232 		.desc = "MWAIT 0x10",
233 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
234 		.exit_latency = 80,
235 		.target_residency = 211,
236 		.enter = &intel_idle,
237 		.enter_s2idle = intel_idle_s2idle, },
238 	{
239 		.name = "C6",
240 		.desc = "MWAIT 0x20",
241 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
242 		.exit_latency = 104,
243 		.target_residency = 345,
244 		.enter = &intel_idle,
245 		.enter_s2idle = intel_idle_s2idle, },
246 	{
247 		.name = "C7",
248 		.desc = "MWAIT 0x30",
249 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
250 		.exit_latency = 109,
251 		.target_residency = 345,
252 		.enter = &intel_idle,
253 		.enter_s2idle = intel_idle_s2idle, },
254 	{
255 		.enter = NULL }
256 };
257 
258 static struct cpuidle_state byt_cstates[] __initdata = {
259 	{
260 		.name = "C1",
261 		.desc = "MWAIT 0x00",
262 		.flags = MWAIT2flg(0x00),
263 		.exit_latency = 1,
264 		.target_residency = 1,
265 		.enter = &intel_idle,
266 		.enter_s2idle = intel_idle_s2idle, },
267 	{
268 		.name = "C6N",
269 		.desc = "MWAIT 0x58",
270 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
271 		.exit_latency = 300,
272 		.target_residency = 275,
273 		.enter = &intel_idle,
274 		.enter_s2idle = intel_idle_s2idle, },
275 	{
276 		.name = "C6S",
277 		.desc = "MWAIT 0x52",
278 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
279 		.exit_latency = 500,
280 		.target_residency = 560,
281 		.enter = &intel_idle,
282 		.enter_s2idle = intel_idle_s2idle, },
283 	{
284 		.name = "C7",
285 		.desc = "MWAIT 0x60",
286 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
287 		.exit_latency = 1200,
288 		.target_residency = 4000,
289 		.enter = &intel_idle,
290 		.enter_s2idle = intel_idle_s2idle, },
291 	{
292 		.name = "C7S",
293 		.desc = "MWAIT 0x64",
294 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
295 		.exit_latency = 10000,
296 		.target_residency = 20000,
297 		.enter = &intel_idle,
298 		.enter_s2idle = intel_idle_s2idle, },
299 	{
300 		.enter = NULL }
301 };
302 
303 static struct cpuidle_state cht_cstates[] __initdata = {
304 	{
305 		.name = "C1",
306 		.desc = "MWAIT 0x00",
307 		.flags = MWAIT2flg(0x00),
308 		.exit_latency = 1,
309 		.target_residency = 1,
310 		.enter = &intel_idle,
311 		.enter_s2idle = intel_idle_s2idle, },
312 	{
313 		.name = "C6N",
314 		.desc = "MWAIT 0x58",
315 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
316 		.exit_latency = 80,
317 		.target_residency = 275,
318 		.enter = &intel_idle,
319 		.enter_s2idle = intel_idle_s2idle, },
320 	{
321 		.name = "C6S",
322 		.desc = "MWAIT 0x52",
323 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
324 		.exit_latency = 200,
325 		.target_residency = 560,
326 		.enter = &intel_idle,
327 		.enter_s2idle = intel_idle_s2idle, },
328 	{
329 		.name = "C7",
330 		.desc = "MWAIT 0x60",
331 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
332 		.exit_latency = 1200,
333 		.target_residency = 4000,
334 		.enter = &intel_idle,
335 		.enter_s2idle = intel_idle_s2idle, },
336 	{
337 		.name = "C7S",
338 		.desc = "MWAIT 0x64",
339 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
340 		.exit_latency = 10000,
341 		.target_residency = 20000,
342 		.enter = &intel_idle,
343 		.enter_s2idle = intel_idle_s2idle, },
344 	{
345 		.enter = NULL }
346 };
347 
348 static struct cpuidle_state ivb_cstates[] __initdata = {
349 	{
350 		.name = "C1",
351 		.desc = "MWAIT 0x00",
352 		.flags = MWAIT2flg(0x00),
353 		.exit_latency = 1,
354 		.target_residency = 1,
355 		.enter = &intel_idle,
356 		.enter_s2idle = intel_idle_s2idle, },
357 	{
358 		.name = "C1E",
359 		.desc = "MWAIT 0x01",
360 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
361 		.exit_latency = 10,
362 		.target_residency = 20,
363 		.enter = &intel_idle,
364 		.enter_s2idle = intel_idle_s2idle, },
365 	{
366 		.name = "C3",
367 		.desc = "MWAIT 0x10",
368 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
369 		.exit_latency = 59,
370 		.target_residency = 156,
371 		.enter = &intel_idle,
372 		.enter_s2idle = intel_idle_s2idle, },
373 	{
374 		.name = "C6",
375 		.desc = "MWAIT 0x20",
376 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
377 		.exit_latency = 80,
378 		.target_residency = 300,
379 		.enter = &intel_idle,
380 		.enter_s2idle = intel_idle_s2idle, },
381 	{
382 		.name = "C7",
383 		.desc = "MWAIT 0x30",
384 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
385 		.exit_latency = 87,
386 		.target_residency = 300,
387 		.enter = &intel_idle,
388 		.enter_s2idle = intel_idle_s2idle, },
389 	{
390 		.enter = NULL }
391 };
392 
393 static struct cpuidle_state ivt_cstates[] __initdata = {
394 	{
395 		.name = "C1",
396 		.desc = "MWAIT 0x00",
397 		.flags = MWAIT2flg(0x00),
398 		.exit_latency = 1,
399 		.target_residency = 1,
400 		.enter = &intel_idle,
401 		.enter_s2idle = intel_idle_s2idle, },
402 	{
403 		.name = "C1E",
404 		.desc = "MWAIT 0x01",
405 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
406 		.exit_latency = 10,
407 		.target_residency = 80,
408 		.enter = &intel_idle,
409 		.enter_s2idle = intel_idle_s2idle, },
410 	{
411 		.name = "C3",
412 		.desc = "MWAIT 0x10",
413 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
414 		.exit_latency = 59,
415 		.target_residency = 156,
416 		.enter = &intel_idle,
417 		.enter_s2idle = intel_idle_s2idle, },
418 	{
419 		.name = "C6",
420 		.desc = "MWAIT 0x20",
421 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
422 		.exit_latency = 82,
423 		.target_residency = 300,
424 		.enter = &intel_idle,
425 		.enter_s2idle = intel_idle_s2idle, },
426 	{
427 		.enter = NULL }
428 };
429 
430 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
431 	{
432 		.name = "C1",
433 		.desc = "MWAIT 0x00",
434 		.flags = MWAIT2flg(0x00),
435 		.exit_latency = 1,
436 		.target_residency = 1,
437 		.enter = &intel_idle,
438 		.enter_s2idle = intel_idle_s2idle, },
439 	{
440 		.name = "C1E",
441 		.desc = "MWAIT 0x01",
442 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
443 		.exit_latency = 10,
444 		.target_residency = 250,
445 		.enter = &intel_idle,
446 		.enter_s2idle = intel_idle_s2idle, },
447 	{
448 		.name = "C3",
449 		.desc = "MWAIT 0x10",
450 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
451 		.exit_latency = 59,
452 		.target_residency = 300,
453 		.enter = &intel_idle,
454 		.enter_s2idle = intel_idle_s2idle, },
455 	{
456 		.name = "C6",
457 		.desc = "MWAIT 0x20",
458 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
459 		.exit_latency = 84,
460 		.target_residency = 400,
461 		.enter = &intel_idle,
462 		.enter_s2idle = intel_idle_s2idle, },
463 	{
464 		.enter = NULL }
465 };
466 
467 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
468 	{
469 		.name = "C1",
470 		.desc = "MWAIT 0x00",
471 		.flags = MWAIT2flg(0x00),
472 		.exit_latency = 1,
473 		.target_residency = 1,
474 		.enter = &intel_idle,
475 		.enter_s2idle = intel_idle_s2idle, },
476 	{
477 		.name = "C1E",
478 		.desc = "MWAIT 0x01",
479 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
480 		.exit_latency = 10,
481 		.target_residency = 500,
482 		.enter = &intel_idle,
483 		.enter_s2idle = intel_idle_s2idle, },
484 	{
485 		.name = "C3",
486 		.desc = "MWAIT 0x10",
487 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
488 		.exit_latency = 59,
489 		.target_residency = 600,
490 		.enter = &intel_idle,
491 		.enter_s2idle = intel_idle_s2idle, },
492 	{
493 		.name = "C6",
494 		.desc = "MWAIT 0x20",
495 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
496 		.exit_latency = 88,
497 		.target_residency = 700,
498 		.enter = &intel_idle,
499 		.enter_s2idle = intel_idle_s2idle, },
500 	{
501 		.enter = NULL }
502 };
503 
504 static struct cpuidle_state hsw_cstates[] __initdata = {
505 	{
506 		.name = "C1",
507 		.desc = "MWAIT 0x00",
508 		.flags = MWAIT2flg(0x00),
509 		.exit_latency = 2,
510 		.target_residency = 2,
511 		.enter = &intel_idle,
512 		.enter_s2idle = intel_idle_s2idle, },
513 	{
514 		.name = "C1E",
515 		.desc = "MWAIT 0x01",
516 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
517 		.exit_latency = 10,
518 		.target_residency = 20,
519 		.enter = &intel_idle,
520 		.enter_s2idle = intel_idle_s2idle, },
521 	{
522 		.name = "C3",
523 		.desc = "MWAIT 0x10",
524 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
525 		.exit_latency = 33,
526 		.target_residency = 100,
527 		.enter = &intel_idle,
528 		.enter_s2idle = intel_idle_s2idle, },
529 	{
530 		.name = "C6",
531 		.desc = "MWAIT 0x20",
532 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
533 		.exit_latency = 133,
534 		.target_residency = 400,
535 		.enter = &intel_idle,
536 		.enter_s2idle = intel_idle_s2idle, },
537 	{
538 		.name = "C7s",
539 		.desc = "MWAIT 0x32",
540 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
541 		.exit_latency = 166,
542 		.target_residency = 500,
543 		.enter = &intel_idle,
544 		.enter_s2idle = intel_idle_s2idle, },
545 	{
546 		.name = "C8",
547 		.desc = "MWAIT 0x40",
548 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
549 		.exit_latency = 300,
550 		.target_residency = 900,
551 		.enter = &intel_idle,
552 		.enter_s2idle = intel_idle_s2idle, },
553 	{
554 		.name = "C9",
555 		.desc = "MWAIT 0x50",
556 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
557 		.exit_latency = 600,
558 		.target_residency = 1800,
559 		.enter = &intel_idle,
560 		.enter_s2idle = intel_idle_s2idle, },
561 	{
562 		.name = "C10",
563 		.desc = "MWAIT 0x60",
564 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
565 		.exit_latency = 2600,
566 		.target_residency = 7700,
567 		.enter = &intel_idle,
568 		.enter_s2idle = intel_idle_s2idle, },
569 	{
570 		.enter = NULL }
571 };
572 static struct cpuidle_state bdw_cstates[] __initdata = {
573 	{
574 		.name = "C1",
575 		.desc = "MWAIT 0x00",
576 		.flags = MWAIT2flg(0x00),
577 		.exit_latency = 2,
578 		.target_residency = 2,
579 		.enter = &intel_idle,
580 		.enter_s2idle = intel_idle_s2idle, },
581 	{
582 		.name = "C1E",
583 		.desc = "MWAIT 0x01",
584 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
585 		.exit_latency = 10,
586 		.target_residency = 20,
587 		.enter = &intel_idle,
588 		.enter_s2idle = intel_idle_s2idle, },
589 	{
590 		.name = "C3",
591 		.desc = "MWAIT 0x10",
592 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
593 		.exit_latency = 40,
594 		.target_residency = 100,
595 		.enter = &intel_idle,
596 		.enter_s2idle = intel_idle_s2idle, },
597 	{
598 		.name = "C6",
599 		.desc = "MWAIT 0x20",
600 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
601 		.exit_latency = 133,
602 		.target_residency = 400,
603 		.enter = &intel_idle,
604 		.enter_s2idle = intel_idle_s2idle, },
605 	{
606 		.name = "C7s",
607 		.desc = "MWAIT 0x32",
608 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
609 		.exit_latency = 166,
610 		.target_residency = 500,
611 		.enter = &intel_idle,
612 		.enter_s2idle = intel_idle_s2idle, },
613 	{
614 		.name = "C8",
615 		.desc = "MWAIT 0x40",
616 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
617 		.exit_latency = 300,
618 		.target_residency = 900,
619 		.enter = &intel_idle,
620 		.enter_s2idle = intel_idle_s2idle, },
621 	{
622 		.name = "C9",
623 		.desc = "MWAIT 0x50",
624 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
625 		.exit_latency = 600,
626 		.target_residency = 1800,
627 		.enter = &intel_idle,
628 		.enter_s2idle = intel_idle_s2idle, },
629 	{
630 		.name = "C10",
631 		.desc = "MWAIT 0x60",
632 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
633 		.exit_latency = 2600,
634 		.target_residency = 7700,
635 		.enter = &intel_idle,
636 		.enter_s2idle = intel_idle_s2idle, },
637 	{
638 		.enter = NULL }
639 };
640 
641 static struct cpuidle_state skl_cstates[] __initdata = {
642 	{
643 		.name = "C1",
644 		.desc = "MWAIT 0x00",
645 		.flags = MWAIT2flg(0x00),
646 		.exit_latency = 2,
647 		.target_residency = 2,
648 		.enter = &intel_idle,
649 		.enter_s2idle = intel_idle_s2idle, },
650 	{
651 		.name = "C1E",
652 		.desc = "MWAIT 0x01",
653 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
654 		.exit_latency = 10,
655 		.target_residency = 20,
656 		.enter = &intel_idle,
657 		.enter_s2idle = intel_idle_s2idle, },
658 	{
659 		.name = "C3",
660 		.desc = "MWAIT 0x10",
661 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
662 		.exit_latency = 70,
663 		.target_residency = 100,
664 		.enter = &intel_idle,
665 		.enter_s2idle = intel_idle_s2idle, },
666 	{
667 		.name = "C6",
668 		.desc = "MWAIT 0x20",
669 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
670 		.exit_latency = 85,
671 		.target_residency = 200,
672 		.enter = &intel_idle,
673 		.enter_s2idle = intel_idle_s2idle, },
674 	{
675 		.name = "C7s",
676 		.desc = "MWAIT 0x33",
677 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
678 		.exit_latency = 124,
679 		.target_residency = 800,
680 		.enter = &intel_idle,
681 		.enter_s2idle = intel_idle_s2idle, },
682 	{
683 		.name = "C8",
684 		.desc = "MWAIT 0x40",
685 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
686 		.exit_latency = 200,
687 		.target_residency = 800,
688 		.enter = &intel_idle,
689 		.enter_s2idle = intel_idle_s2idle, },
690 	{
691 		.name = "C9",
692 		.desc = "MWAIT 0x50",
693 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
694 		.exit_latency = 480,
695 		.target_residency = 5000,
696 		.enter = &intel_idle,
697 		.enter_s2idle = intel_idle_s2idle, },
698 	{
699 		.name = "C10",
700 		.desc = "MWAIT 0x60",
701 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
702 		.exit_latency = 890,
703 		.target_residency = 5000,
704 		.enter = &intel_idle,
705 		.enter_s2idle = intel_idle_s2idle, },
706 	{
707 		.enter = NULL }
708 };
709 
710 static struct cpuidle_state skx_cstates[] __initdata = {
711 	{
712 		.name = "C1",
713 		.desc = "MWAIT 0x00",
714 		.flags = MWAIT2flg(0x00),
715 		.exit_latency = 2,
716 		.target_residency = 2,
717 		.enter = &intel_idle,
718 		.enter_s2idle = intel_idle_s2idle, },
719 	{
720 		.name = "C1E",
721 		.desc = "MWAIT 0x01",
722 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
723 		.exit_latency = 10,
724 		.target_residency = 20,
725 		.enter = &intel_idle,
726 		.enter_s2idle = intel_idle_s2idle, },
727 	{
728 		.name = "C6",
729 		.desc = "MWAIT 0x20",
730 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
731 		.exit_latency = 133,
732 		.target_residency = 600,
733 		.enter = &intel_idle,
734 		.enter_s2idle = intel_idle_s2idle, },
735 	{
736 		.enter = NULL }
737 };
738 
739 static struct cpuidle_state icx_cstates[] __initdata = {
740 	{
741 		.name = "C1",
742 		.desc = "MWAIT 0x00",
743 		.flags = MWAIT2flg(0x00),
744 		.exit_latency = 1,
745 		.target_residency = 1,
746 		.enter = &intel_idle,
747 		.enter_s2idle = intel_idle_s2idle, },
748 	{
749 		.name = "C1E",
750 		.desc = "MWAIT 0x01",
751 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
752 		.exit_latency = 4,
753 		.target_residency = 4,
754 		.enter = &intel_idle,
755 		.enter_s2idle = intel_idle_s2idle, },
756 	{
757 		.name = "C6",
758 		.desc = "MWAIT 0x20",
759 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
760 		.exit_latency = 128,
761 		.target_residency = 384,
762 		.enter = &intel_idle,
763 		.enter_s2idle = intel_idle_s2idle, },
764 	{
765 		.enter = NULL }
766 };
767 
768 static struct cpuidle_state atom_cstates[] __initdata = {
769 	{
770 		.name = "C1E",
771 		.desc = "MWAIT 0x00",
772 		.flags = MWAIT2flg(0x00),
773 		.exit_latency = 10,
774 		.target_residency = 20,
775 		.enter = &intel_idle,
776 		.enter_s2idle = intel_idle_s2idle, },
777 	{
778 		.name = "C2",
779 		.desc = "MWAIT 0x10",
780 		.flags = MWAIT2flg(0x10),
781 		.exit_latency = 20,
782 		.target_residency = 80,
783 		.enter = &intel_idle,
784 		.enter_s2idle = intel_idle_s2idle, },
785 	{
786 		.name = "C4",
787 		.desc = "MWAIT 0x30",
788 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
789 		.exit_latency = 100,
790 		.target_residency = 400,
791 		.enter = &intel_idle,
792 		.enter_s2idle = intel_idle_s2idle, },
793 	{
794 		.name = "C6",
795 		.desc = "MWAIT 0x52",
796 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
797 		.exit_latency = 140,
798 		.target_residency = 560,
799 		.enter = &intel_idle,
800 		.enter_s2idle = intel_idle_s2idle, },
801 	{
802 		.enter = NULL }
803 };
804 static struct cpuidle_state tangier_cstates[] __initdata = {
805 	{
806 		.name = "C1",
807 		.desc = "MWAIT 0x00",
808 		.flags = MWAIT2flg(0x00),
809 		.exit_latency = 1,
810 		.target_residency = 4,
811 		.enter = &intel_idle,
812 		.enter_s2idle = intel_idle_s2idle, },
813 	{
814 		.name = "C4",
815 		.desc = "MWAIT 0x30",
816 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
817 		.exit_latency = 100,
818 		.target_residency = 400,
819 		.enter = &intel_idle,
820 		.enter_s2idle = intel_idle_s2idle, },
821 	{
822 		.name = "C6",
823 		.desc = "MWAIT 0x52",
824 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
825 		.exit_latency = 140,
826 		.target_residency = 560,
827 		.enter = &intel_idle,
828 		.enter_s2idle = intel_idle_s2idle, },
829 	{
830 		.name = "C7",
831 		.desc = "MWAIT 0x60",
832 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
833 		.exit_latency = 1200,
834 		.target_residency = 4000,
835 		.enter = &intel_idle,
836 		.enter_s2idle = intel_idle_s2idle, },
837 	{
838 		.name = "C9",
839 		.desc = "MWAIT 0x64",
840 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
841 		.exit_latency = 10000,
842 		.target_residency = 20000,
843 		.enter = &intel_idle,
844 		.enter_s2idle = intel_idle_s2idle, },
845 	{
846 		.enter = NULL }
847 };
848 static struct cpuidle_state avn_cstates[] __initdata = {
849 	{
850 		.name = "C1",
851 		.desc = "MWAIT 0x00",
852 		.flags = MWAIT2flg(0x00),
853 		.exit_latency = 2,
854 		.target_residency = 2,
855 		.enter = &intel_idle,
856 		.enter_s2idle = intel_idle_s2idle, },
857 	{
858 		.name = "C6",
859 		.desc = "MWAIT 0x51",
860 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
861 		.exit_latency = 15,
862 		.target_residency = 45,
863 		.enter = &intel_idle,
864 		.enter_s2idle = intel_idle_s2idle, },
865 	{
866 		.enter = NULL }
867 };
868 static struct cpuidle_state knl_cstates[] __initdata = {
869 	{
870 		.name = "C1",
871 		.desc = "MWAIT 0x00",
872 		.flags = MWAIT2flg(0x00),
873 		.exit_latency = 1,
874 		.target_residency = 2,
875 		.enter = &intel_idle,
876 		.enter_s2idle = intel_idle_s2idle },
877 	{
878 		.name = "C6",
879 		.desc = "MWAIT 0x10",
880 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
881 		.exit_latency = 120,
882 		.target_residency = 500,
883 		.enter = &intel_idle,
884 		.enter_s2idle = intel_idle_s2idle },
885 	{
886 		.enter = NULL }
887 };
888 
889 static struct cpuidle_state bxt_cstates[] __initdata = {
890 	{
891 		.name = "C1",
892 		.desc = "MWAIT 0x00",
893 		.flags = MWAIT2flg(0x00),
894 		.exit_latency = 2,
895 		.target_residency = 2,
896 		.enter = &intel_idle,
897 		.enter_s2idle = intel_idle_s2idle, },
898 	{
899 		.name = "C1E",
900 		.desc = "MWAIT 0x01",
901 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
902 		.exit_latency = 10,
903 		.target_residency = 20,
904 		.enter = &intel_idle,
905 		.enter_s2idle = intel_idle_s2idle, },
906 	{
907 		.name = "C6",
908 		.desc = "MWAIT 0x20",
909 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
910 		.exit_latency = 133,
911 		.target_residency = 133,
912 		.enter = &intel_idle,
913 		.enter_s2idle = intel_idle_s2idle, },
914 	{
915 		.name = "C7s",
916 		.desc = "MWAIT 0x31",
917 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
918 		.exit_latency = 155,
919 		.target_residency = 155,
920 		.enter = &intel_idle,
921 		.enter_s2idle = intel_idle_s2idle, },
922 	{
923 		.name = "C8",
924 		.desc = "MWAIT 0x40",
925 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
926 		.exit_latency = 1000,
927 		.target_residency = 1000,
928 		.enter = &intel_idle,
929 		.enter_s2idle = intel_idle_s2idle, },
930 	{
931 		.name = "C9",
932 		.desc = "MWAIT 0x50",
933 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
934 		.exit_latency = 2000,
935 		.target_residency = 2000,
936 		.enter = &intel_idle,
937 		.enter_s2idle = intel_idle_s2idle, },
938 	{
939 		.name = "C10",
940 		.desc = "MWAIT 0x60",
941 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
942 		.exit_latency = 10000,
943 		.target_residency = 10000,
944 		.enter = &intel_idle,
945 		.enter_s2idle = intel_idle_s2idle, },
946 	{
947 		.enter = NULL }
948 };
949 
950 static struct cpuidle_state dnv_cstates[] __initdata = {
951 	{
952 		.name = "C1",
953 		.desc = "MWAIT 0x00",
954 		.flags = MWAIT2flg(0x00),
955 		.exit_latency = 2,
956 		.target_residency = 2,
957 		.enter = &intel_idle,
958 		.enter_s2idle = intel_idle_s2idle, },
959 	{
960 		.name = "C1E",
961 		.desc = "MWAIT 0x01",
962 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
963 		.exit_latency = 10,
964 		.target_residency = 20,
965 		.enter = &intel_idle,
966 		.enter_s2idle = intel_idle_s2idle, },
967 	{
968 		.name = "C6",
969 		.desc = "MWAIT 0x20",
970 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
971 		.exit_latency = 50,
972 		.target_residency = 500,
973 		.enter = &intel_idle,
974 		.enter_s2idle = intel_idle_s2idle, },
975 	{
976 		.enter = NULL }
977 };
978 
/*
 * Per-CPU-model driver configuration objects, referenced from the x86
 * CPU-ID match table below.  Variants that also set .use_acpi (the *_nhx,
 * *_snx, *_hsx, ... "server" entries) presumably let ACPI _CST data
 * influence state enablement — the consuming logic is outside this chunk;
 * confirm there.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

/* Same as idle_cpu_nehalem, plus ACPI usage. */
static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

/* Lincroft shares the Atom table but disables C6 auto-demotion. */
static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};
1099 
/*
 * Map of known CPU models to their idle_cpu tuning data.  x86_match_cpu()
 * walks this in order and stops at the first match; the empty entry at the
 * end terminates the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
	{}
};
1141 
/*
 * Fallback match for models absent from intel_idle_ids: any family-6 Intel
 * CPU with MWAIT.  Such CPUs carry no driver_data and therefore rely
 * entirely on ACPI _CST (see intel_idle_init()).
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};
1146 
1147 static bool __init intel_idle_max_cstate_reached(int cstate)
1148 {
1149 	if (cstate + 1 > max_cstate) {
1150 		pr_info("max_cstate %d reached\n", max_cstate);
1151 		return true;
1152 	}
1153 	return false;
1154 }
1155 
1156 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1157 #include <acpi/processor.h>
1158 
/* intel_idle.no_acpi=1 forbids consulting ACPI _CST entirely. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* _CST data from the first CPU providing a usable table (init-time only). */
static struct acpi_processor_power acpi_state_table __initdata;
1168 
1169 /**
1170  * intel_idle_cst_usable - Check if the _CST information can be used.
1171  *
1172  * Check if all of the C-states listed by _CST in the max_cstate range are
1173  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1174  */
1175 static bool __init intel_idle_cst_usable(void)
1176 {
1177 	int cstate, limit;
1178 
1179 	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1180 		      acpi_state_table.count);
1181 
1182 	for (cstate = 1; cstate < limit; cstate++) {
1183 		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1184 
1185 		if (cx->entry_method != ACPI_CSTATE_FFH)
1186 			return false;
1187 	}
1188 
1189 	return true;
1190 }
1191 
/**
 * intel_idle_acpi_cst_extract - Obtain a usable _CST table from ACPI.
 *
 * Walk the possible CPUs and use the first one whose _CST evaluates
 * successfully and lists only FFH (MWAIT) states; claim C-state control
 * from the platform before reporting success.  On success the data is
 * left in acpi_state_table for the init code to consume.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/* states[] is consumed 1-based below, hence the extra slot. */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		/* Do not use _CST unless the platform hands over control. */
		if (!acpi_processor_claim_cst_control()) {
			acpi_state_table.count = 0;
			return false;
		}

		return true;
	}

	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}
1226 
/**
 * intel_idle_init_cstates_acpi - Build the idle states list from ACPI _CST.
 * @drv: cpuidle driver whose states[] to append to (after the POLL state).
 *
 * Translate each _CST entry into a cpuidle_state named "C<n>_ACPI", using
 * the FFH address as the MWAIT hint.
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* The FFH address doubles as the MWAIT hint for this state. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		/* Honour the intel_idle.states_off= mask. */
		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		state->enter = intel_idle;
		state->enter_s2idle = intel_idle_s2idle;
	}
}
1273 
1274 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1275 {
1276 	int cstate, limit;
1277 
1278 	/*
1279 	 * If there are no _CST C-states, do not disable any C-states by
1280 	 * default.
1281 	 */
1282 	if (!acpi_state_table.count)
1283 		return false;
1284 
1285 	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1286 	/*
1287 	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1288 	 * the interesting states are ACPI_CSTATE_FFH.
1289 	 */
1290 	for (cstate = 1; cstate < limit; cstate++) {
1291 		if (acpi_state_table.states[cstate].address == mwait_hint)
1292 			return false;
1293 	}
1294 	return true;
1295 }
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* Without ACPI C-state support all of the _CST paths collapse to no-ops. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1303 
1304 /**
1305  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1306  *
1307  * Tune IVT multi-socket targets.
1308  * Assumption: num_sockets == (max_package_num + 1).
1309  */
1310 static void __init ivt_idle_state_table_update(void)
1311 {
1312 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1313 	int cpu, package_num, num_sockets = 1;
1314 
1315 	for_each_online_cpu(cpu) {
1316 		package_num = topology_physical_package_id(cpu);
1317 		if (package_num + 1 > num_sockets) {
1318 			num_sockets = package_num + 1;
1319 
1320 			if (num_sockets > 4) {
1321 				cpuidle_state_table = ivt_cstates_8s;
1322 				return;
1323 			}
1324 		}
1325 	}
1326 
1327 	if (num_sockets > 2)
1328 		cpuidle_state_table = ivt_cstates_4s;
1329 
1330 	/* else, 1 and 2 socket systems use default ivt_cstates */
1331 }
1332 
1333 /**
1334  * irtl_2_usec - IRTL to microseconds conversion.
1335  * @irtl: IRTL MSR value.
1336  *
1337  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1338  */
1339 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1340 {
1341 	static const unsigned int irtl_ns_units[] __initconst = {
1342 		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1343 	};
1344 	unsigned long long ns;
1345 
1346 	if (!irtl)
1347 		return 0;
1348 
1349 	ns = irtl_ns_units[(irtl >> 10) & 0x7];
1350 
1351 	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1352 }
1353 
1354 /**
1355  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1356  *
1357  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1358  * definitive maximum latency and use the same value for target_residency.
1359  */
1360 static void __init bxt_idle_state_table_update(void)
1361 {
1362 	unsigned long long msr;
1363 	unsigned int usec;
1364 
1365 	rdmsrl(MSR_PKGC6_IRTL, msr);
1366 	usec = irtl_2_usec(msr);
1367 	if (usec) {
1368 		bxt_cstates[2].exit_latency = usec;
1369 		bxt_cstates[2].target_residency = usec;
1370 	}
1371 
1372 	rdmsrl(MSR_PKGC7_IRTL, msr);
1373 	usec = irtl_2_usec(msr);
1374 	if (usec) {
1375 		bxt_cstates[3].exit_latency = usec;
1376 		bxt_cstates[3].target_residency = usec;
1377 	}
1378 
1379 	rdmsrl(MSR_PKGC8_IRTL, msr);
1380 	usec = irtl_2_usec(msr);
1381 	if (usec) {
1382 		bxt_cstates[4].exit_latency = usec;
1383 		bxt_cstates[4].target_residency = usec;
1384 	}
1385 
1386 	rdmsrl(MSR_PKGC9_IRTL, msr);
1387 	usec = irtl_2_usec(msr);
1388 	if (usec) {
1389 		bxt_cstates[5].exit_latency = usec;
1390 		bxt_cstates[5].target_residency = usec;
1391 	}
1392 
1393 	rdmsrl(MSR_PKGC10_IRTL, msr);
1394 	usec = irtl_2_usec(msr);
1395 	if (usec) {
1396 		bxt_cstates[6].exit_latency = usec;
1397 		bxt_cstates[6].target_residency = usec;
1398 	}
1399 
1400 }
1401 
1402 /**
1403  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1404  *
1405  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1406  */
1407 static void __init sklh_idle_state_table_update(void)
1408 {
1409 	unsigned long long msr;
1410 	unsigned int eax, ebx, ecx, edx;
1411 
1412 
1413 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1414 	if (max_cstate <= 7)
1415 		return;
1416 
1417 	/* if PC10 not present in CPUID.MWAIT.EDX */
1418 	if ((mwait_substates & (0xF << 28)) == 0)
1419 		return;
1420 
1421 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1422 
1423 	/* PC10 is not enabled in PKG C-state limit */
1424 	if ((msr & 0xF) != 8)
1425 		return;
1426 
1427 	ecx = 0;
1428 	cpuid(7, &eax, &ebx, &ecx, &edx);
1429 
1430 	/* if SGX is present */
1431 	if (ebx & (1 << 2)) {
1432 
1433 		rdmsrl(MSR_IA32_FEAT_CTL, msr);
1434 
1435 		/* if SGX is enabled */
1436 		if (msr & (1 << 18))
1437 			return;
1438 	}
1439 
1440 	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
1441 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
1442 }
1443 
1444 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1445 {
1446 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1447 	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1448 					MWAIT_SUBSTATE_MASK;
1449 
1450 	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
1451 	if (num_substates == 0)
1452 		return false;
1453 
1454 	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1455 		mark_tsc_unstable("TSC halts in idle states deeper than C2");
1456 
1457 	return true;
1458 }
1459 
/**
 * intel_idle_init_cstates_icpu - Build the idle states list from the
 *	native per-model table.
 * @drv: cpuidle driver whose states[] to append to (after the POLL state).
 *
 * Applies model-specific table fixups first, then copies usable entries
 * from cpuidle_state_table into the driver, honouring max_cstate, the
 * states_off mask and (when ACPI is used) the _CST off-by-default policy.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* A table entry with neither enter callback terminates it. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/*
		 * Disable by default if requested via states_off, or if the
		 * state is missing from _CST and not marked ALWAYS_ENABLE.
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		drv->state_count++;
	}

	/* BYT/CHT quirk: turn off CC6/MC6 auto-demotion (see idle_cpu_byt). */
	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}
1515 
1516 /**
1517  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1518  * @drv: cpuidle driver structure to initialize.
1519  */
1520 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1521 {
1522 	cpuidle_poll_state_init(drv);
1523 
1524 	if (disabled_states_mask & BIT(0))
1525 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1526 
1527 	drv->state_count = 1;
1528 
1529 	if (icpu)
1530 		intel_idle_init_cstates_icpu(drv);
1531 	else
1532 		intel_idle_init_cstates_acpi(drv);
1533 }
1534 
1535 static void auto_demotion_disable(void)
1536 {
1537 	unsigned long long msr_bits;
1538 
1539 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1540 	msr_bits &= ~auto_demotion_disable_flags;
1541 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1542 }
1543 
1544 static void c1e_promotion_disable(void)
1545 {
1546 	unsigned long long msr_bits;
1547 
1548 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1549 	msr_bits &= ~0x2;
1550 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1551 }
1552 
1553 /**
1554  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1555  * @cpu: CPU to initialize.
1556  *
1557  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1558  * with the processor model flags.
1559  */
1560 static int intel_idle_cpu_init(unsigned int cpu)
1561 {
1562 	struct cpuidle_device *dev;
1563 
1564 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1565 	dev->cpu = cpu;
1566 
1567 	if (cpuidle_register_device(dev)) {
1568 		pr_debug("cpuidle_register_device %d failed!\n", cpu);
1569 		return -EIO;
1570 	}
1571 
1572 	if (auto_demotion_disable_flags)
1573 		auto_demotion_disable();
1574 
1575 	if (disable_promotion_to_c1e)
1576 		c1e_promotion_disable();
1577 
1578 	return 0;
1579 }
1580 
1581 static int intel_idle_cpu_online(unsigned int cpu)
1582 {
1583 	struct cpuidle_device *dev;
1584 
1585 	if (!boot_cpu_has(X86_FEATURE_ARAT))
1586 		tick_broadcast_enable();
1587 
1588 	/*
1589 	 * Some systems can hotplug a cpu at runtime after
1590 	 * the kernel has booted, we have to initialize the
1591 	 * driver in this case
1592 	 */
1593 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1594 	if (!dev->registered)
1595 		return intel_idle_cpu_init(cpu);
1596 
1597 	return 0;
1598 }
1599 
1600 /**
1601  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1602  */
1603 static void __init intel_idle_cpuidle_devices_uninit(void)
1604 {
1605 	int i;
1606 
1607 	for_each_online_cpu(i)
1608 		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1609 }
1610 
/*
 * intel_idle_init - Driver entry point.
 *
 * Validate the CPU (known model, or generic MWAIT plus usable ACPI _CST),
 * build the idle states list, then register the cpuidle driver and a
 * hotplug callback that registers a cpuidle device per online CPU.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	/* intel_idle.max_cstate=0 disables the driver (see module_param). */
	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: fall back to the generic MWAIT match. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* EDX enumerates the MWAIT sub-states; stashed in mwait_substates. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
			return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* No native table and no usable _CST: nothing to register. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	/* Runs intel_idle_cpu_online() on each present and future CPU. */
	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);
1697 
1698 /*
1699  * We are not really modular, but we used to support that.  Meaning we also
1700  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1701  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1702  * is the easiest way (currently) to continue doing that.
1703  */
1704 module_param(max_cstate, int, 0444);
1705 /*
1706  * The positions of the bits that are set in this number are the indices of the
1707  * idle states to be disabled by default (as reflected by the names of the
1708  * corresponding idle state directories in sysfs, "state0", "state1" ...
1709  * "state<i>" ..., where <i> is the index of the given state).
1710  */
1711 module_param_named(states_off, disabled_states_mask, uint, 0444);
1712 MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
1713