xref: /openbmc/linux/drivers/idle/intel_idle.c (revision 6f69e2a3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013, Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  */
8 
9 /*
10  * intel_idle is a cpuidle driver that loads on specific Intel processors
11  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
12  * make Linux more efficient on these processors, as intel_idle knows
13  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
14  */
15 
16 /*
17  * Design Assumptions
18  *
19  * All CPUs have same idle states as boot CPU
20  *
21  * Chipset BM_STS (bus master status) bit is a NOP
22  *	for preventing entry into deep C-states
23  */
24 
25 /*
26  * Known limitations
27  *
28  * The driver currently initializes for_each_online_cpu() upon modprobe.
29  * It is unaware of subsequent processors hot-added to the system.
30  * This means that if you boot with maxcpus=n and later online
31  * processors above n, those processors will use C1 only.
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38 
39 /* DEBUG is defined below so that pr_debug() statements are enabled; comment it out to disable them */
40 #define DEBUG
41 
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43 
44 #include <linux/kernel.h>
45 #include <linux/cpuidle.h>
46 #include <linux/tick.h>
47 #include <trace/events/power.h>
48 #include <linux/sched.h>
49 #include <linux/notifier.h>
50 #include <linux/cpu.h>
51 #include <linux/moduleparam.h>
52 #include <asm/cpu_device_id.h>
53 #include <asm/intel-family.h>
54 #include <asm/mwait.h>
55 #include <asm/msr.h>
56 
57 #define INTEL_IDLE_VERSION "0.4.1"
58 
59 static struct cpuidle_driver intel_idle_driver = {
60 	.name = "intel_idle",
61 	.owner = THIS_MODULE,
62 };
63 /* intel_idle.max_cstate=0 disables driver */
64 static int max_cstate = CPUIDLE_STATE_MAX - 1;
65 
66 static unsigned int mwait_substates;
67 
68 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
69 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
70 static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */
71 
/*
 * Per-CPU-model description used by this driver: the C-state table to
 * register plus a few model-specific tuning knobs.
 */
struct idle_cpu {
	/* C-state table for this model (one of the *_cstates arrays). */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here; auto_demotion_disable()
	 * clears them in MSR_PKG_CST_CONFIG_CONTROL.
	 */
	unsigned long auto_demotion_disable_flags;

	/*
	 * Set for the byt/cht models; the code consuming it is not in this
	 * part of the file -- NOTE(review): confirm what it disables.
	 */
	bool byt_auto_demotion_disable_flag;

	/*
	 * Presumably requests a call to c1e_promotion_disable(); the
	 * consumer is not in this part of the file -- TODO confirm.
	 */
	bool disable_promotion_to_c1e;
};
83 
84 static const struct idle_cpu *icpu;
85 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
86 static int intel_idle(struct cpuidle_device *dev,
87 			struct cpuidle_driver *drv, int index);
88 static void intel_idle_s2idle(struct cpuidle_device *dev,
89 			      struct cpuidle_driver *drv, int index);
90 static struct cpuidle_state *cpuidle_state_table;
91 
92 /*
93  * Set this flag for states where the HW flushes the TLB for us
94  * and so we don't need cross-calls to keep it consistent.
95  * If this flag is set, SW flushes the TLB, so even if the
96  * HW doesn't do the flushing, this flag is safe to use.
97  */
#define CPUIDLE_FLAG_TLB_FLUSHED	(1 << 16)	/* == 0x10000 */
99 
100 /*
101  * MWAIT takes an 8-bit "hint" in EAX "suggesting"
102  * the C-state (top nibble) and sub-state (bottom nibble)
103  * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
104  *
105  * We store the hint at the top of our "flags" for each state.
106  */
/*
 * flg2MWAIT() extracts the 8-bit MWAIT hint from bits 31:24 of a state's
 * flags; MWAIT2flg() places a hint there.  The argument is parenthesized
 * so that compound expressions are masked as a whole, and the mask is
 * unsigned so that hints with the top bit set do not overflow a signed int
 * when shifted.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
109 
110 /*
111  * Each table lists the C-states of one CPU model, shallowest first,
112  * with the MWAIT hint of every state stored in its flags (MWAIT2flg()).
113  * The last entry of each table has a NULL .enter callback.
114  */
115 static struct cpuidle_state nehalem_cstates[] = {
116 	{
117 		.name = "C1",
118 		.desc = "MWAIT 0x00",
119 		.flags = MWAIT2flg(0x00),
120 		.exit_latency = 3,
121 		.target_residency = 6,
122 		.enter = &intel_idle,
123 		.enter_s2idle = intel_idle_s2idle, },
124 	{
125 		.name = "C1E",
126 		.desc = "MWAIT 0x01",
127 		.flags = MWAIT2flg(0x01),
128 		.exit_latency = 10,
129 		.target_residency = 20,
130 		.enter = &intel_idle,
131 		.enter_s2idle = intel_idle_s2idle, },
132 	{
133 		.name = "C3",
134 		.desc = "MWAIT 0x10",
135 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
136 		.exit_latency = 20,
137 		.target_residency = 80,
138 		.enter = &intel_idle,
139 		.enter_s2idle = intel_idle_s2idle, },
140 	{
141 		.name = "C6",
142 		.desc = "MWAIT 0x20",
143 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
144 		.exit_latency = 200,
145 		.target_residency = 800,
146 		.enter = &intel_idle,
147 		.enter_s2idle = intel_idle_s2idle, },
148 	{
149 		.enter = NULL }
150 };
151 
152 static struct cpuidle_state snb_cstates[] = {
153 	{
154 		.name = "C1",
155 		.desc = "MWAIT 0x00",
156 		.flags = MWAIT2flg(0x00),
157 		.exit_latency = 2,
158 		.target_residency = 2,
159 		.enter = &intel_idle,
160 		.enter_s2idle = intel_idle_s2idle, },
161 	{
162 		.name = "C1E",
163 		.desc = "MWAIT 0x01",
164 		.flags = MWAIT2flg(0x01),
165 		.exit_latency = 10,
166 		.target_residency = 20,
167 		.enter = &intel_idle,
168 		.enter_s2idle = intel_idle_s2idle, },
169 	{
170 		.name = "C3",
171 		.desc = "MWAIT 0x10",
172 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
173 		.exit_latency = 80,
174 		.target_residency = 211,
175 		.enter = &intel_idle,
176 		.enter_s2idle = intel_idle_s2idle, },
177 	{
178 		.name = "C6",
179 		.desc = "MWAIT 0x20",
180 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
181 		.exit_latency = 104,
182 		.target_residency = 345,
183 		.enter = &intel_idle,
184 		.enter_s2idle = intel_idle_s2idle, },
185 	{
186 		.name = "C7",
187 		.desc = "MWAIT 0x30",
188 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
189 		.exit_latency = 109,
190 		.target_residency = 345,
191 		.enter = &intel_idle,
192 		.enter_s2idle = intel_idle_s2idle, },
193 	{
194 		.enter = NULL }
195 };
196 
197 static struct cpuidle_state byt_cstates[] = {
198 	{
199 		.name = "C1",
200 		.desc = "MWAIT 0x00",
201 		.flags = MWAIT2flg(0x00),
202 		.exit_latency = 1,
203 		.target_residency = 1,
204 		.enter = &intel_idle,
205 		.enter_s2idle = intel_idle_s2idle, },
206 	{
207 		.name = "C6N",
208 		.desc = "MWAIT 0x58",
209 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
210 		.exit_latency = 300,
211 		.target_residency = 275,
212 		.enter = &intel_idle,
213 		.enter_s2idle = intel_idle_s2idle, },
214 	{
215 		.name = "C6S",
216 		.desc = "MWAIT 0x52",
217 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
218 		.exit_latency = 500,
219 		.target_residency = 560,
220 		.enter = &intel_idle,
221 		.enter_s2idle = intel_idle_s2idle, },
222 	{
223 		.name = "C7",
224 		.desc = "MWAIT 0x60",
225 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
226 		.exit_latency = 1200,
227 		.target_residency = 4000,
228 		.enter = &intel_idle,
229 		.enter_s2idle = intel_idle_s2idle, },
230 	{
231 		.name = "C7S",
232 		.desc = "MWAIT 0x64",
233 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
234 		.exit_latency = 10000,
235 		.target_residency = 20000,
236 		.enter = &intel_idle,
237 		.enter_s2idle = intel_idle_s2idle, },
238 	{
239 		.enter = NULL }
240 };
241 
242 static struct cpuidle_state cht_cstates[] = {
243 	{
244 		.name = "C1",
245 		.desc = "MWAIT 0x00",
246 		.flags = MWAIT2flg(0x00),
247 		.exit_latency = 1,
248 		.target_residency = 1,
249 		.enter = &intel_idle,
250 		.enter_s2idle = intel_idle_s2idle, },
251 	{
252 		.name = "C6N",
253 		.desc = "MWAIT 0x58",
254 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
255 		.exit_latency = 80,
256 		.target_residency = 275,
257 		.enter = &intel_idle,
258 		.enter_s2idle = intel_idle_s2idle, },
259 	{
260 		.name = "C6S",
261 		.desc = "MWAIT 0x52",
262 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
263 		.exit_latency = 200,
264 		.target_residency = 560,
265 		.enter = &intel_idle,
266 		.enter_s2idle = intel_idle_s2idle, },
267 	{
268 		.name = "C7",
269 		.desc = "MWAIT 0x60",
270 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
271 		.exit_latency = 1200,
272 		.target_residency = 4000,
273 		.enter = &intel_idle,
274 		.enter_s2idle = intel_idle_s2idle, },
275 	{
276 		.name = "C7S",
277 		.desc = "MWAIT 0x64",
278 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
279 		.exit_latency = 10000,
280 		.target_residency = 20000,
281 		.enter = &intel_idle,
282 		.enter_s2idle = intel_idle_s2idle, },
283 	{
284 		.enter = NULL }
285 };
286 
287 static struct cpuidle_state ivb_cstates[] = {
288 	{
289 		.name = "C1",
290 		.desc = "MWAIT 0x00",
291 		.flags = MWAIT2flg(0x00),
292 		.exit_latency = 1,
293 		.target_residency = 1,
294 		.enter = &intel_idle,
295 		.enter_s2idle = intel_idle_s2idle, },
296 	{
297 		.name = "C1E",
298 		.desc = "MWAIT 0x01",
299 		.flags = MWAIT2flg(0x01),
300 		.exit_latency = 10,
301 		.target_residency = 20,
302 		.enter = &intel_idle,
303 		.enter_s2idle = intel_idle_s2idle, },
304 	{
305 		.name = "C3",
306 		.desc = "MWAIT 0x10",
307 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
308 		.exit_latency = 59,
309 		.target_residency = 156,
310 		.enter = &intel_idle,
311 		.enter_s2idle = intel_idle_s2idle, },
312 	{
313 		.name = "C6",
314 		.desc = "MWAIT 0x20",
315 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
316 		.exit_latency = 80,
317 		.target_residency = 300,
318 		.enter = &intel_idle,
319 		.enter_s2idle = intel_idle_s2idle, },
320 	{
321 		.name = "C7",
322 		.desc = "MWAIT 0x30",
323 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
324 		.exit_latency = 87,
325 		.target_residency = 300,
326 		.enter = &intel_idle,
327 		.enter_s2idle = intel_idle_s2idle, },
328 	{
329 		.enter = NULL }
330 };
331 
332 static struct cpuidle_state ivt_cstates[] = {
333 	{
334 		.name = "C1",
335 		.desc = "MWAIT 0x00",
336 		.flags = MWAIT2flg(0x00),
337 		.exit_latency = 1,
338 		.target_residency = 1,
339 		.enter = &intel_idle,
340 		.enter_s2idle = intel_idle_s2idle, },
341 	{
342 		.name = "C1E",
343 		.desc = "MWAIT 0x01",
344 		.flags = MWAIT2flg(0x01),
345 		.exit_latency = 10,
346 		.target_residency = 80,
347 		.enter = &intel_idle,
348 		.enter_s2idle = intel_idle_s2idle, },
349 	{
350 		.name = "C3",
351 		.desc = "MWAIT 0x10",
352 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
353 		.exit_latency = 59,
354 		.target_residency = 156,
355 		.enter = &intel_idle,
356 		.enter_s2idle = intel_idle_s2idle, },
357 	{
358 		.name = "C6",
359 		.desc = "MWAIT 0x20",
360 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
361 		.exit_latency = 82,
362 		.target_residency = 300,
363 		.enter = &intel_idle,
364 		.enter_s2idle = intel_idle_s2idle, },
365 	{
366 		.enter = NULL }
367 };
368 
369 static struct cpuidle_state ivt_cstates_4s[] = {
370 	{
371 		.name = "C1",
372 		.desc = "MWAIT 0x00",
373 		.flags = MWAIT2flg(0x00),
374 		.exit_latency = 1,
375 		.target_residency = 1,
376 		.enter = &intel_idle,
377 		.enter_s2idle = intel_idle_s2idle, },
378 	{
379 		.name = "C1E",
380 		.desc = "MWAIT 0x01",
381 		.flags = MWAIT2flg(0x01),
382 		.exit_latency = 10,
383 		.target_residency = 250,
384 		.enter = &intel_idle,
385 		.enter_s2idle = intel_idle_s2idle, },
386 	{
387 		.name = "C3",
388 		.desc = "MWAIT 0x10",
389 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
390 		.exit_latency = 59,
391 		.target_residency = 300,
392 		.enter = &intel_idle,
393 		.enter_s2idle = intel_idle_s2idle, },
394 	{
395 		.name = "C6",
396 		.desc = "MWAIT 0x20",
397 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
398 		.exit_latency = 84,
399 		.target_residency = 400,
400 		.enter = &intel_idle,
401 		.enter_s2idle = intel_idle_s2idle, },
402 	{
403 		.enter = NULL }
404 };
405 
406 static struct cpuidle_state ivt_cstates_8s[] = {
407 	{
408 		.name = "C1",
409 		.desc = "MWAIT 0x00",
410 		.flags = MWAIT2flg(0x00),
411 		.exit_latency = 1,
412 		.target_residency = 1,
413 		.enter = &intel_idle,
414 		.enter_s2idle = intel_idle_s2idle, },
415 	{
416 		.name = "C1E",
417 		.desc = "MWAIT 0x01",
418 		.flags = MWAIT2flg(0x01),
419 		.exit_latency = 10,
420 		.target_residency = 500,
421 		.enter = &intel_idle,
422 		.enter_s2idle = intel_idle_s2idle, },
423 	{
424 		.name = "C3",
425 		.desc = "MWAIT 0x10",
426 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
427 		.exit_latency = 59,
428 		.target_residency = 600,
429 		.enter = &intel_idle,
430 		.enter_s2idle = intel_idle_s2idle, },
431 	{
432 		.name = "C6",
433 		.desc = "MWAIT 0x20",
434 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
435 		.exit_latency = 88,
436 		.target_residency = 700,
437 		.enter = &intel_idle,
438 		.enter_s2idle = intel_idle_s2idle, },
439 	{
440 		.enter = NULL }
441 };
442 
443 static struct cpuidle_state hsw_cstates[] = {
444 	{
445 		.name = "C1",
446 		.desc = "MWAIT 0x00",
447 		.flags = MWAIT2flg(0x00),
448 		.exit_latency = 2,
449 		.target_residency = 2,
450 		.enter = &intel_idle,
451 		.enter_s2idle = intel_idle_s2idle, },
452 	{
453 		.name = "C1E",
454 		.desc = "MWAIT 0x01",
455 		.flags = MWAIT2flg(0x01),
456 		.exit_latency = 10,
457 		.target_residency = 20,
458 		.enter = &intel_idle,
459 		.enter_s2idle = intel_idle_s2idle, },
460 	{
461 		.name = "C3",
462 		.desc = "MWAIT 0x10",
463 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
464 		.exit_latency = 33,
465 		.target_residency = 100,
466 		.enter = &intel_idle,
467 		.enter_s2idle = intel_idle_s2idle, },
468 	{
469 		.name = "C6",
470 		.desc = "MWAIT 0x20",
471 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
472 		.exit_latency = 133,
473 		.target_residency = 400,
474 		.enter = &intel_idle,
475 		.enter_s2idle = intel_idle_s2idle, },
476 	{
477 		.name = "C7s",
478 		.desc = "MWAIT 0x32",
479 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
480 		.exit_latency = 166,
481 		.target_residency = 500,
482 		.enter = &intel_idle,
483 		.enter_s2idle = intel_idle_s2idle, },
484 	{
485 		.name = "C8",
486 		.desc = "MWAIT 0x40",
487 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
488 		.exit_latency = 300,
489 		.target_residency = 900,
490 		.enter = &intel_idle,
491 		.enter_s2idle = intel_idle_s2idle, },
492 	{
493 		.name = "C9",
494 		.desc = "MWAIT 0x50",
495 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
496 		.exit_latency = 600,
497 		.target_residency = 1800,
498 		.enter = &intel_idle,
499 		.enter_s2idle = intel_idle_s2idle, },
500 	{
501 		.name = "C10",
502 		.desc = "MWAIT 0x60",
503 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
504 		.exit_latency = 2600,
505 		.target_residency = 7700,
506 		.enter = &intel_idle,
507 		.enter_s2idle = intel_idle_s2idle, },
508 	{
509 		.enter = NULL }
510 };
511 static struct cpuidle_state bdw_cstates[] = {
512 	{
513 		.name = "C1",
514 		.desc = "MWAIT 0x00",
515 		.flags = MWAIT2flg(0x00),
516 		.exit_latency = 2,
517 		.target_residency = 2,
518 		.enter = &intel_idle,
519 		.enter_s2idle = intel_idle_s2idle, },
520 	{
521 		.name = "C1E",
522 		.desc = "MWAIT 0x01",
523 		.flags = MWAIT2flg(0x01),
524 		.exit_latency = 10,
525 		.target_residency = 20,
526 		.enter = &intel_idle,
527 		.enter_s2idle = intel_idle_s2idle, },
528 	{
529 		.name = "C3",
530 		.desc = "MWAIT 0x10",
531 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
532 		.exit_latency = 40,
533 		.target_residency = 100,
534 		.enter = &intel_idle,
535 		.enter_s2idle = intel_idle_s2idle, },
536 	{
537 		.name = "C6",
538 		.desc = "MWAIT 0x20",
539 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
540 		.exit_latency = 133,
541 		.target_residency = 400,
542 		.enter = &intel_idle,
543 		.enter_s2idle = intel_idle_s2idle, },
544 	{
545 		.name = "C7s",
546 		.desc = "MWAIT 0x32",
547 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
548 		.exit_latency = 166,
549 		.target_residency = 500,
550 		.enter = &intel_idle,
551 		.enter_s2idle = intel_idle_s2idle, },
552 	{
553 		.name = "C8",
554 		.desc = "MWAIT 0x40",
555 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
556 		.exit_latency = 300,
557 		.target_residency = 900,
558 		.enter = &intel_idle,
559 		.enter_s2idle = intel_idle_s2idle, },
560 	{
561 		.name = "C9",
562 		.desc = "MWAIT 0x50",
563 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
564 		.exit_latency = 600,
565 		.target_residency = 1800,
566 		.enter = &intel_idle,
567 		.enter_s2idle = intel_idle_s2idle, },
568 	{
569 		.name = "C10",
570 		.desc = "MWAIT 0x60",
571 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
572 		.exit_latency = 2600,
573 		.target_residency = 7700,
574 		.enter = &intel_idle,
575 		.enter_s2idle = intel_idle_s2idle, },
576 	{
577 		.enter = NULL }
578 };
579 
580 static struct cpuidle_state skl_cstates[] = {
581 	{
582 		.name = "C1",
583 		.desc = "MWAIT 0x00",
584 		.flags = MWAIT2flg(0x00),
585 		.exit_latency = 2,
586 		.target_residency = 2,
587 		.enter = &intel_idle,
588 		.enter_s2idle = intel_idle_s2idle, },
589 	{
590 		.name = "C1E",
591 		.desc = "MWAIT 0x01",
592 		.flags = MWAIT2flg(0x01),
593 		.exit_latency = 10,
594 		.target_residency = 20,
595 		.enter = &intel_idle,
596 		.enter_s2idle = intel_idle_s2idle, },
597 	{
598 		.name = "C3",
599 		.desc = "MWAIT 0x10",
600 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
601 		.exit_latency = 70,
602 		.target_residency = 100,
603 		.enter = &intel_idle,
604 		.enter_s2idle = intel_idle_s2idle, },
605 	{
606 		.name = "C6",
607 		.desc = "MWAIT 0x20",
608 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
609 		.exit_latency = 85,
610 		.target_residency = 200,
611 		.enter = &intel_idle,
612 		.enter_s2idle = intel_idle_s2idle, },
613 	{
614 		.name = "C7s",
615 		.desc = "MWAIT 0x33",
616 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
617 		.exit_latency = 124,
618 		.target_residency = 800,
619 		.enter = &intel_idle,
620 		.enter_s2idle = intel_idle_s2idle, },
621 	{
622 		.name = "C8",
623 		.desc = "MWAIT 0x40",
624 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
625 		.exit_latency = 200,
626 		.target_residency = 800,
627 		.enter = &intel_idle,
628 		.enter_s2idle = intel_idle_s2idle, },
629 	{
630 		.name = "C9",
631 		.desc = "MWAIT 0x50",
632 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
633 		.exit_latency = 480,
634 		.target_residency = 5000,
635 		.enter = &intel_idle,
636 		.enter_s2idle = intel_idle_s2idle, },
637 	{
638 		.name = "C10",
639 		.desc = "MWAIT 0x60",
640 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
641 		.exit_latency = 890,
642 		.target_residency = 5000,
643 		.enter = &intel_idle,
644 		.enter_s2idle = intel_idle_s2idle, },
645 	{
646 		.enter = NULL }
647 };
648 
649 static struct cpuidle_state skx_cstates[] = {
650 	{
651 		.name = "C1",
652 		.desc = "MWAIT 0x00",
653 		.flags = MWAIT2flg(0x00),
654 		.exit_latency = 2,
655 		.target_residency = 2,
656 		.enter = &intel_idle,
657 		.enter_s2idle = intel_idle_s2idle, },
658 	{
659 		.name = "C1E",
660 		.desc = "MWAIT 0x01",
661 		.flags = MWAIT2flg(0x01),
662 		.exit_latency = 10,
663 		.target_residency = 20,
664 		.enter = &intel_idle,
665 		.enter_s2idle = intel_idle_s2idle, },
666 	{
667 		.name = "C6",
668 		.desc = "MWAIT 0x20",
669 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
670 		.exit_latency = 133,
671 		.target_residency = 600,
672 		.enter = &intel_idle,
673 		.enter_s2idle = intel_idle_s2idle, },
674 	{
675 		.enter = NULL }
676 };
677 
678 static struct cpuidle_state atom_cstates[] = {
679 	{
680 		.name = "C1E",
681 		.desc = "MWAIT 0x00",
682 		.flags = MWAIT2flg(0x00),
683 		.exit_latency = 10,
684 		.target_residency = 20,
685 		.enter = &intel_idle,
686 		.enter_s2idle = intel_idle_s2idle, },
687 	{
688 		.name = "C2",
689 		.desc = "MWAIT 0x10",
690 		.flags = MWAIT2flg(0x10),
691 		.exit_latency = 20,
692 		.target_residency = 80,
693 		.enter = &intel_idle,
694 		.enter_s2idle = intel_idle_s2idle, },
695 	{
696 		.name = "C4",
697 		.desc = "MWAIT 0x30",
698 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
699 		.exit_latency = 100,
700 		.target_residency = 400,
701 		.enter = &intel_idle,
702 		.enter_s2idle = intel_idle_s2idle, },
703 	{
704 		.name = "C6",
705 		.desc = "MWAIT 0x52",
706 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
707 		.exit_latency = 140,
708 		.target_residency = 560,
709 		.enter = &intel_idle,
710 		.enter_s2idle = intel_idle_s2idle, },
711 	{
712 		.enter = NULL }
713 };
714 static struct cpuidle_state tangier_cstates[] = {
715 	{
716 		.name = "C1",
717 		.desc = "MWAIT 0x00",
718 		.flags = MWAIT2flg(0x00),
719 		.exit_latency = 1,
720 		.target_residency = 4,
721 		.enter = &intel_idle,
722 		.enter_s2idle = intel_idle_s2idle, },
723 	{
724 		.name = "C4",
725 		.desc = "MWAIT 0x30",
726 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
727 		.exit_latency = 100,
728 		.target_residency = 400,
729 		.enter = &intel_idle,
730 		.enter_s2idle = intel_idle_s2idle, },
731 	{
732 		.name = "C6",
733 		.desc = "MWAIT 0x52",
734 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
735 		.exit_latency = 140,
736 		.target_residency = 560,
737 		.enter = &intel_idle,
738 		.enter_s2idle = intel_idle_s2idle, },
739 	{
740 		.name = "C7",
741 		.desc = "MWAIT 0x60",
742 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
743 		.exit_latency = 1200,
744 		.target_residency = 4000,
745 		.enter = &intel_idle,
746 		.enter_s2idle = intel_idle_s2idle, },
747 	{
748 		.name = "C9",
749 		.desc = "MWAIT 0x64",
750 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
751 		.exit_latency = 10000,
752 		.target_residency = 20000,
753 		.enter = &intel_idle,
754 		.enter_s2idle = intel_idle_s2idle, },
755 	{
756 		.enter = NULL }
757 };
758 static struct cpuidle_state avn_cstates[] = {
759 	{
760 		.name = "C1",
761 		.desc = "MWAIT 0x00",
762 		.flags = MWAIT2flg(0x00),
763 		.exit_latency = 2,
764 		.target_residency = 2,
765 		.enter = &intel_idle,
766 		.enter_s2idle = intel_idle_s2idle, },
767 	{
768 		.name = "C6",
769 		.desc = "MWAIT 0x51",
770 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
771 		.exit_latency = 15,
772 		.target_residency = 45,
773 		.enter = &intel_idle,
774 		.enter_s2idle = intel_idle_s2idle, },
775 	{
776 		.enter = NULL }
777 };
778 static struct cpuidle_state knl_cstates[] = {
779 	{
780 		.name = "C1",
781 		.desc = "MWAIT 0x00",
782 		.flags = MWAIT2flg(0x00),
783 		.exit_latency = 1,
784 		.target_residency = 2,
785 		.enter = &intel_idle,
786 		.enter_s2idle = intel_idle_s2idle },
787 	{
788 		.name = "C6",
789 		.desc = "MWAIT 0x10",
790 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
791 		.exit_latency = 120,
792 		.target_residency = 500,
793 		.enter = &intel_idle,
794 		.enter_s2idle = intel_idle_s2idle },
795 	{
796 		.enter = NULL }
797 };
798 
799 static struct cpuidle_state bxt_cstates[] = {
800 	{
801 		.name = "C1",
802 		.desc = "MWAIT 0x00",
803 		.flags = MWAIT2flg(0x00),
804 		.exit_latency = 2,
805 		.target_residency = 2,
806 		.enter = &intel_idle,
807 		.enter_s2idle = intel_idle_s2idle, },
808 	{
809 		.name = "C1E",
810 		.desc = "MWAIT 0x01",
811 		.flags = MWAIT2flg(0x01),
812 		.exit_latency = 10,
813 		.target_residency = 20,
814 		.enter = &intel_idle,
815 		.enter_s2idle = intel_idle_s2idle, },
816 	{
817 		.name = "C6",
818 		.desc = "MWAIT 0x20",
819 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
820 		.exit_latency = 133,
821 		.target_residency = 133,
822 		.enter = &intel_idle,
823 		.enter_s2idle = intel_idle_s2idle, },
824 	{
825 		.name = "C7s",
826 		.desc = "MWAIT 0x31",
827 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
828 		.exit_latency = 155,
829 		.target_residency = 155,
830 		.enter = &intel_idle,
831 		.enter_s2idle = intel_idle_s2idle, },
832 	{
833 		.name = "C8",
834 		.desc = "MWAIT 0x40",
835 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
836 		.exit_latency = 1000,
837 		.target_residency = 1000,
838 		.enter = &intel_idle,
839 		.enter_s2idle = intel_idle_s2idle, },
840 	{
841 		.name = "C9",
842 		.desc = "MWAIT 0x50",
843 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
844 		.exit_latency = 2000,
845 		.target_residency = 2000,
846 		.enter = &intel_idle,
847 		.enter_s2idle = intel_idle_s2idle, },
848 	{
849 		.name = "C10",
850 		.desc = "MWAIT 0x60",
851 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
852 		.exit_latency = 10000,
853 		.target_residency = 10000,
854 		.enter = &intel_idle,
855 		.enter_s2idle = intel_idle_s2idle, },
856 	{
857 		.enter = NULL }
858 };
859 
860 static struct cpuidle_state dnv_cstates[] = {
861 	{
862 		.name = "C1",
863 		.desc = "MWAIT 0x00",
864 		.flags = MWAIT2flg(0x00),
865 		.exit_latency = 2,
866 		.target_residency = 2,
867 		.enter = &intel_idle,
868 		.enter_s2idle = intel_idle_s2idle, },
869 	{
870 		.name = "C1E",
871 		.desc = "MWAIT 0x01",
872 		.flags = MWAIT2flg(0x01),
873 		.exit_latency = 10,
874 		.target_residency = 20,
875 		.enter = &intel_idle,
876 		.enter_s2idle = intel_idle_s2idle, },
877 	{
878 		.name = "C6",
879 		.desc = "MWAIT 0x20",
880 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
881 		.exit_latency = 50,
882 		.target_residency = 500,
883 		.enter = &intel_idle,
884 		.enter_s2idle = intel_idle_s2idle, },
885 	{
886 		.enter = NULL }
887 };
888 
889 /**
890  * intel_idle
891  * @dev: cpuidle_device
892  * @drv: cpuidle driver
893  * @index: index of cpuidle state
894  *
895  * Must be called under local_irq_disable().
896  */
897 static __cpuidle int intel_idle(struct cpuidle_device *dev,
898 				struct cpuidle_driver *drv, int index)
899 {
900 	unsigned long ecx = 1; /* break on interrupt flag */
901 	struct cpuidle_state *state = &drv->states[index];
902 	unsigned long eax = flg2MWAIT(state->flags);
903 	unsigned int cstate;
904 	bool uninitialized_var(tick);
905 	int cpu = smp_processor_id();
906 
907 	/*
908 	 * leave_mm() to avoid costly and often unnecessary wakeups
909 	 * for flushing the user TLB's associated with the active mm.
910 	 */
911 	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
912 		leave_mm(cpu);
913 
914 	if (!static_cpu_has(X86_FEATURE_ARAT)) {
915 		cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) &
916 				MWAIT_CSTATE_MASK) + 1;
917 		tick = false;
918 		if (!(lapic_timer_reliable_states & (1 << (cstate)))) {
919 			tick = true;
920 			tick_broadcast_enter();
921 		}
922 	}
923 
924 	mwait_idle_with_hints(eax, ecx);
925 
926 	if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
927 		tick_broadcast_exit();
928 
929 	return index;
930 }
931 
932 /**
933  * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle
934  * @dev: cpuidle_device
935  * @drv: cpuidle driver
936  * @index: state index
937  */
938 static void intel_idle_s2idle(struct cpuidle_device *dev,
939 			     struct cpuidle_driver *drv, int index)
940 {
941 	unsigned long ecx = 1; /* break on interrupt flag */
942 	unsigned long eax = flg2MWAIT(drv->states[index].flags);
943 
944 	mwait_idle_with_hints(eax, ecx);
945 }
946 
947 static void __setup_broadcast_timer(bool on)
948 {
949 	if (on)
950 		tick_broadcast_enable();
951 	else
952 		tick_broadcast_disable();
953 }
954 
955 static void auto_demotion_disable(void)
956 {
957 	unsigned long long msr_bits;
958 
959 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
960 	msr_bits &= ~(icpu->auto_demotion_disable_flags);
961 	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
962 }
963 static void c1e_promotion_disable(void)
964 {
965 	unsigned long long msr_bits;
966 
967 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
968 	msr_bits &= ~0x2;
969 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
970 }
971 
972 static const struct idle_cpu idle_cpu_nehalem = {
973 	.state_table = nehalem_cstates,
974 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
975 	.disable_promotion_to_c1e = true,
976 };
977 
978 static const struct idle_cpu idle_cpu_atom = {
979 	.state_table = atom_cstates,
980 };
981 
982 static const struct idle_cpu idle_cpu_tangier = {
983 	.state_table = tangier_cstates,
984 };
985 
986 static const struct idle_cpu idle_cpu_lincroft = {
987 	.state_table = atom_cstates,
988 	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
989 };
990 
991 static const struct idle_cpu idle_cpu_snb = {
992 	.state_table = snb_cstates,
993 	.disable_promotion_to_c1e = true,
994 };
995 
996 static const struct idle_cpu idle_cpu_byt = {
997 	.state_table = byt_cstates,
998 	.disable_promotion_to_c1e = true,
999 	.byt_auto_demotion_disable_flag = true,
1000 };
1001 
1002 static const struct idle_cpu idle_cpu_cht = {
1003 	.state_table = cht_cstates,
1004 	.disable_promotion_to_c1e = true,
1005 	.byt_auto_demotion_disable_flag = true,
1006 };
1007 
1008 static const struct idle_cpu idle_cpu_ivb = {
1009 	.state_table = ivb_cstates,
1010 	.disable_promotion_to_c1e = true,
1011 };
1012 
1013 static const struct idle_cpu idle_cpu_ivt = {
1014 	.state_table = ivt_cstates,
1015 	.disable_promotion_to_c1e = true,
1016 };
1017 
1018 static const struct idle_cpu idle_cpu_hsw = {
1019 	.state_table = hsw_cstates,
1020 	.disable_promotion_to_c1e = true,
1021 };
1022 
1023 static const struct idle_cpu idle_cpu_bdw = {
1024 	.state_table = bdw_cstates,
1025 	.disable_promotion_to_c1e = true,
1026 };
1027 
1028 static const struct idle_cpu idle_cpu_skl = {
1029 	.state_table = skl_cstates,
1030 	.disable_promotion_to_c1e = true,
1031 };
1032 
1033 static const struct idle_cpu idle_cpu_skx = {
1034 	.state_table = skx_cstates,
1035 	.disable_promotion_to_c1e = true,
1036 };
1037 
1038 static const struct idle_cpu idle_cpu_avn = {
1039 	.state_table = avn_cstates,
1040 	.disable_promotion_to_c1e = true,
1041 };
1042 
1043 static const struct idle_cpu idle_cpu_knl = {
1044 	.state_table = knl_cstates,
1045 };
1046 
1047 static const struct idle_cpu idle_cpu_bxt = {
1048 	.state_table = bxt_cstates,
1049 	.disable_promotion_to_c1e = true,
1050 };
1051 
1052 static const struct idle_cpu idle_cpu_dnv = {
1053 	.state_table = dnv_cstates,
1054 	.disable_promotion_to_c1e = true,
1055 };
1056 
/*
 * Family 6 CPU models handled by this driver.
 *
 * intel_idle_probe() runs x86_match_cpu() against this table; the second
 * macro argument of the matching entry is retrieved there through
 * ->driver_data and used as the struct idle_cpu describing the boot CPU.
 * When nothing matches, the probe fails with -ENODEV.
 *
 * The empty entry at the end closes the table.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	INTEL_CPU_FAM6(NEHALEM_EP,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM,			idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_G,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(WESTMERE_EP,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(NEHALEM_EX,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(ATOM_BONNELL,		idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_BONNELL_MID,	idle_cpu_lincroft),
	INTEL_CPU_FAM6(WESTMERE_EX,		idle_cpu_nehalem),
	INTEL_CPU_FAM6(SANDYBRIDGE,		idle_cpu_snb),
	INTEL_CPU_FAM6(SANDYBRIDGE_X,		idle_cpu_snb),
	INTEL_CPU_FAM6(ATOM_SALTWELL,		idle_cpu_atom),
	INTEL_CPU_FAM6(ATOM_SILVERMONT,		idle_cpu_byt),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,	idle_cpu_tangier),
	INTEL_CPU_FAM6(ATOM_AIRMONT,		idle_cpu_cht),
	INTEL_CPU_FAM6(IVYBRIDGE,		idle_cpu_ivb),
	INTEL_CPU_FAM6(IVYBRIDGE_X,		idle_cpu_ivt),
	INTEL_CPU_FAM6(HASWELL,			idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_X,		idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_L,		idle_cpu_hsw),
	INTEL_CPU_FAM6(HASWELL_G,		idle_cpu_hsw),
	INTEL_CPU_FAM6(ATOM_SILVERMONT_D,	idle_cpu_avn),
	INTEL_CPU_FAM6(BROADWELL,		idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_G,		idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_X,		idle_cpu_bdw),
	INTEL_CPU_FAM6(BROADWELL_D,		idle_cpu_bdw),
	INTEL_CPU_FAM6(SKYLAKE_L,		idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE,			idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE_L,		idle_cpu_skl),
	INTEL_CPU_FAM6(KABYLAKE,		idle_cpu_skl),
	INTEL_CPU_FAM6(SKYLAKE_X,		idle_cpu_skx),
	INTEL_CPU_FAM6(XEON_PHI_KNL,		idle_cpu_knl),
	INTEL_CPU_FAM6(XEON_PHI_KNM,		idle_cpu_knl),
	INTEL_CPU_FAM6(ATOM_GOLDMONT,		idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS,	idle_cpu_bxt),
	INTEL_CPU_FAM6(ATOM_GOLDMONT_D,		idle_cpu_dnv),
	INTEL_CPU_FAM6(ATOM_TREMONT_D,		idle_cpu_dnv),
	{}
};
1097 
1098 /*
1099  * intel_idle_probe()
1100  */
1101 static int __init intel_idle_probe(void)
1102 {
1103 	unsigned int eax, ebx, ecx;
1104 	const struct x86_cpu_id *id;
1105 
1106 	if (max_cstate == 0) {
1107 		pr_debug("disabled\n");
1108 		return -EPERM;
1109 	}
1110 
1111 	id = x86_match_cpu(intel_idle_ids);
1112 	if (!id) {
1113 		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1114 		    boot_cpu_data.x86 == 6)
1115 			pr_debug("does not run on family %d model %d\n",
1116 				 boot_cpu_data.x86, boot_cpu_data.x86_model);
1117 		return -ENODEV;
1118 	}
1119 
1120 	if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
1121 		pr_debug("Please enable MWAIT in BIOS SETUP\n");
1122 		return -ENODEV;
1123 	}
1124 
1125 	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1126 		return -ENODEV;
1127 
1128 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1129 
1130 	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1131 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1132 	    !mwait_substates)
1133 			return -ENODEV;
1134 
1135 	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
1136 
1137 	icpu = (const struct idle_cpu *)id->driver_data;
1138 	cpuidle_state_table = icpu->state_table;
1139 
1140 	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
1141 		 boot_cpu_data.x86_model);
1142 
1143 	return 0;
1144 }
1145 
1146 /*
1147  * intel_idle_cpuidle_devices_uninit()
1148  * Unregisters the cpuidle devices.
1149  */
1150 static void intel_idle_cpuidle_devices_uninit(void)
1151 {
1152 	int i;
1153 	struct cpuidle_device *dev;
1154 
1155 	for_each_online_cpu(i) {
1156 		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1157 		cpuidle_unregister_device(dev);
1158 	}
1159 }
1160 
1161 /*
1162  * ivt_idle_state_table_update(void)
1163  *
1164  * Tune IVT multi-socket targets
1165  * Assumption: num_sockets == (max_package_num + 1)
1166  */
1167 static void ivt_idle_state_table_update(void)
1168 {
1169 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1170 	int cpu, package_num, num_sockets = 1;
1171 
1172 	for_each_online_cpu(cpu) {
1173 		package_num = topology_physical_package_id(cpu);
1174 		if (package_num + 1 > num_sockets) {
1175 			num_sockets = package_num + 1;
1176 
1177 			if (num_sockets > 4) {
1178 				cpuidle_state_table = ivt_cstates_8s;
1179 				return;
1180 			}
1181 		}
1182 	}
1183 
1184 	if (num_sockets > 2)
1185 		cpuidle_state_table = ivt_cstates_4s;
1186 
1187 	/* else, 1 and 2 socket systems use default ivt_cstates */
1188 }
1189 
/*
 * Nanoseconds per count, indexed by the 3-bit field at bits 12:10 of an
 * IRTL (Interrupt Response Time Limit) MSR value.  The last two indices
 * hold 0, so values using them translate to 0 usec.
 *
 * The table is only ever read, hence const.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec()
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 * @irtl: raw IRTL MSR value
 *
 * Multiplies the count in bits 9:0 by the unit selected through bits 12:10
 * and converts the resulting nanoseconds to microseconds (rounding down).
 * Returns 0 when @irtl is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> 10) & 0x7];

	return div64_u64((irtl & 0x3FF) * ns, 1000);
}
1208 /*
1209  * bxt_idle_state_table_update(void)
1210  *
1211  * On BXT, we trust the IRTL to show the definitive maximum latency
1212  * We use the same value for target_residency.
1213  */
1214 static void bxt_idle_state_table_update(void)
1215 {
1216 	unsigned long long msr;
1217 	unsigned int usec;
1218 
1219 	rdmsrl(MSR_PKGC6_IRTL, msr);
1220 	usec = irtl_2_usec(msr);
1221 	if (usec) {
1222 		bxt_cstates[2].exit_latency = usec;
1223 		bxt_cstates[2].target_residency = usec;
1224 	}
1225 
1226 	rdmsrl(MSR_PKGC7_IRTL, msr);
1227 	usec = irtl_2_usec(msr);
1228 	if (usec) {
1229 		bxt_cstates[3].exit_latency = usec;
1230 		bxt_cstates[3].target_residency = usec;
1231 	}
1232 
1233 	rdmsrl(MSR_PKGC8_IRTL, msr);
1234 	usec = irtl_2_usec(msr);
1235 	if (usec) {
1236 		bxt_cstates[4].exit_latency = usec;
1237 		bxt_cstates[4].target_residency = usec;
1238 	}
1239 
1240 	rdmsrl(MSR_PKGC9_IRTL, msr);
1241 	usec = irtl_2_usec(msr);
1242 	if (usec) {
1243 		bxt_cstates[5].exit_latency = usec;
1244 		bxt_cstates[5].target_residency = usec;
1245 	}
1246 
1247 	rdmsrl(MSR_PKGC10_IRTL, msr);
1248 	usec = irtl_2_usec(msr);
1249 	if (usec) {
1250 		bxt_cstates[6].exit_latency = usec;
1251 		bxt_cstates[6].target_residency = usec;
1252 	}
1253 
1254 }
1255 /*
1256  * sklh_idle_state_table_update(void)
1257  *
1258  * On SKL-H (model 0x5e) disable C8 and C9 if:
1259  * C10 is enabled and SGX disabled
1260  */
1261 static void sklh_idle_state_table_update(void)
1262 {
1263 	unsigned long long msr;
1264 	unsigned int eax, ebx, ecx, edx;
1265 
1266 
1267 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1268 	if (max_cstate <= 7)
1269 		return;
1270 
1271 	/* if PC10 not present in CPUID.MWAIT.EDX */
1272 	if ((mwait_substates & (0xF << 28)) == 0)
1273 		return;
1274 
1275 	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1276 
1277 	/* PC10 is not enabled in PKG C-state limit */
1278 	if ((msr & 0xF) != 8)
1279 		return;
1280 
1281 	ecx = 0;
1282 	cpuid(7, &eax, &ebx, &ecx, &edx);
1283 
1284 	/* if SGX is present */
1285 	if (ebx & (1 << 2)) {
1286 
1287 		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1288 
1289 		/* if SGX is enabled */
1290 		if (msr & (1 << 18))
1291 			return;
1292 	}
1293 
1294 	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
1295 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
1296 }
1297 /*
1298  * intel_idle_state_table_update()
1299  *
1300  * Update the default state_table for this CPU-id
1301  */
1302 
1303 static void intel_idle_state_table_update(void)
1304 {
1305 	switch (boot_cpu_data.x86_model) {
1306 
1307 	case INTEL_FAM6_IVYBRIDGE_X:
1308 		ivt_idle_state_table_update();
1309 		break;
1310 	case INTEL_FAM6_ATOM_GOLDMONT:
1311 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1312 		bxt_idle_state_table_update();
1313 		break;
1314 	case INTEL_FAM6_SKYLAKE:
1315 		sklh_idle_state_table_update();
1316 		break;
1317 	}
1318 }
1319 
1320 /*
1321  * intel_idle_cpuidle_driver_init()
1322  * allocate, initialize cpuidle_states
1323  */
1324 static void __init intel_idle_cpuidle_driver_init(void)
1325 {
1326 	int cstate;
1327 	struct cpuidle_driver *drv = &intel_idle_driver;
1328 
1329 	intel_idle_state_table_update();
1330 
1331 	cpuidle_poll_state_init(drv);
1332 	drv->state_count = 1;
1333 
1334 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1335 		int num_substates, mwait_hint, mwait_cstate;
1336 
1337 		if ((cpuidle_state_table[cstate].enter == NULL) &&
1338 		    (cpuidle_state_table[cstate].enter_s2idle == NULL))
1339 			break;
1340 
1341 		if (cstate + 1 > max_cstate) {
1342 			pr_info("max_cstate %d reached\n", max_cstate);
1343 			break;
1344 		}
1345 
1346 		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1347 		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1348 
1349 		/* number of sub-states for this state in CPUID.MWAIT */
1350 		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1351 					& MWAIT_SUBSTATE_MASK;
1352 
1353 		/* if NO sub-states for this state in CPUID, skip it */
1354 		if (num_substates == 0)
1355 			continue;
1356 
1357 		/* if state marked as disabled, skip it */
1358 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1359 			pr_debug("state %s is disabled\n",
1360 				 cpuidle_state_table[cstate].name);
1361 			continue;
1362 		}
1363 
1364 
1365 		if (((mwait_cstate + 1) > 2) &&
1366 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1367 			mark_tsc_unstable("TSC halts in idle"
1368 					" states deeper than C2");
1369 
1370 		drv->states[drv->state_count] =	/* structure copy */
1371 			cpuidle_state_table[cstate];
1372 
1373 		drv->state_count += 1;
1374 	}
1375 
1376 	if (icpu->byt_auto_demotion_disable_flag) {
1377 		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1378 		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1379 	}
1380 }
1381 
1382 
1383 /*
1384  * intel_idle_cpu_init()
1385  * allocate, initialize, register cpuidle_devices
1386  * @cpu: cpu/core to initialize
1387  */
1388 static int intel_idle_cpu_init(unsigned int cpu)
1389 {
1390 	struct cpuidle_device *dev;
1391 
1392 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1393 	dev->cpu = cpu;
1394 
1395 	if (cpuidle_register_device(dev)) {
1396 		pr_debug("cpuidle_register_device %d failed!\n", cpu);
1397 		return -EIO;
1398 	}
1399 
1400 	if (icpu->auto_demotion_disable_flags)
1401 		auto_demotion_disable();
1402 
1403 	if (icpu->disable_promotion_to_c1e)
1404 		c1e_promotion_disable();
1405 
1406 	return 0;
1407 }
1408 
1409 static int intel_idle_cpu_online(unsigned int cpu)
1410 {
1411 	struct cpuidle_device *dev;
1412 
1413 	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
1414 		__setup_broadcast_timer(true);
1415 
1416 	/*
1417 	 * Some systems can hotplug a cpu at runtime after
1418 	 * the kernel has booted, we have to initialize the
1419 	 * driver in this case
1420 	 */
1421 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1422 	if (!dev->registered)
1423 		return intel_idle_cpu_init(cpu);
1424 
1425 	return 0;
1426 }
1427 
1428 static int __init intel_idle_init(void)
1429 {
1430 	int retval;
1431 
1432 	/* Do not load intel_idle at all for now if idle= is passed */
1433 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1434 		return -ENODEV;
1435 
1436 	retval = intel_idle_probe();
1437 	if (retval)
1438 		return retval;
1439 
1440 	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1441 	if (intel_idle_cpuidle_devices == NULL)
1442 		return -ENOMEM;
1443 
1444 	intel_idle_cpuidle_driver_init();
1445 	retval = cpuidle_register_driver(&intel_idle_driver);
1446 	if (retval) {
1447 		struct cpuidle_driver *drv = cpuidle_get_driver();
1448 		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
1449 		       drv ? drv->name : "none");
1450 		goto init_driver_fail;
1451 	}
1452 
1453 	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
1454 		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1455 
1456 	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
1457 				   intel_idle_cpu_online, NULL);
1458 	if (retval < 0)
1459 		goto hp_setup_fail;
1460 
1461 	pr_debug("lapic_timer_reliable_states 0x%x\n",
1462 		 lapic_timer_reliable_states);
1463 
1464 	return 0;
1465 
1466 hp_setup_fail:
1467 	intel_idle_cpuidle_devices_uninit();
1468 	cpuidle_unregister_driver(&intel_idle_driver);
1469 init_driver_fail:
1470 	free_percpu(intel_idle_cpuidle_devices);
1471 	return retval;
1472 
1473 }
1474 device_initcall(intel_idle_init);
1475 
/*
 * We are not really modular, but we used to support that.  This means we
 * still support "intel_idle.max_cstate=..." at boot, as well as a read-only
 * export of the value at /sys/module/intel_idle/parameters/max_cstate, so
 * using module_param is (currently) the easiest way to keep both working.
 */
module_param(max_cstate, int, 0444);
1483