xref: /openbmc/linux/drivers/idle/intel_idle.c (revision 293d5b43)
1 /*
2  * intel_idle.c - native hardware idle loop for modern Intel processors
3  *
4  * Copyright (c) 2013, Intel Corporation.
5  * Len Brown <len.brown@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms and conditions of the GNU General Public License,
9  * version 2, as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 /*
22  * intel_idle is a cpuidle driver that loads on specific Intel processors
23  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
24  * make Linux more efficient on these processors, as intel_idle knows
25  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26  */
27 
28 /*
29  * Design Assumptions
30  *
31  * All CPUs have same idle states as boot CPU
32  *
33  * Chipset BM_STS (bus master status) bit is a NOP
34  *	for preventing entry into deep C-states
35  */
36 
37 /*
38  * Known limitations
39  *
40  * The driver currently initializes for_each_online_cpu() upon modprobe.
41  * It is unaware of subsequent processors hot-added to the system.
42  * This means that if you boot with maxcpus=n and later online
43  * processors above n, those processors will use C1 only.
44  *
45  * ACPI has a .suspend hack to turn off deep c-states during suspend
46  * to avoid complications with the lapic timer workaround.
47  * Have not seen issues with suspend, but may need same workaround here.
48  *
49  */
50 
51 /* DEBUG is defined here, so pr_debug() statements are enabled; comment it out to silence them */
52 #define DEBUG
53 
54 #include <linux/kernel.h>
55 #include <linux/cpuidle.h>
56 #include <linux/tick.h>
57 #include <trace/events/power.h>
58 #include <linux/sched.h>
59 #include <linux/notifier.h>
60 #include <linux/cpu.h>
61 #include <linux/moduleparam.h>
62 #include <asm/cpu_device_id.h>
63 #include <asm/intel-family.h>
64 #include <asm/mwait.h>
65 #include <asm/msr.h>
66 
67 #define INTEL_IDLE_VERSION "0.4.1"
68 #define PREFIX "intel_idle: "
69 
70 static struct cpuidle_driver intel_idle_driver = {
71 	.name = "intel_idle",
72 	.owner = THIS_MODULE,
73 };
74 /* intel_idle.max_cstate=0 disables driver */
75 static int max_cstate = CPUIDLE_STATE_MAX - 1;
76 
77 static unsigned int mwait_substates;
78 
79 #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
80 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
81 static unsigned int lapic_timer_reliable_states = (1 << 1);	 /* Default to only C1 */
82 
83 struct idle_cpu {
84 	struct cpuidle_state *state_table;
85 
86 	/*
87 	 * Hardware C-state auto-demotion may not always be optimal.
88 	 * Indicate which enable bits to clear here.
89 	 */
90 	unsigned long auto_demotion_disable_flags;
91 	bool byt_auto_demotion_disable_flag;
92 	bool disable_promotion_to_c1e;
93 };
94 
95 static const struct idle_cpu *icpu;
96 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
97 static int intel_idle(struct cpuidle_device *dev,
98 			struct cpuidle_driver *drv, int index);
99 static void intel_idle_freeze(struct cpuidle_device *dev,
100 			      struct cpuidle_driver *drv, int index);
101 static int intel_idle_cpu_init(int cpu);
102 
103 static struct cpuidle_state *cpuidle_state_table;
104 
/*
 * Set this flag for states where the HW flushes the TLB for us
 * and so we don't need cross-calls to keep it consistent.
 * If this flag is set, SW flushes the TLB, so even if the
 * HW doesn't do the flushing, this flag is safe to use.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000

/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * flg2MWAIT(flags): extract the 8-bit hint from bits 31:24 of @flags.
 * MWAIT2flg(eax):   place the low 8 bits of @eax into bits 31:24.
 *
 * Both macros parenthesize their argument so that an expression such as
 * "a | b" is masked as a whole.  MWAIT2flg() masks with an unsigned
 * constant so that hints with bit 7 set are never shifted into the sign
 * bit of a signed int (undefined behaviour in C).
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFFU) << 24)
122 
123 /*
124  * States are indexed by the cstate number,
125  * which is also the index into the MWAIT hint array.
126  * Thus C0 is a dummy.
127  */
128 static struct cpuidle_state nehalem_cstates[] = {
129 	{
130 		.name = "C1-NHM",
131 		.desc = "MWAIT 0x00",
132 		.flags = MWAIT2flg(0x00),
133 		.exit_latency = 3,
134 		.target_residency = 6,
135 		.enter = &intel_idle,
136 		.enter_freeze = intel_idle_freeze, },
137 	{
138 		.name = "C1E-NHM",
139 		.desc = "MWAIT 0x01",
140 		.flags = MWAIT2flg(0x01),
141 		.exit_latency = 10,
142 		.target_residency = 20,
143 		.enter = &intel_idle,
144 		.enter_freeze = intel_idle_freeze, },
145 	{
146 		.name = "C3-NHM",
147 		.desc = "MWAIT 0x10",
148 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
149 		.exit_latency = 20,
150 		.target_residency = 80,
151 		.enter = &intel_idle,
152 		.enter_freeze = intel_idle_freeze, },
153 	{
154 		.name = "C6-NHM",
155 		.desc = "MWAIT 0x20",
156 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
157 		.exit_latency = 200,
158 		.target_residency = 800,
159 		.enter = &intel_idle,
160 		.enter_freeze = intel_idle_freeze, },
161 	{
162 		.enter = NULL }
163 };
164 
165 static struct cpuidle_state snb_cstates[] = {
166 	{
167 		.name = "C1-SNB",
168 		.desc = "MWAIT 0x00",
169 		.flags = MWAIT2flg(0x00),
170 		.exit_latency = 2,
171 		.target_residency = 2,
172 		.enter = &intel_idle,
173 		.enter_freeze = intel_idle_freeze, },
174 	{
175 		.name = "C1E-SNB",
176 		.desc = "MWAIT 0x01",
177 		.flags = MWAIT2flg(0x01),
178 		.exit_latency = 10,
179 		.target_residency = 20,
180 		.enter = &intel_idle,
181 		.enter_freeze = intel_idle_freeze, },
182 	{
183 		.name = "C3-SNB",
184 		.desc = "MWAIT 0x10",
185 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
186 		.exit_latency = 80,
187 		.target_residency = 211,
188 		.enter = &intel_idle,
189 		.enter_freeze = intel_idle_freeze, },
190 	{
191 		.name = "C6-SNB",
192 		.desc = "MWAIT 0x20",
193 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
194 		.exit_latency = 104,
195 		.target_residency = 345,
196 		.enter = &intel_idle,
197 		.enter_freeze = intel_idle_freeze, },
198 	{
199 		.name = "C7-SNB",
200 		.desc = "MWAIT 0x30",
201 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
202 		.exit_latency = 109,
203 		.target_residency = 345,
204 		.enter = &intel_idle,
205 		.enter_freeze = intel_idle_freeze, },
206 	{
207 		.enter = NULL }
208 };
209 
210 static struct cpuidle_state byt_cstates[] = {
211 	{
212 		.name = "C1-BYT",
213 		.desc = "MWAIT 0x00",
214 		.flags = MWAIT2flg(0x00),
215 		.exit_latency = 1,
216 		.target_residency = 1,
217 		.enter = &intel_idle,
218 		.enter_freeze = intel_idle_freeze, },
219 	{
220 		.name = "C6N-BYT",
221 		.desc = "MWAIT 0x58",
222 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
223 		.exit_latency = 300,
224 		.target_residency = 275,
225 		.enter = &intel_idle,
226 		.enter_freeze = intel_idle_freeze, },
227 	{
228 		.name = "C6S-BYT",
229 		.desc = "MWAIT 0x52",
230 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
231 		.exit_latency = 500,
232 		.target_residency = 560,
233 		.enter = &intel_idle,
234 		.enter_freeze = intel_idle_freeze, },
235 	{
236 		.name = "C7-BYT",
237 		.desc = "MWAIT 0x60",
238 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
239 		.exit_latency = 1200,
240 		.target_residency = 4000,
241 		.enter = &intel_idle,
242 		.enter_freeze = intel_idle_freeze, },
243 	{
244 		.name = "C7S-BYT",
245 		.desc = "MWAIT 0x64",
246 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
247 		.exit_latency = 10000,
248 		.target_residency = 20000,
249 		.enter = &intel_idle,
250 		.enter_freeze = intel_idle_freeze, },
251 	{
252 		.enter = NULL }
253 };
254 
255 static struct cpuidle_state cht_cstates[] = {
256 	{
257 		.name = "C1-CHT",
258 		.desc = "MWAIT 0x00",
259 		.flags = MWAIT2flg(0x00),
260 		.exit_latency = 1,
261 		.target_residency = 1,
262 		.enter = &intel_idle,
263 		.enter_freeze = intel_idle_freeze, },
264 	{
265 		.name = "C6N-CHT",
266 		.desc = "MWAIT 0x58",
267 		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
268 		.exit_latency = 80,
269 		.target_residency = 275,
270 		.enter = &intel_idle,
271 		.enter_freeze = intel_idle_freeze, },
272 	{
273 		.name = "C6S-CHT",
274 		.desc = "MWAIT 0x52",
275 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
276 		.exit_latency = 200,
277 		.target_residency = 560,
278 		.enter = &intel_idle,
279 		.enter_freeze = intel_idle_freeze, },
280 	{
281 		.name = "C7-CHT",
282 		.desc = "MWAIT 0x60",
283 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
284 		.exit_latency = 1200,
285 		.target_residency = 4000,
286 		.enter = &intel_idle,
287 		.enter_freeze = intel_idle_freeze, },
288 	{
289 		.name = "C7S-CHT",
290 		.desc = "MWAIT 0x64",
291 		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
292 		.exit_latency = 10000,
293 		.target_residency = 20000,
294 		.enter = &intel_idle,
295 		.enter_freeze = intel_idle_freeze, },
296 	{
297 		.enter = NULL }
298 };
299 
300 static struct cpuidle_state ivb_cstates[] = {
301 	{
302 		.name = "C1-IVB",
303 		.desc = "MWAIT 0x00",
304 		.flags = MWAIT2flg(0x00),
305 		.exit_latency = 1,
306 		.target_residency = 1,
307 		.enter = &intel_idle,
308 		.enter_freeze = intel_idle_freeze, },
309 	{
310 		.name = "C1E-IVB",
311 		.desc = "MWAIT 0x01",
312 		.flags = MWAIT2flg(0x01),
313 		.exit_latency = 10,
314 		.target_residency = 20,
315 		.enter = &intel_idle,
316 		.enter_freeze = intel_idle_freeze, },
317 	{
318 		.name = "C3-IVB",
319 		.desc = "MWAIT 0x10",
320 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
321 		.exit_latency = 59,
322 		.target_residency = 156,
323 		.enter = &intel_idle,
324 		.enter_freeze = intel_idle_freeze, },
325 	{
326 		.name = "C6-IVB",
327 		.desc = "MWAIT 0x20",
328 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
329 		.exit_latency = 80,
330 		.target_residency = 300,
331 		.enter = &intel_idle,
332 		.enter_freeze = intel_idle_freeze, },
333 	{
334 		.name = "C7-IVB",
335 		.desc = "MWAIT 0x30",
336 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
337 		.exit_latency = 87,
338 		.target_residency = 300,
339 		.enter = &intel_idle,
340 		.enter_freeze = intel_idle_freeze, },
341 	{
342 		.enter = NULL }
343 };
344 
345 static struct cpuidle_state ivt_cstates[] = {
346 	{
347 		.name = "C1-IVT",
348 		.desc = "MWAIT 0x00",
349 		.flags = MWAIT2flg(0x00),
350 		.exit_latency = 1,
351 		.target_residency = 1,
352 		.enter = &intel_idle,
353 		.enter_freeze = intel_idle_freeze, },
354 	{
355 		.name = "C1E-IVT",
356 		.desc = "MWAIT 0x01",
357 		.flags = MWAIT2flg(0x01),
358 		.exit_latency = 10,
359 		.target_residency = 80,
360 		.enter = &intel_idle,
361 		.enter_freeze = intel_idle_freeze, },
362 	{
363 		.name = "C3-IVT",
364 		.desc = "MWAIT 0x10",
365 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
366 		.exit_latency = 59,
367 		.target_residency = 156,
368 		.enter = &intel_idle,
369 		.enter_freeze = intel_idle_freeze, },
370 	{
371 		.name = "C6-IVT",
372 		.desc = "MWAIT 0x20",
373 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
374 		.exit_latency = 82,
375 		.target_residency = 300,
376 		.enter = &intel_idle,
377 		.enter_freeze = intel_idle_freeze, },
378 	{
379 		.enter = NULL }
380 };
381 
382 static struct cpuidle_state ivt_cstates_4s[] = {
383 	{
384 		.name = "C1-IVT-4S",
385 		.desc = "MWAIT 0x00",
386 		.flags = MWAIT2flg(0x00),
387 		.exit_latency = 1,
388 		.target_residency = 1,
389 		.enter = &intel_idle,
390 		.enter_freeze = intel_idle_freeze, },
391 	{
392 		.name = "C1E-IVT-4S",
393 		.desc = "MWAIT 0x01",
394 		.flags = MWAIT2flg(0x01),
395 		.exit_latency = 10,
396 		.target_residency = 250,
397 		.enter = &intel_idle,
398 		.enter_freeze = intel_idle_freeze, },
399 	{
400 		.name = "C3-IVT-4S",
401 		.desc = "MWAIT 0x10",
402 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
403 		.exit_latency = 59,
404 		.target_residency = 300,
405 		.enter = &intel_idle,
406 		.enter_freeze = intel_idle_freeze, },
407 	{
408 		.name = "C6-IVT-4S",
409 		.desc = "MWAIT 0x20",
410 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
411 		.exit_latency = 84,
412 		.target_residency = 400,
413 		.enter = &intel_idle,
414 		.enter_freeze = intel_idle_freeze, },
415 	{
416 		.enter = NULL }
417 };
418 
419 static struct cpuidle_state ivt_cstates_8s[] = {
420 	{
421 		.name = "C1-IVT-8S",
422 		.desc = "MWAIT 0x00",
423 		.flags = MWAIT2flg(0x00),
424 		.exit_latency = 1,
425 		.target_residency = 1,
426 		.enter = &intel_idle,
427 		.enter_freeze = intel_idle_freeze, },
428 	{
429 		.name = "C1E-IVT-8S",
430 		.desc = "MWAIT 0x01",
431 		.flags = MWAIT2flg(0x01),
432 		.exit_latency = 10,
433 		.target_residency = 500,
434 		.enter = &intel_idle,
435 		.enter_freeze = intel_idle_freeze, },
436 	{
437 		.name = "C3-IVT-8S",
438 		.desc = "MWAIT 0x10",
439 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
440 		.exit_latency = 59,
441 		.target_residency = 600,
442 		.enter = &intel_idle,
443 		.enter_freeze = intel_idle_freeze, },
444 	{
445 		.name = "C6-IVT-8S",
446 		.desc = "MWAIT 0x20",
447 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
448 		.exit_latency = 88,
449 		.target_residency = 700,
450 		.enter = &intel_idle,
451 		.enter_freeze = intel_idle_freeze, },
452 	{
453 		.enter = NULL }
454 };
455 
456 static struct cpuidle_state hsw_cstates[] = {
457 	{
458 		.name = "C1-HSW",
459 		.desc = "MWAIT 0x00",
460 		.flags = MWAIT2flg(0x00),
461 		.exit_latency = 2,
462 		.target_residency = 2,
463 		.enter = &intel_idle,
464 		.enter_freeze = intel_idle_freeze, },
465 	{
466 		.name = "C1E-HSW",
467 		.desc = "MWAIT 0x01",
468 		.flags = MWAIT2flg(0x01),
469 		.exit_latency = 10,
470 		.target_residency = 20,
471 		.enter = &intel_idle,
472 		.enter_freeze = intel_idle_freeze, },
473 	{
474 		.name = "C3-HSW",
475 		.desc = "MWAIT 0x10",
476 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
477 		.exit_latency = 33,
478 		.target_residency = 100,
479 		.enter = &intel_idle,
480 		.enter_freeze = intel_idle_freeze, },
481 	{
482 		.name = "C6-HSW",
483 		.desc = "MWAIT 0x20",
484 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
485 		.exit_latency = 133,
486 		.target_residency = 400,
487 		.enter = &intel_idle,
488 		.enter_freeze = intel_idle_freeze, },
489 	{
490 		.name = "C7s-HSW",
491 		.desc = "MWAIT 0x32",
492 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
493 		.exit_latency = 166,
494 		.target_residency = 500,
495 		.enter = &intel_idle,
496 		.enter_freeze = intel_idle_freeze, },
497 	{
498 		.name = "C8-HSW",
499 		.desc = "MWAIT 0x40",
500 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
501 		.exit_latency = 300,
502 		.target_residency = 900,
503 		.enter = &intel_idle,
504 		.enter_freeze = intel_idle_freeze, },
505 	{
506 		.name = "C9-HSW",
507 		.desc = "MWAIT 0x50",
508 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
509 		.exit_latency = 600,
510 		.target_residency = 1800,
511 		.enter = &intel_idle,
512 		.enter_freeze = intel_idle_freeze, },
513 	{
514 		.name = "C10-HSW",
515 		.desc = "MWAIT 0x60",
516 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
517 		.exit_latency = 2600,
518 		.target_residency = 7700,
519 		.enter = &intel_idle,
520 		.enter_freeze = intel_idle_freeze, },
521 	{
522 		.enter = NULL }
523 };
524 static struct cpuidle_state bdw_cstates[] = {
525 	{
526 		.name = "C1-BDW",
527 		.desc = "MWAIT 0x00",
528 		.flags = MWAIT2flg(0x00),
529 		.exit_latency = 2,
530 		.target_residency = 2,
531 		.enter = &intel_idle,
532 		.enter_freeze = intel_idle_freeze, },
533 	{
534 		.name = "C1E-BDW",
535 		.desc = "MWAIT 0x01",
536 		.flags = MWAIT2flg(0x01),
537 		.exit_latency = 10,
538 		.target_residency = 20,
539 		.enter = &intel_idle,
540 		.enter_freeze = intel_idle_freeze, },
541 	{
542 		.name = "C3-BDW",
543 		.desc = "MWAIT 0x10",
544 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
545 		.exit_latency = 40,
546 		.target_residency = 100,
547 		.enter = &intel_idle,
548 		.enter_freeze = intel_idle_freeze, },
549 	{
550 		.name = "C6-BDW",
551 		.desc = "MWAIT 0x20",
552 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
553 		.exit_latency = 133,
554 		.target_residency = 400,
555 		.enter = &intel_idle,
556 		.enter_freeze = intel_idle_freeze, },
557 	{
558 		.name = "C7s-BDW",
559 		.desc = "MWAIT 0x32",
560 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
561 		.exit_latency = 166,
562 		.target_residency = 500,
563 		.enter = &intel_idle,
564 		.enter_freeze = intel_idle_freeze, },
565 	{
566 		.name = "C8-BDW",
567 		.desc = "MWAIT 0x40",
568 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
569 		.exit_latency = 300,
570 		.target_residency = 900,
571 		.enter = &intel_idle,
572 		.enter_freeze = intel_idle_freeze, },
573 	{
574 		.name = "C9-BDW",
575 		.desc = "MWAIT 0x50",
576 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
577 		.exit_latency = 600,
578 		.target_residency = 1800,
579 		.enter = &intel_idle,
580 		.enter_freeze = intel_idle_freeze, },
581 	{
582 		.name = "C10-BDW",
583 		.desc = "MWAIT 0x60",
584 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
585 		.exit_latency = 2600,
586 		.target_residency = 7700,
587 		.enter = &intel_idle,
588 		.enter_freeze = intel_idle_freeze, },
589 	{
590 		.enter = NULL }
591 };
592 
593 static struct cpuidle_state skl_cstates[] = {
594 	{
595 		.name = "C1-SKL",
596 		.desc = "MWAIT 0x00",
597 		.flags = MWAIT2flg(0x00),
598 		.exit_latency = 2,
599 		.target_residency = 2,
600 		.enter = &intel_idle,
601 		.enter_freeze = intel_idle_freeze, },
602 	{
603 		.name = "C1E-SKL",
604 		.desc = "MWAIT 0x01",
605 		.flags = MWAIT2flg(0x01),
606 		.exit_latency = 10,
607 		.target_residency = 20,
608 		.enter = &intel_idle,
609 		.enter_freeze = intel_idle_freeze, },
610 	{
611 		.name = "C3-SKL",
612 		.desc = "MWAIT 0x10",
613 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
614 		.exit_latency = 70,
615 		.target_residency = 100,
616 		.enter = &intel_idle,
617 		.enter_freeze = intel_idle_freeze, },
618 	{
619 		.name = "C6-SKL",
620 		.desc = "MWAIT 0x20",
621 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
622 		.exit_latency = 85,
623 		.target_residency = 200,
624 		.enter = &intel_idle,
625 		.enter_freeze = intel_idle_freeze, },
626 	{
627 		.name = "C7s-SKL",
628 		.desc = "MWAIT 0x33",
629 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
630 		.exit_latency = 124,
631 		.target_residency = 800,
632 		.enter = &intel_idle,
633 		.enter_freeze = intel_idle_freeze, },
634 	{
635 		.name = "C8-SKL",
636 		.desc = "MWAIT 0x40",
637 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
638 		.exit_latency = 200,
639 		.target_residency = 800,
640 		.enter = &intel_idle,
641 		.enter_freeze = intel_idle_freeze, },
642 	{
643 		.name = "C9-SKL",
644 		.desc = "MWAIT 0x50",
645 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
646 		.exit_latency = 480,
647 		.target_residency = 5000,
648 		.enter = &intel_idle,
649 		.enter_freeze = intel_idle_freeze, },
650 	{
651 		.name = "C10-SKL",
652 		.desc = "MWAIT 0x60",
653 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
654 		.exit_latency = 890,
655 		.target_residency = 5000,
656 		.enter = &intel_idle,
657 		.enter_freeze = intel_idle_freeze, },
658 	{
659 		.enter = NULL }
660 };
661 
662 static struct cpuidle_state skx_cstates[] = {
663 	{
664 		.name = "C1-SKX",
665 		.desc = "MWAIT 0x00",
666 		.flags = MWAIT2flg(0x00),
667 		.exit_latency = 2,
668 		.target_residency = 2,
669 		.enter = &intel_idle,
670 		.enter_freeze = intel_idle_freeze, },
671 	{
672 		.name = "C1E-SKX",
673 		.desc = "MWAIT 0x01",
674 		.flags = MWAIT2flg(0x01),
675 		.exit_latency = 10,
676 		.target_residency = 20,
677 		.enter = &intel_idle,
678 		.enter_freeze = intel_idle_freeze, },
679 	{
680 		.name = "C6-SKX",
681 		.desc = "MWAIT 0x20",
682 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
683 		.exit_latency = 133,
684 		.target_residency = 600,
685 		.enter = &intel_idle,
686 		.enter_freeze = intel_idle_freeze, },
687 	{
688 		.enter = NULL }
689 };
690 
691 static struct cpuidle_state atom_cstates[] = {
692 	{
693 		.name = "C1E-ATM",
694 		.desc = "MWAIT 0x00",
695 		.flags = MWAIT2flg(0x00),
696 		.exit_latency = 10,
697 		.target_residency = 20,
698 		.enter = &intel_idle,
699 		.enter_freeze = intel_idle_freeze, },
700 	{
701 		.name = "C2-ATM",
702 		.desc = "MWAIT 0x10",
703 		.flags = MWAIT2flg(0x10),
704 		.exit_latency = 20,
705 		.target_residency = 80,
706 		.enter = &intel_idle,
707 		.enter_freeze = intel_idle_freeze, },
708 	{
709 		.name = "C4-ATM",
710 		.desc = "MWAIT 0x30",
711 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
712 		.exit_latency = 100,
713 		.target_residency = 400,
714 		.enter = &intel_idle,
715 		.enter_freeze = intel_idle_freeze, },
716 	{
717 		.name = "C6-ATM",
718 		.desc = "MWAIT 0x52",
719 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
720 		.exit_latency = 140,
721 		.target_residency = 560,
722 		.enter = &intel_idle,
723 		.enter_freeze = intel_idle_freeze, },
724 	{
725 		.enter = NULL }
726 };
727 static struct cpuidle_state avn_cstates[] = {
728 	{
729 		.name = "C1-AVN",
730 		.desc = "MWAIT 0x00",
731 		.flags = MWAIT2flg(0x00),
732 		.exit_latency = 2,
733 		.target_residency = 2,
734 		.enter = &intel_idle,
735 		.enter_freeze = intel_idle_freeze, },
736 	{
737 		.name = "C6-AVN",
738 		.desc = "MWAIT 0x51",
739 		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
740 		.exit_latency = 15,
741 		.target_residency = 45,
742 		.enter = &intel_idle,
743 		.enter_freeze = intel_idle_freeze, },
744 	{
745 		.enter = NULL }
746 };
747 static struct cpuidle_state knl_cstates[] = {
748 	{
749 		.name = "C1-KNL",
750 		.desc = "MWAIT 0x00",
751 		.flags = MWAIT2flg(0x00),
752 		.exit_latency = 1,
753 		.target_residency = 2,
754 		.enter = &intel_idle,
755 		.enter_freeze = intel_idle_freeze },
756 	{
757 		.name = "C6-KNL",
758 		.desc = "MWAIT 0x10",
759 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
760 		.exit_latency = 120,
761 		.target_residency = 500,
762 		.enter = &intel_idle,
763 		.enter_freeze = intel_idle_freeze },
764 	{
765 		.enter = NULL }
766 };
767 
768 static struct cpuidle_state bxt_cstates[] = {
769 	{
770 		.name = "C1-BXT",
771 		.desc = "MWAIT 0x00",
772 		.flags = MWAIT2flg(0x00),
773 		.exit_latency = 2,
774 		.target_residency = 2,
775 		.enter = &intel_idle,
776 		.enter_freeze = intel_idle_freeze, },
777 	{
778 		.name = "C1E-BXT",
779 		.desc = "MWAIT 0x01",
780 		.flags = MWAIT2flg(0x01),
781 		.exit_latency = 10,
782 		.target_residency = 20,
783 		.enter = &intel_idle,
784 		.enter_freeze = intel_idle_freeze, },
785 	{
786 		.name = "C6-BXT",
787 		.desc = "MWAIT 0x20",
788 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
789 		.exit_latency = 133,
790 		.target_residency = 133,
791 		.enter = &intel_idle,
792 		.enter_freeze = intel_idle_freeze, },
793 	{
794 		.name = "C7s-BXT",
795 		.desc = "MWAIT 0x31",
796 		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
797 		.exit_latency = 155,
798 		.target_residency = 155,
799 		.enter = &intel_idle,
800 		.enter_freeze = intel_idle_freeze, },
801 	{
802 		.name = "C8-BXT",
803 		.desc = "MWAIT 0x40",
804 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
805 		.exit_latency = 1000,
806 		.target_residency = 1000,
807 		.enter = &intel_idle,
808 		.enter_freeze = intel_idle_freeze, },
809 	{
810 		.name = "C9-BXT",
811 		.desc = "MWAIT 0x50",
812 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
813 		.exit_latency = 2000,
814 		.target_residency = 2000,
815 		.enter = &intel_idle,
816 		.enter_freeze = intel_idle_freeze, },
817 	{
818 		.name = "C10-BXT",
819 		.desc = "MWAIT 0x60",
820 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
821 		.exit_latency = 10000,
822 		.target_residency = 10000,
823 		.enter = &intel_idle,
824 		.enter_freeze = intel_idle_freeze, },
825 	{
826 		.enter = NULL }
827 };
828 
829 static struct cpuidle_state dnv_cstates[] = {
830 	{
831 		.name = "C1-DNV",
832 		.desc = "MWAIT 0x00",
833 		.flags = MWAIT2flg(0x00),
834 		.exit_latency = 2,
835 		.target_residency = 2,
836 		.enter = &intel_idle,
837 		.enter_freeze = intel_idle_freeze, },
838 	{
839 		.name = "C1E-DNV",
840 		.desc = "MWAIT 0x01",
841 		.flags = MWAIT2flg(0x01),
842 		.exit_latency = 10,
843 		.target_residency = 20,
844 		.enter = &intel_idle,
845 		.enter_freeze = intel_idle_freeze, },
846 	{
847 		.name = "C6-DNV",
848 		.desc = "MWAIT 0x20",
849 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
850 		.exit_latency = 50,
851 		.target_residency = 500,
852 		.enter = &intel_idle,
853 		.enter_freeze = intel_idle_freeze, },
854 	{
855 		.enter = NULL }
856 };
857 
858 /**
859  * intel_idle
860  * @dev: cpuidle_device
861  * @drv: cpuidle driver
862  * @index: index of cpuidle state
863  *
864  * Must be called under local_irq_disable().
865  */
866 static int intel_idle(struct cpuidle_device *dev,
867 		struct cpuidle_driver *drv, int index)
868 {
869 	unsigned long ecx = 1; /* break on interrupt flag */
870 	struct cpuidle_state *state = &drv->states[index];
871 	unsigned long eax = flg2MWAIT(state->flags);
872 	unsigned int cstate;
873 	int cpu = smp_processor_id();
874 
875 	cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
876 
877 	/*
878 	 * leave_mm() to avoid costly and often unnecessary wakeups
879 	 * for flushing the user TLB's associated with the active mm.
880 	 */
881 	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
882 		leave_mm(cpu);
883 
884 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
885 		tick_broadcast_enter();
886 
887 	mwait_idle_with_hints(eax, ecx);
888 
889 	if (!(lapic_timer_reliable_states & (1 << (cstate))))
890 		tick_broadcast_exit();
891 
892 	return index;
893 }
894 
895 /**
896  * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle
897  * @dev: cpuidle_device
898  * @drv: cpuidle driver
899  * @index: state index
900  */
901 static void intel_idle_freeze(struct cpuidle_device *dev,
902 			     struct cpuidle_driver *drv, int index)
903 {
904 	unsigned long ecx = 1; /* break on interrupt flag */
905 	unsigned long eax = flg2MWAIT(drv->states[index].flags);
906 
907 	mwait_idle_with_hints(eax, ecx);
908 }
909 
/*
 * Enable or disable the tick broadcast mechanism on the calling CPU.
 * @arg is a boolean smuggled through a void pointer: non-zero enables,
 * zero disables.
 */
static void __setup_broadcast_timer(void *arg)
{
	unsigned long enable = (unsigned long)arg;

	if (!enable) {
		tick_broadcast_disable();
		return;
	}

	tick_broadcast_enable();
}
919 
920 static int cpu_hotplug_notify(struct notifier_block *n,
921 			      unsigned long action, void *hcpu)
922 {
923 	int hotcpu = (unsigned long)hcpu;
924 	struct cpuidle_device *dev;
925 
926 	switch (action & ~CPU_TASKS_FROZEN) {
927 	case CPU_ONLINE:
928 
929 		if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
930 			smp_call_function_single(hotcpu, __setup_broadcast_timer,
931 						 (void *)true, 1);
932 
933 		/*
934 		 * Some systems can hotplug a cpu at runtime after
935 		 * the kernel has booted, we have to initialize the
936 		 * driver in this case
937 		 */
938 		dev = per_cpu_ptr(intel_idle_cpuidle_devices, hotcpu);
939 		if (dev->registered)
940 			break;
941 
942 		if (intel_idle_cpu_init(hotcpu))
943 			return NOTIFY_BAD;
944 
945 		break;
946 	}
947 	return NOTIFY_OK;
948 }
949 
950 static struct notifier_block cpu_hotplug_notifier = {
951 	.notifier_call = cpu_hotplug_notify,
952 };
953 
954 static void auto_demotion_disable(void *dummy)
955 {
956 	unsigned long long msr_bits;
957 
958 	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
959 	msr_bits &= ~(icpu->auto_demotion_disable_flags);
960 	wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
961 }
962 static void c1e_promotion_disable(void *dummy)
963 {
964 	unsigned long long msr_bits;
965 
966 	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
967 	msr_bits &= ~0x2;
968 	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
969 }
970 
971 static const struct idle_cpu idle_cpu_nehalem = {
972 	.state_table = nehalem_cstates,
973 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
974 	.disable_promotion_to_c1e = true,
975 };
976 
977 static const struct idle_cpu idle_cpu_atom = {
978 	.state_table = atom_cstates,
979 };
980 
981 static const struct idle_cpu idle_cpu_lincroft = {
982 	.state_table = atom_cstates,
983 	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
984 };
985 
986 static const struct idle_cpu idle_cpu_snb = {
987 	.state_table = snb_cstates,
988 	.disable_promotion_to_c1e = true,
989 };
990 
991 static const struct idle_cpu idle_cpu_byt = {
992 	.state_table = byt_cstates,
993 	.disable_promotion_to_c1e = true,
994 	.byt_auto_demotion_disable_flag = true,
995 };
996 
997 static const struct idle_cpu idle_cpu_cht = {
998 	.state_table = cht_cstates,
999 	.disable_promotion_to_c1e = true,
1000 	.byt_auto_demotion_disable_flag = true,
1001 };
1002 
1003 static const struct idle_cpu idle_cpu_ivb = {
1004 	.state_table = ivb_cstates,
1005 	.disable_promotion_to_c1e = true,
1006 };
1007 
1008 static const struct idle_cpu idle_cpu_ivt = {
1009 	.state_table = ivt_cstates,
1010 	.disable_promotion_to_c1e = true,
1011 };
1012 
1013 static const struct idle_cpu idle_cpu_hsw = {
1014 	.state_table = hsw_cstates,
1015 	.disable_promotion_to_c1e = true,
1016 };
1017 
1018 static const struct idle_cpu idle_cpu_bdw = {
1019 	.state_table = bdw_cstates,
1020 	.disable_promotion_to_c1e = true,
1021 };
1022 
1023 static const struct idle_cpu idle_cpu_skl = {
1024 	.state_table = skl_cstates,
1025 	.disable_promotion_to_c1e = true,
1026 };
1027 
1028 static const struct idle_cpu idle_cpu_skx = {
1029 	.state_table = skx_cstates,
1030 	.disable_promotion_to_c1e = true,
1031 };
1032 
1033 static const struct idle_cpu idle_cpu_avn = {
1034 	.state_table = avn_cstates,
1035 	.disable_promotion_to_c1e = true,
1036 };
1037 
1038 static const struct idle_cpu idle_cpu_knl = {
1039 	.state_table = knl_cstates,
1040 };
1041 
1042 static const struct idle_cpu idle_cpu_bxt = {
1043 	.state_table = bxt_cstates,
1044 	.disable_promotion_to_c1e = true,
1045 };
1046 
1047 static const struct idle_cpu idle_cpu_dnv = {
1048 	.state_table = dnv_cstates,
1049 	.disable_promotion_to_c1e = true,
1050 };
1051 
1052 #define ICPU(model, cpu) \
1053 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
1054 
1055 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1056 	ICPU(INTEL_FAM6_NEHALEM_EP,		idle_cpu_nehalem),
1057 	ICPU(INTEL_FAM6_NEHALEM,		idle_cpu_nehalem),
1058 	ICPU(INTEL_FAM6_NEHALEM_G,		idle_cpu_nehalem),
1059 	ICPU(INTEL_FAM6_WESTMERE,		idle_cpu_nehalem),
1060 	ICPU(INTEL_FAM6_WESTMERE_EP,		idle_cpu_nehalem),
1061 	ICPU(INTEL_FAM6_NEHALEM_EX,		idle_cpu_nehalem),
1062 	ICPU(INTEL_FAM6_ATOM_PINEVIEW,		idle_cpu_atom),
1063 	ICPU(INTEL_FAM6_ATOM_LINCROFT,		idle_cpu_lincroft),
1064 	ICPU(INTEL_FAM6_WESTMERE_EX,		idle_cpu_nehalem),
1065 	ICPU(INTEL_FAM6_SANDYBRIDGE,		idle_cpu_snb),
1066 	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		idle_cpu_snb),
1067 	ICPU(INTEL_FAM6_ATOM_CEDARVIEW,		idle_cpu_atom),
1068 	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	idle_cpu_byt),
1069 	ICPU(INTEL_FAM6_ATOM_AIRMONT,		idle_cpu_cht),
1070 	ICPU(INTEL_FAM6_IVYBRIDGE,		idle_cpu_ivb),
1071 	ICPU(INTEL_FAM6_IVYBRIDGE_X,		idle_cpu_ivt),
1072 	ICPU(INTEL_FAM6_HASWELL_CORE,		idle_cpu_hsw),
1073 	ICPU(INTEL_FAM6_HASWELL_X,		idle_cpu_hsw),
1074 	ICPU(INTEL_FAM6_HASWELL_ULT,		idle_cpu_hsw),
1075 	ICPU(INTEL_FAM6_HASWELL_GT3E,		idle_cpu_hsw),
1076 	ICPU(INTEL_FAM6_ATOM_SILVERMONT2,	idle_cpu_avn),
1077 	ICPU(INTEL_FAM6_BROADWELL_CORE,		idle_cpu_bdw),
1078 	ICPU(INTEL_FAM6_BROADWELL_GT3E,		idle_cpu_bdw),
1079 	ICPU(INTEL_FAM6_BROADWELL_X,		idle_cpu_bdw),
1080 	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	idle_cpu_bdw),
1081 	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		idle_cpu_skl),
1082 	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	idle_cpu_skl),
1083 	ICPU(INTEL_FAM6_KABYLAKE_MOBILE,	idle_cpu_skl),
1084 	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,	idle_cpu_skl),
1085 	ICPU(INTEL_FAM6_SKYLAKE_X,		idle_cpu_skx),
1086 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
1087 	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
1088 	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
1089 	{}
1090 };
1091 
1092 /*
1093  * intel_idle_probe()
1094  */
1095 static int __init intel_idle_probe(void)
1096 {
1097 	unsigned int eax, ebx, ecx;
1098 	const struct x86_cpu_id *id;
1099 
1100 	if (max_cstate == 0) {
1101 		pr_debug(PREFIX "disabled\n");
1102 		return -EPERM;
1103 	}
1104 
1105 	id = x86_match_cpu(intel_idle_ids);
1106 	if (!id) {
1107 		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
1108 		    boot_cpu_data.x86 == 6)
1109 			pr_debug(PREFIX "does not run on family %d model %d\n",
1110 				boot_cpu_data.x86, boot_cpu_data.x86_model);
1111 		return -ENODEV;
1112 	}
1113 
1114 	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1115 		return -ENODEV;
1116 
1117 	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
1118 
1119 	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
1120 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
1121 	    !mwait_substates)
1122 			return -ENODEV;
1123 
1124 	pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
1125 
1126 	icpu = (const struct idle_cpu *)id->driver_data;
1127 	cpuidle_state_table = icpu->state_table;
1128 
1129 	pr_debug(PREFIX "v" INTEL_IDLE_VERSION
1130 		" model 0x%X\n", boot_cpu_data.x86_model);
1131 
1132 	return 0;
1133 }
1134 
1135 /*
1136  * intel_idle_cpuidle_devices_uninit()
1137  * Unregisters the cpuidle devices.
1138  */
1139 static void intel_idle_cpuidle_devices_uninit(void)
1140 {
1141 	int i;
1142 	struct cpuidle_device *dev;
1143 
1144 	for_each_online_cpu(i) {
1145 		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
1146 		cpuidle_unregister_device(dev);
1147 	}
1148 }
1149 
1150 /*
1151  * ivt_idle_state_table_update(void)
1152  *
1153  * Tune IVT multi-socket targets
1154  * Assumption: num_sockets == (max_package_num + 1)
1155  */
1156 static void ivt_idle_state_table_update(void)
1157 {
1158 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1159 	int cpu, package_num, num_sockets = 1;
1160 
1161 	for_each_online_cpu(cpu) {
1162 		package_num = topology_physical_package_id(cpu);
1163 		if (package_num + 1 > num_sockets) {
1164 			num_sockets = package_num + 1;
1165 
1166 			if (num_sockets > 4) {
1167 				cpuidle_state_table = ivt_cstates_8s;
1168 				return;
1169 			}
1170 		}
1171 	}
1172 
1173 	if (num_sockets > 2)
1174 		cpuidle_state_table = ivt_cstates_4s;
1175 
1176 	/* else, 1 and 2 socket systems use default ivt_cstates */
1177 }
1178 
/*
 * Translate IRTL (Interrupt Response Time Limit) MSR to usec
 *
 * As decoded here, bits 9:0 of the MSR value hold the time count and
 * bits 12:10 select the time unit from irtl_ns_units[].
 */
#define IRTL_TIME_MASK	0x3FF	/* bits 9:0: time count */
#define IRTL_UNIT_SHIFT	10	/* bits 12:10: index into irtl_ns_units[] */
#define IRTL_UNIT_MASK	0x7

/*
 * Nanoseconds per count for each unit encoding.  The table is only ever
 * read, so it is const.  The last two encodings map to 0, which makes
 * irtl_2_usec() return 0 for them.
 */
static const unsigned int irtl_ns_units[] = {
	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * irtl_2_usec()
 * @irtl: raw value read from an IRTL MSR
 *
 * Returns the encoded latency in microseconds (truncating), or 0 when
 * @irtl is 0 or selects a unit whose table entry is 0.
 */
static unsigned long long irtl_2_usec(unsigned long long irtl)
{
	unsigned long long ns;

	if (!irtl)
		return 0;

	ns = irtl_ns_units[(irtl >> IRTL_UNIT_SHIFT) & IRTL_UNIT_MASK];

	/* count * unit is in nanoseconds; scale down to microseconds */
	return div64_u64((irtl & IRTL_TIME_MASK) * ns, 1000);
}
1197 /*
1198  * bxt_idle_state_table_update(void)
1199  *
1200  * On BXT, we trust the IRTL to show the definitive maximum latency
1201  * We use the same value for target_residency.
1202  */
1203 static void bxt_idle_state_table_update(void)
1204 {
1205 	unsigned long long msr;
1206 	unsigned int usec;
1207 
1208 	rdmsrl(MSR_PKGC6_IRTL, msr);
1209 	usec = irtl_2_usec(msr);
1210 	if (usec) {
1211 		bxt_cstates[2].exit_latency = usec;
1212 		bxt_cstates[2].target_residency = usec;
1213 	}
1214 
1215 	rdmsrl(MSR_PKGC7_IRTL, msr);
1216 	usec = irtl_2_usec(msr);
1217 	if (usec) {
1218 		bxt_cstates[3].exit_latency = usec;
1219 		bxt_cstates[3].target_residency = usec;
1220 	}
1221 
1222 	rdmsrl(MSR_PKGC8_IRTL, msr);
1223 	usec = irtl_2_usec(msr);
1224 	if (usec) {
1225 		bxt_cstates[4].exit_latency = usec;
1226 		bxt_cstates[4].target_residency = usec;
1227 	}
1228 
1229 	rdmsrl(MSR_PKGC9_IRTL, msr);
1230 	usec = irtl_2_usec(msr);
1231 	if (usec) {
1232 		bxt_cstates[5].exit_latency = usec;
1233 		bxt_cstates[5].target_residency = usec;
1234 	}
1235 
1236 	rdmsrl(MSR_PKGC10_IRTL, msr);
1237 	usec = irtl_2_usec(msr);
1238 	if (usec) {
1239 		bxt_cstates[6].exit_latency = usec;
1240 		bxt_cstates[6].target_residency = usec;
1241 	}
1242 
1243 }
1244 /*
1245  * sklh_idle_state_table_update(void)
1246  *
1247  * On SKL-H (model 0x5e) disable C8 and C9 if:
1248  * C10 is enabled and SGX disabled
1249  */
1250 static void sklh_idle_state_table_update(void)
1251 {
1252 	unsigned long long msr;
1253 	unsigned int eax, ebx, ecx, edx;
1254 
1255 
1256 	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1257 	if (max_cstate <= 7)
1258 		return;
1259 
1260 	/* if PC10 not present in CPUID.MWAIT.EDX */
1261 	if ((mwait_substates & (0xF << 28)) == 0)
1262 		return;
1263 
1264 	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
1265 
1266 	/* PC10 is not enabled in PKG C-state limit */
1267 	if ((msr & 0xF) != 8)
1268 		return;
1269 
1270 	ecx = 0;
1271 	cpuid(7, &eax, &ebx, &ecx, &edx);
1272 
1273 	/* if SGX is present */
1274 	if (ebx & (1 << 2)) {
1275 
1276 		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1277 
1278 		/* if SGX is enabled */
1279 		if (msr & (1 << 18))
1280 			return;
1281 	}
1282 
1283 	skl_cstates[5].disabled = 1;	/* C8-SKL */
1284 	skl_cstates[6].disabled = 1;	/* C9-SKL */
1285 }
1286 /*
1287  * intel_idle_state_table_update()
1288  *
1289  * Update the default state_table for this CPU-id
1290  */
1291 
1292 static void intel_idle_state_table_update(void)
1293 {
1294 	switch (boot_cpu_data.x86_model) {
1295 
1296 	case INTEL_FAM6_IVYBRIDGE_X:
1297 		ivt_idle_state_table_update();
1298 		break;
1299 	case INTEL_FAM6_ATOM_GOLDMONT:
1300 		bxt_idle_state_table_update();
1301 		break;
1302 	case INTEL_FAM6_SKYLAKE_DESKTOP:
1303 		sklh_idle_state_table_update();
1304 		break;
1305 	}
1306 }
1307 
1308 /*
1309  * intel_idle_cpuidle_driver_init()
1310  * allocate, initialize cpuidle_states
1311  */
1312 static void __init intel_idle_cpuidle_driver_init(void)
1313 {
1314 	int cstate;
1315 	struct cpuidle_driver *drv = &intel_idle_driver;
1316 
1317 	intel_idle_state_table_update();
1318 
1319 	drv->state_count = 1;
1320 
1321 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1322 		int num_substates, mwait_hint, mwait_cstate;
1323 
1324 		if ((cpuidle_state_table[cstate].enter == NULL) &&
1325 		    (cpuidle_state_table[cstate].enter_freeze == NULL))
1326 			break;
1327 
1328 		if (cstate + 1 > max_cstate) {
1329 			printk(PREFIX "max_cstate %d reached\n",
1330 				max_cstate);
1331 			break;
1332 		}
1333 
1334 		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1335 		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
1336 
1337 		/* number of sub-states for this state in CPUID.MWAIT */
1338 		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
1339 					& MWAIT_SUBSTATE_MASK;
1340 
1341 		/* if NO sub-states for this state in CPUID, skip it */
1342 		if (num_substates == 0)
1343 			continue;
1344 
1345 		/* if state marked as disabled, skip it */
1346 		if (cpuidle_state_table[cstate].disabled != 0) {
1347 			pr_debug(PREFIX "state %s is disabled",
1348 				cpuidle_state_table[cstate].name);
1349 			continue;
1350 		}
1351 
1352 
1353 		if (((mwait_cstate + 1) > 2) &&
1354 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1355 			mark_tsc_unstable("TSC halts in idle"
1356 					" states deeper than C2");
1357 
1358 		drv->states[drv->state_count] =	/* structure copy */
1359 			cpuidle_state_table[cstate];
1360 
1361 		drv->state_count += 1;
1362 	}
1363 
1364 	if (icpu->byt_auto_demotion_disable_flag) {
1365 		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1366 		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1367 	}
1368 }
1369 
1370 
1371 /*
1372  * intel_idle_cpu_init()
1373  * allocate, initialize, register cpuidle_devices
1374  * @cpu: cpu/core to initialize
1375  */
1376 static int intel_idle_cpu_init(int cpu)
1377 {
1378 	struct cpuidle_device *dev;
1379 
1380 	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1381 
1382 	dev->cpu = cpu;
1383 
1384 	if (cpuidle_register_device(dev)) {
1385 		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
1386 		return -EIO;
1387 	}
1388 
1389 	if (icpu->auto_demotion_disable_flags)
1390 		smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
1391 
1392 	if (icpu->disable_promotion_to_c1e)
1393 		smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1);
1394 
1395 	return 0;
1396 }
1397 
1398 static int __init intel_idle_init(void)
1399 {
1400 	int retval, i;
1401 
1402 	/* Do not load intel_idle at all for now if idle= is passed */
1403 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1404 		return -ENODEV;
1405 
1406 	retval = intel_idle_probe();
1407 	if (retval)
1408 		return retval;
1409 
1410 	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
1411 	if (intel_idle_cpuidle_devices == NULL)
1412 		return -ENOMEM;
1413 
1414 	intel_idle_cpuidle_driver_init();
1415 	retval = cpuidle_register_driver(&intel_idle_driver);
1416 	if (retval) {
1417 		struct cpuidle_driver *drv = cpuidle_get_driver();
1418 		printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
1419 			drv ? drv->name : "none");
1420 		free_percpu(intel_idle_cpuidle_devices);
1421 		return retval;
1422 	}
1423 
1424 	cpu_notifier_register_begin();
1425 
1426 	for_each_online_cpu(i) {
1427 		retval = intel_idle_cpu_init(i);
1428 		if (retval) {
1429 			intel_idle_cpuidle_devices_uninit();
1430 			cpu_notifier_register_done();
1431 			cpuidle_unregister_driver(&intel_idle_driver);
1432 			free_percpu(intel_idle_cpuidle_devices);
1433 			return retval;
1434 		}
1435 	}
1436 	__register_cpu_notifier(&cpu_hotplug_notifier);
1437 
1438 	if (boot_cpu_has(X86_FEATURE_ARAT))	/* Always Reliable APIC Timer */
1439 		lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
1440 	else
1441 		on_each_cpu(__setup_broadcast_timer, (void *)true, 1);
1442 
1443 	cpu_notifier_register_done();
1444 
1445 	pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
1446 		lapic_timer_reliable_states);
1447 
1448 	return 0;
1449 }
1450 device_initcall(intel_idle_init);
1451 
1452 /*
1453  * We are not really modular, but we used to support that.  Meaning we also
1454  * support "intel_idle.max_cstate=..." at boot and also a read-only export of
1455  * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
1456  * is the easiest way (currently) to continue doing that.
1457  */
1458 module_param(max_cstate, int, 0444);
1459