1 /*
2  * intel_pstate.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <linux/acpi.h>
29 #include <linux/vmalloc.h>
30 #include <trace/events/power.h>
31 
32 #include <asm/div64.h>
33 #include <asm/msr.h>
34 #include <asm/cpu_device_id.h>
35 #include <asm/cpufeature.h>
36 
/* MSR addresses for Atom-family P-state ratio and voltage-ID information */
#define ATOM_RATIOS		0x66a
#define ATOM_VIDS		0x66b
#define ATOM_TURBO_RATIOS	0x66c
#define ATOM_TURBO_VIDS		0x66d

/* Fixed-point arithmetic with 8 fractional bits (24.8 format) */
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
45 
46 static inline int32_t mul_fp(int32_t x, int32_t y)
47 {
48 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
49 }
50 
51 static inline int32_t div_fp(s64 x, s64 y)
52 {
53 	return div64_s64((int64_t)x << FRAC_BITS, y);
54 }
55 
56 static inline int ceiling_fp(int32_t x)
57 {
58 	int mask, ret;
59 
60 	ret = fp_toint(x);
61 	mask = (1 << FRAC_BITS) - 1;
62 	if (x & mask)
63 		ret += 1;
64 	return ret;
65 }
66 
/*
 * One performance sample; counter fields hold deltas relative to the
 * previous sample (see intel_pstate_sample()).
 */
struct sample {
	int32_t core_pct_busy;	/* APERF/MPERF busy percentage, fixed point */
	int32_t busy_scaled;	/* busyness value fed to the PID, fixed point */
	u64 aperf;		/* MSR_IA32_APERF delta over the interval */
	u64 mperf;		/* MSR_IA32_MPERF delta over the interval */
	u64 tsc;		/* TSC delta over the interval */
	int freq;		/* estimated average frequency for the interval */
	ktime_t time;		/* timestamp when the sample was taken */
};
76 
/* Per-CPU P-state bounds and current selection, in ratio units. */
struct pstate_data {
	int	current_pstate;		/* last P-state written to hardware */
	int	min_pstate;		/* lowest supported P-state */
	int	max_pstate;		/* max non-turbo P-state (may be TAR-limited) */
	int	max_pstate_physical;	/* max non-turbo P-state from PLATFORM_INFO */
	int	scaling;		/* frequency units per P-state ratio step */
	int	turbo_pstate;		/* highest turbo P-state */
};
85 
/*
 * Voltage ID data for Atom CPUs.  min, max and ratio are fixed point;
 * turbo is kept as a raw MSR field (see atom_get_vid()).
 */
struct vid_data {
	int min;
	int max;
	int turbo;
	int32_t ratio;	/* VID step per non-turbo P-state, fixed point */
};
92 
/* Discrete PID controller state; gains and error terms are fixed point. */
struct _pid {
	int setpoint;		/* target busy percentage */
	int32_t integral;	/* accumulated error (clamped in pid_calc()) */
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	int deadband;		/* error magnitude treated as zero */
	int32_t last_err;	/* previous error, for the derivative term */
};
102 
/* All per-CPU driver state, allocated lazily in intel_pstate_init_cpu(). */
struct cpudata {
	int cpu;

	struct timer_list timer;	/* deferrable sampling timer */

	struct pstate_data pstate;
	struct vid_data vid;
	struct _pid pid;

	ktime_t last_sample_time;
	/* previous raw counter readings, used to form deltas */
	u64	prev_aperf;
	u64	prev_mperf;
	u64	prev_tsc;
	struct sample sample;
};
118 
/* Array of per-CPU state pointers, indexed by CPU number. */
static struct cpudata **all_cpu_data;

/* PID tuning parameters; exposed via debugfs when HWP is not active. */
struct pstate_adjust_policy {
	int sample_rate_ms;	/* sampling interval */
	int deadband;
	int setpoint;
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};
128 
/* CPU-model-specific callbacks for reading and writing P-state data. */
struct pstate_funcs {
	int (*get_max)(void);		/* max non-turbo P-state */
	int (*get_max_physical)(void);	/* max non-turbo P-state, unclipped */
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	void (*set)(struct cpudata*, int pstate);
	void (*get_vid)(struct cpudata *);	/* optional; set for Atom only */
	int32_t (*get_target_pstate)(struct cpudata *);
};
139 
/* Bundle of PID tuning plus callbacks used by the per-model tables below. */
struct cpu_defaults {
	struct pstate_adjust_policy pid_policy;
	struct pstate_funcs funcs;
};
144 
/* Target-P-state algorithms, selected per CPU model via pstate_funcs. */
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);

static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;	/* non-zero when hardware-managed P-states are in use */
151 
/* User- and policy-imposed performance bounds (percent and fixed point). */
struct perf_limits {
	int no_turbo;		/* user requested that turbo not be used */
	int turbo_disabled;	/* turbo unavailable (BIOS or no turbo range) */
	int max_perf_pct;	/* effective maximum performance percent */
	int min_perf_pct;	/* effective minimum performance percent */
	int32_t max_perf;	/* max_perf_pct / 100, fixed point */
	int32_t min_perf;	/* min_perf_pct / 100, fixed point */
	int max_policy_pct;	/* bound derived from the cpufreq policy */
	int max_sysfs_pct;	/* bound set through sysfs */
	int min_policy_pct;
	int min_sysfs_pct;
};
164 
/* Limits used under CPUFREQ_POLICY_PERFORMANCE: pinned at 100%. */
static struct perf_limits performance_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 100,
	.min_perf = int_tofp(1),
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};
177 
/* Limits used under CPUFREQ_POLICY_POWERSAVE: full range allowed. */
static struct perf_limits powersave_limits = {
	.no_turbo = 0,
	.turbo_disabled = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
	.max_policy_pct = 100,
	.max_sysfs_pct = 100,
	.min_policy_pct = 0,
	.min_sysfs_pct = 0,
};
190 
/* Active limits set; switched at runtime by intel_pstate_set_policy(). */
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
static struct perf_limits *limits = &performance_limits;
#else
static struct perf_limits *limits = &powersave_limits;
#endif
196 
197 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
198 			     int deadband, int integral) {
199 	pid->setpoint = setpoint;
200 	pid->deadband  = deadband;
201 	pid->integral  = int_tofp(integral);
202 	pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
203 }
204 
/* Set the proportional gain as percent/100, converted to fixed point. */
static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
}
209 
/* Set the integral gain as percent/100, converted to fixed point. */
static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
}
214 
/* Set the derivative gain as percent/100, converted to fixed point. */
static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
}
219 
/*
 * Run one PID step for the given busy value (fixed point) and return the
 * integer control output.  Errors within the deadband yield no change.
 */
static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	signed int result;
	int32_t pterm, dterm, fp_error;
	int32_t integral_limit;

	fp_error = int_tofp(pid->setpoint) - busy;

	if (abs(fp_error) <= int_tofp(pid->deadband))
		return 0;

	pterm = mul_fp(pid->p_gain, fp_error);

	pid->integral += fp_error;

	/*
	 * We limit the integral here so that it will never
	 * get higher than 30.  This prevents it from becoming
	 * too large an input over long periods of time and allows
	 * it to get factored out sooner.
	 *
	 * The value of 30 was chosen through experimentation.
	 */
	integral_limit = int_tofp(30);
	if (pid->integral > integral_limit)
		pid->integral = integral_limit;
	if (pid->integral < -integral_limit)
		pid->integral = -integral_limit;

	dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
	pid->last_err = fp_error;

	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
	/* add 0.5 in fixed point so the truncation below rounds to nearest */
	result = result + (1 << (FRAC_BITS-1));
	return (signed int)fp_toint(result);
}
256 
257 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
258 {
259 	pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
260 	pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
261 	pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
262 
263 	pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
264 }
265 
266 static inline void intel_pstate_reset_all_pid(void)
267 {
268 	unsigned int cpu;
269 
270 	for_each_online_cpu(cpu) {
271 		if (all_cpu_data[cpu])
272 			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
273 	}
274 }
275 
/*
 * Refresh limits->turbo_disabled: turbo is unavailable if the BIOS set
 * the MISC_ENABLE disable bit or CPU0 reports no turbo range at all.
 */
static inline void update_turbo_state(void)
{
	u64 misc_en;
	struct cpudata *cpu;

	cpu = all_cpu_data[0];
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
	limits->turbo_disabled =
		(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
287 
/*
 * Push the current percentage limits into MSR_HWP_REQUEST on every
 * online CPU, mapping percentages onto the [lowest, highest] capability
 * range read from the local CPU's MSR_HWP_CAPABILITIES.
 */
static void intel_pstate_hwp_set(void)
{
	int min, hw_min, max, hw_max, cpu, range, adj_range;
	u64 value, cap;

	rdmsrl(MSR_HWP_CAPABILITIES, cap);
	hw_min = HWP_LOWEST_PERF(cap);
	hw_max = HWP_HIGHEST_PERF(cap);
	range = hw_max - hw_min;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
		adj_range = limits->min_perf_pct * range / 100;
		min = hw_min + adj_range;
		value &= ~HWP_MIN_PERF(~0L);
		value |= HWP_MIN_PERF(min);

		adj_range = limits->max_perf_pct * range / 100;
		max = hw_min + adj_range;
		if (limits->no_turbo) {
			/*
			 * NOTE(review): hw_max is overwritten here and
			 * stays lowered for all remaining loop iterations;
			 * it is no longer used for range, so this looks
			 * harmless, but worth confirming intent.
			 */
			hw_max = HWP_GUARANTEED_PERF(cap);
			if (hw_max < max)
				max = hw_max;
		}

		value &= ~HWP_MAX_PERF(~0L);
		value |= HWP_MAX_PERF(max);
		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
	}

	put_online_cpus();
}
322 
323 /************************** debugfs begin ************************/
324 static int pid_param_set(void *data, u64 val)
325 {
326 	*(u32 *)data = val;
327 	intel_pstate_reset_all_pid();
328 	return 0;
329 }
330 
331 static int pid_param_get(void *data, u64 *val)
332 {
333 	*val = *(u32 *)data;
334 	return 0;
335 }
336 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
337 
/* Name/value pair for one debugfs-exposed PID parameter. */
struct pid_param {
	char *name;
	void *value;
};

/* NULL-terminated table of files created under debugfs pstate_snb/. */
static struct pid_param pid_files[] = {
	{"sample_rate_ms", &pid_params.sample_rate_ms},
	{"d_gain_pct", &pid_params.d_gain_pct},
	{"i_gain_pct", &pid_params.i_gain_pct},
	{"deadband", &pid_params.deadband},
	{"setpoint", &pid_params.setpoint},
	{"p_gain_pct", &pid_params.p_gain_pct},
	{NULL, NULL}
};
352 
353 static void __init intel_pstate_debug_expose_params(void)
354 {
355 	struct dentry *debugfs_parent;
356 	int i = 0;
357 
358 	if (hwp_active)
359 		return;
360 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
361 	if (IS_ERR_OR_NULL(debugfs_parent))
362 		return;
363 	while (pid_files[i].name) {
364 		debugfs_create_file(pid_files[i].name, 0660,
365 				    debugfs_parent, pid_files[i].value,
366 				    &fops_pid_param);
367 		i++;
368 	}
369 }
370 
371 /************************** debugfs end ************************/
372 
373 /************************** sysfs begin ************************/
/* Generate a sysfs show handler printing one field of *limits. */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits->object);		\
	}
380 
/*
 * sysfs: percentage of the total P-state range that is turbo-only,
 * computed from CPU0's P-state bounds.
 */
static ssize_t show_turbo_pct(struct kobject *kobj,
				struct attribute *attr, char *buf)
{
	struct cpudata *cpu;
	int total, no_turbo, turbo_pct;
	uint32_t turbo_fp;

	cpu = all_cpu_data[0];

	/* counts of all P-states vs. non-turbo P-states, inclusive */
	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
	return sprintf(buf, "%u\n", turbo_pct);
}
396 
397 static ssize_t show_num_pstates(struct kobject *kobj,
398 				struct attribute *attr, char *buf)
399 {
400 	struct cpudata *cpu;
401 	int total;
402 
403 	cpu = all_cpu_data[0];
404 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
405 	return sprintf(buf, "%u\n", total);
406 }
407 
408 static ssize_t show_no_turbo(struct kobject *kobj,
409 			     struct attribute *attr, char *buf)
410 {
411 	ssize_t ret;
412 
413 	update_turbo_state();
414 	if (limits->turbo_disabled)
415 		ret = sprintf(buf, "%u\n", limits->turbo_disabled);
416 	else
417 		ret = sprintf(buf, "%u\n", limits->no_turbo);
418 
419 	return ret;
420 }
421 
/*
 * sysfs: set no_turbo.  Rejected when the platform has turbo disabled,
 * since the setting could never be honored in that case.
 */
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
			      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	update_turbo_state();
	if (limits->turbo_disabled) {
		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
		return -EPERM;
	}

	limits->no_turbo = clamp_t(int, input, 0, 1);

	/* in HWP mode the limit must be pushed to hardware immediately */
	if (hwp_active)
		intel_pstate_hwp_set();

	return count;
}
445 
/*
 * sysfs: set the maximum performance percentage.  The effective value is
 * clamped below by the policy minimum and the current minimum so that
 * max_perf_pct can never drop under min_perf_pct.
 */
static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->max_perf_pct = min(limits->max_policy_pct,
				   limits->max_sysfs_pct);
	limits->max_perf_pct = max(limits->min_policy_pct,
				   limits->max_perf_pct);
	limits->max_perf_pct = max(limits->min_perf_pct,
				   limits->max_perf_pct);
	/* keep the fixed-point mirror of the percentage in sync */
	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}
470 
/*
 * sysfs: set the minimum performance percentage.  The effective value is
 * clamped above by the policy maximum and the current maximum so that
 * min_perf_pct can never exceed max_perf_pct.
 */
static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
	limits->min_perf_pct = max(limits->min_policy_pct,
				   limits->min_sysfs_pct);
	limits->min_perf_pct = min(limits->max_policy_pct,
				   limits->min_perf_pct);
	limits->min_perf_pct = min(limits->max_perf_pct,
				   limits->min_perf_pct);
	/* keep the fixed-point mirror of the percentage in sync */
	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
				  int_tofp(100));

	if (hwp_active)
		intel_pstate_hwp_set();
	return count;
}
495 
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Attributes published under /sys/devices/system/cpu/intel_pstate */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
define_one_global_ro(turbo_pct);
define_one_global_ro(num_pstates);

static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	&turbo_pct.attr,
	&num_pstates.attr,
	NULL
};

static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
517 
/*
 * Register the global intel_pstate sysfs directory.  Failure here means
 * a broken sysfs core, hence the BUG_ON rather than error propagation.
 */
static void __init intel_pstate_sysfs_expose_params(void)
{
	struct kobject *intel_pstate_kobject;
	int rc;

	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
						&cpu_subsys.dev_root->kobj);
	BUG_ON(!intel_pstate_kobject);
	rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
	BUG_ON(rc);
}
529 /************************** sysfs end ************************/
530 
/* Turn on hardware-managed P-states for this CPU via MSR_PM_ENABLE. */
static void intel_pstate_hwp_enable(struct cpudata *cpudata)
{
	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
535 
/* Minimum P-state ratio: bits 8..14 of the ATOM_RATIOS MSR. */
static int atom_get_min_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_RATIOS, value);
	return (value >> 8) & 0x7F;
}
543 
/* Maximum non-turbo P-state ratio: bits 16..22 of the ATOM_RATIOS MSR. */
static int atom_get_max_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_RATIOS, value);
	return (value >> 16) & 0x7F;
}
551 
/* Turbo P-state ratio: bits 0..6 of the ATOM_TURBO_RATIOS MSR. */
static int atom_get_turbo_pstate(void)
{
	u64 value;

	rdmsrl(ATOM_TURBO_RATIOS, value);
	return value & 0x7F;
}
559 
/*
 * Request a P-state on Atom: the ratio goes in bits 8+, bit 32 disables
 * turbo engagement, and the low bits carry a voltage ID interpolated
 * linearly between vid.min and vid.max for this ratio.
 */
static void atom_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;
	int32_t vid_fp;
	u32 vid;

	val = (u64)pstate << 8;
	if (limits->no_turbo && !limits->turbo_disabled)
		val |= (u64)1 << 32;

	/* interpolate VID across the non-turbo P-state range */
	vid_fp = cpudata->vid.min + mul_fp(
		int_tofp(pstate - cpudata->pstate.min_pstate),
		cpudata->vid.ratio);

	vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
	vid = ceiling_fp(vid_fp);

	/* turbo ratios use the dedicated turbo VID instead */
	if (pstate > cpudata->pstate.max_pstate)
		vid = cpudata->vid.turbo;

	val |= vid;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}
584 
585 static int silvermont_get_scaling(void)
586 {
587 	u64 value;
588 	int i;
589 	/* Defined in Table 35-6 from SDM (Sept 2015) */
590 	static int silvermont_freq_table[] = {
591 		83300, 100000, 133300, 116700, 80000};
592 
593 	rdmsrl(MSR_FSB_FREQ, value);
594 	i = value & 0x7;
595 	WARN_ON(i > 4);
596 
597 	return silvermont_freq_table[i];
598 }
599 
600 static int airmont_get_scaling(void)
601 {
602 	u64 value;
603 	int i;
604 	/* Defined in Table 35-10 from SDM (Sept 2015) */
605 	static int airmont_freq_table[] = {
606 		83300, 100000, 133300, 116700, 80000,
607 		93300, 90000, 88900, 87500};
608 
609 	rdmsrl(MSR_FSB_FREQ, value);
610 	i = value & 0xF;
611 	WARN_ON(i > 8);
612 
613 	return airmont_freq_table[i];
614 }
615 
/*
 * Read voltage-ID data for Atom.  min and max are stored in fixed point;
 * ratio is the per-P-state VID step across the non-turbo range; turbo is
 * kept as the raw MSR field.
 */
static void atom_get_vid(struct cpudata *cpudata)
{
	u64 value;

	rdmsrl(ATOM_VIDS, value);
	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
	cpudata->vid.ratio = div_fp(
		cpudata->vid.max - cpudata->vid.min,
		int_tofp(cpudata->pstate.max_pstate -
			cpudata->pstate.min_pstate));

	rdmsrl(ATOM_TURBO_VIDS, value);
	cpudata->vid.turbo = value & 0x7f;
}
631 
/* Minimum P-state ratio: bits 40..47 of MSR_PLATFORM_INFO. */
static int core_get_min_pstate(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 40) & 0xFF;
}
639 
/* Maximum non-turbo P-state ratio: bits 8..15 of MSR_PLATFORM_INFO. */
static int core_get_max_pstate_physical(void)
{
	u64 value;

	rdmsrl(MSR_PLATFORM_INFO, value);
	return (value >> 8) & 0xFF;
}
647 
/*
 * Maximum non-turbo P-state.  Starts from MSR_PLATFORM_INFO (bits 8..15)
 * and, when configurable TDP is indicated (bits 33..34 of PLATFORM_INFO),
 * may be lowered to the turbo-activation ratio (TAR) if it is consistent
 * with the currently selected cTDP level's ratio.
 */
static int core_get_max_pstate(void)
{
	u64 tar;
	u64 plat_info;
	int max_pstate;
	int err;

	rdmsrl(MSR_PLATFORM_INFO, plat_info);
	max_pstate = (plat_info >> 8) & 0xFF;

	err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
	if (!err) {
		/* Do some sanity checking for safety */
		if (plat_info & 0x600000000) {	/* cTDP levels supported? */
			u64 tdp_ctrl;
			u64 tdp_ratio;
			int tdp_msr;

			err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
			if (err)
				goto skip_tar;

			/* pick the TDP-level MSR chosen by TDP_CONTROL */
			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
			if (err)
				goto skip_tar;

			/* only trust TAR if it matches the TDP ratio */
			if (tdp_ratio - 1 == tar) {
				max_pstate = tar;
				pr_debug("max_pstate=TAC %x\n", max_pstate);
			} else {
				goto skip_tar;
			}
		}
	}

skip_tar:
	return max_pstate;
}
687 
688 static int core_get_turbo_pstate(void)
689 {
690 	u64 value;
691 	int nont, ret;
692 
693 	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
694 	nont = core_get_max_pstate();
695 	ret = (value) & 255;
696 	if (ret <= nont)
697 		ret = nont;
698 	return ret;
699 }
700 
/* Frequency units per P-state ratio step on core CPUs (100000 kHz). */
static inline int core_get_scaling(void)
{
	return 100000;
}
705 
/*
 * Request a P-state on core CPUs: ratio in bits 8+, bit 32 set to keep
 * turbo disengaged when the user disabled it (but turbo is available).
 */
static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
	u64 val;

	val = (u64)pstate << 8;
	if (limits->no_turbo && !limits->turbo_disabled)
		val |= (u64)1 << 32;

	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}
716 
717 static int knl_get_turbo_pstate(void)
718 {
719 	u64 value;
720 	int nont, ret;
721 
722 	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
723 	nont = core_get_max_pstate();
724 	ret = (((value) >> 8) & 0xFF);
725 	if (ret <= nont)
726 		ret = nont;
727 	return ret;
728 }
729 
/* Defaults for CPUs using the core_* accessors (performance algorithm). */
static struct cpu_defaults core_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_max_physical = core_get_max_pstate_physical,
		.get_min = core_get_min_pstate,
		.get_turbo = core_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.set = core_set_pstate,
		.get_target_pstate = get_target_pstate_use_performance,
	},
};
749 
/* Defaults for Silvermont Atom CPUs (cpu-load algorithm, VID handling). */
static struct cpu_defaults silvermont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.set = atom_set_pstate,
		.get_scaling = silvermont_get_scaling,
		.get_vid = atom_get_vid,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};
770 
/* Defaults for Airmont Atom CPUs; differs from Silvermont only in scaling. */
static struct cpu_defaults airmont_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 60,
		.p_gain_pct = 14,
		.d_gain_pct = 0,
		.i_gain_pct = 4,
	},
	.funcs = {
		.get_max = atom_get_max_pstate,
		.get_max_physical = atom_get_max_pstate,
		.get_min = atom_get_min_pstate,
		.get_turbo = atom_get_turbo_pstate,
		.set = atom_set_pstate,
		.get_scaling = airmont_get_scaling,
		.get_vid = atom_get_vid,
		.get_target_pstate = get_target_pstate_use_cpu_load,
	},
};
791 
/* Defaults for Knights Landing; core accessors plus its own turbo read. */
static struct cpu_defaults knl_params = {
	.pid_policy = {
		.sample_rate_ms = 10,
		.deadband = 0,
		.setpoint = 97,
		.p_gain_pct = 20,
		.d_gain_pct = 0,
		.i_gain_pct = 0,
	},
	.funcs = {
		.get_max = core_get_max_pstate,
		.get_max_physical = core_get_max_pstate_physical,
		.get_min = core_get_min_pstate,
		.get_turbo = knl_get_turbo_pstate,
		.get_scaling = core_get_scaling,
		.set = core_set_pstate,
		.get_target_pstate = get_target_pstate_use_performance,
	},
};
811 
/*
 * Compute the currently allowed [*min, *max] P-state range for @cpu
 * from the active limits.  Both bounds are scaled from the same
 * max_perf base, so the minimum also tracks the turbo/non-turbo ceiling.
 */
static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
{
	int max_perf = cpu->pstate.turbo_pstate;
	int max_perf_adj;
	int min_perf;

	if (limits->no_turbo || limits->turbo_disabled)
		max_perf = cpu->pstate.max_pstate;

	/*
	 * performance can be limited by user through sysfs, by cpufreq
	 * policy, or by cpu specific default values determined through
	 * experimentation.
	 */
	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
	*max = clamp_t(int, max_perf_adj,
			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);

	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
833 
/*
 * Set @pstate on @cpu.  With @force false the value is written as-is
 * (used during init/teardown); with @force true it is first clamped to
 * the currently allowed range and skipped when already current.
 */
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
	int max_perf, min_perf;

	if (force) {
		update_turbo_state();

		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

		pstate = clamp_t(int, pstate, min_perf, max_perf);

		if (pstate == cpu->pstate.current_pstate)
			return;
	}
	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

	cpu->pstate.current_pstate = pstate;

	pstate_funcs.set(cpu, pstate);
}
854 
/*
 * Populate @cpu's P-state bounds via the model callbacks and start the
 * CPU at its minimum P-state (unclamped write, force == false).
 */
static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
	cpu->pstate.min_pstate = pstate_funcs.get_min();
	cpu->pstate.max_pstate = pstate_funcs.get_max();
	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
	cpu->pstate.scaling = pstate_funcs.get_scaling();

	if (pstate_funcs.get_vid)
		pstate_funcs.get_vid(cpu);
	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
}
867 
/*
 * Derive core_pct_busy (APERF/MPERF percentage, fixed point) and an
 * estimated average frequency from the latest sample deltas.
 */
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int64_t core_pct;

	core_pct = int_tofp(sample->aperf) * int_tofp(100);
	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));

	/* scale the physical max frequency by the observed busy percent */
	sample->freq = fp_toint(
		mul_fp(int_tofp(
			cpu->pstate.max_pstate_physical *
			cpu->pstate.scaling / 100),
			core_pct));

	sample->core_pct_busy = (int32_t)core_pct;
}
884 
/*
 * Read APERF/MPERF/TSC with interrupts off and store deltas since the
 * previous reading in cpu->sample.  Bails out early, keeping the old
 * sample, when MPERF or TSC did not advance — a zero delta would break
 * the busy/load divisions downstream.
 */
static inline void intel_pstate_sample(struct cpudata *cpu)
{
	u64 aperf, mperf;
	unsigned long flags;
	u64 tsc;

	local_irq_save(flags);
	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	tsc = rdtsc();
	if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
		local_irq_restore(flags);
		return;
	}
	local_irq_restore(flags);

	cpu->last_sample_time = cpu->sample.time;
	cpu->sample.time = ktime_get();
	cpu->sample.aperf = aperf;
	cpu->sample.mperf = mperf;
	cpu->sample.tsc =  tsc;
	/* convert raw readings into deltas over the sample interval */
	cpu->sample.aperf -= cpu->prev_aperf;
	cpu->sample.mperf -= cpu->prev_mperf;
	cpu->sample.tsc -= cpu->prev_tsc;

	intel_pstate_calc_busy(cpu);

	cpu->prev_aperf = aperf;
	cpu->prev_mperf = mperf;
	cpu->prev_tsc = tsc;
}
916 
917 static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
918 {
919 	int delay;
920 
921 	delay = msecs_to_jiffies(50);
922 	mod_timer_pinned(&cpu->timer, jiffies + delay);
923 }
924 
925 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
926 {
927 	int delay;
928 
929 	delay = msecs_to_jiffies(pid_params.sample_rate_ms);
930 	mod_timer_pinned(&cpu->timer, jiffies + delay);
931 }
932 
/*
 * Target-P-state algorithm based on C0 residency (MPERF/TSC), used on
 * Atom parts: feed the load percentage into the PID and step the current
 * P-state by the controller output.
 */
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
	struct sample *sample = &cpu->sample;
	int32_t cpu_load;

	/*
	 * The load can be estimated as the ratio of the mperf counter
	 * running at a constant frequency during active periods
	 * (C0) and the time stamp counter running at the same frequency
	 * also during C-states.
	 */
	cpu_load = div64_u64(int_tofp(100) * sample->mperf, sample->tsc);

	cpu->sample.busy_scaled = cpu_load;

	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
}
950 
951 
/*
 * Target-P-state algorithm based on APERF/MPERF busyness, used on core
 * parts: normalize busyness to the requested P-state, discount long idle
 * gaps, and step the current P-state by the PID output.
 */
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
	s64 duration_us;
	u32 sample_time;

	/*
	 * core_busy is the ratio of actual performance to max
	 * max_pstate is the max non turbo pstate available
	 * current_pstate was the pstate that was requested during
	 * 	the last sample period.
	 *
	 * We normalize core_busy, which was our actual percent
	 * performance to what we requested during the last sample
	 * period. The result will be a percentage of busy at a
	 * specified pstate.
	 */
	core_busy = cpu->sample.core_pct_busy;
	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
	current_pstate = int_tofp(cpu->pstate.current_pstate);
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

	/*
	 * Since we have a deferred timer, it will not fire unless
	 * we are in C0.  So, determine if the actual elapsed time
	 * is significantly greater (3x) than our sample interval.  If it
	 * is, then we were idle for a long enough period of time
	 * to adjust our busyness.
	 */
	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
	duration_us = ktime_us_delta(cpu->sample.time,
				     cpu->last_sample_time);
	if (duration_us > sample_time * 3) {
		sample_ratio = div_fp(int_tofp(sample_time),
				      int_tofp(duration_us));
		core_busy = mul_fp(core_busy, sample_ratio);
	}

	cpu->sample.busy_scaled = core_busy;
	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
}
993 
/*
 * One control step: pick a target P-state for the latest sample, apply
 * it (clamped), and emit a tracepoint with the sample details.
 */
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
	int from, target_pstate;
	struct sample *sample;

	from = cpu->pstate.current_pstate;

	target_pstate = pstate_funcs.get_target_pstate(cpu);

	intel_pstate_set_pstate(cpu, target_pstate, true);

	sample = &cpu->sample;
	trace_pstate_sample(fp_toint(sample->core_pct_busy),
		fp_toint(sample->busy_scaled),
		from,
		cpu->pstate.current_pstate,
		sample->mperf,
		sample->aperf,
		sample->tsc,
		sample->freq);
}
1015 
/* Timer callback in HWP mode: sample only, hardware picks the P-state. */
static void intel_hwp_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);
	intel_hwp_set_sample_time(cpu);
}
1023 
/* Timer callback in software mode: sample, run the controller, re-arm. */
static void intel_pstate_timer_func(unsigned long __data)
{
	struct cpudata *cpu = (struct cpudata *) __data;

	intel_pstate_sample(cpu);

	intel_pstate_adjust_busy_pstate(cpu);

	intel_pstate_set_sample_time(cpu);
}
1034 
/* Match an Intel family-6 CPU by model, requiring APERF/MPERF support. */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
			(unsigned long)&policy }
1038 
/* Supported CPU models mapped to their parameter sets. */
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, core_params),
	ICPU(0x2d, core_params),
	ICPU(0x37, silvermont_params),
	ICPU(0x3a, core_params),
	ICPU(0x3c, core_params),
	ICPU(0x3d, core_params),
	ICPU(0x3e, core_params),
	ICPU(0x3f, core_params),
	ICPU(0x45, core_params),
	ICPU(0x46, core_params),
	ICPU(0x47, core_params),
	ICPU(0x4c, airmont_params),
	ICPU(0x4e, core_params),
	ICPU(0x4f, core_params),
	ICPU(0x5e, core_params),
	ICPU(0x56, core_params),
	ICPU(0x57, knl_params),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1060 
/*
 * Models relevant to out-of-band (OOB) P-state management; this table is
 * consumed outside the visible portion of the file.
 */
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
	ICPU(0x56, core_params),
	{}
};
1065 
/*
 * Allocate (if needed) and initialize per-CPU state for @cpunum: enable
 * HWP when active, read P-state bounds, set up the deferrable sampling
 * timer pinned to the CPU, seed the PID, and take a first sample.
 *
 * Returns 0 on success or -ENOMEM if the cpudata allocation fails.
 */
static int intel_pstate_init_cpu(unsigned int cpunum)
{
	struct cpudata *cpu;

	if (!all_cpu_data[cpunum])
		all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
					       GFP_KERNEL);
	if (!all_cpu_data[cpunum])
		return -ENOMEM;

	cpu = all_cpu_data[cpunum];

	cpu->cpu = cpunum;

	if (hwp_active)
		intel_pstate_hwp_enable(cpu);

	intel_pstate_get_cpu_pstates(cpu);

	/* deferrable: the timer only fires while the CPU is not idle */
	init_timer_deferrable(&cpu->timer);
	cpu->timer.data = (unsigned long)cpu;
	cpu->timer.expires = jiffies + HZ/100;

	if (!hwp_active)
		cpu->timer.function = intel_pstate_timer_func;
	else
		cpu->timer.function = intel_hwp_timer_func;

	intel_pstate_busy_pid_reset(cpu);
	intel_pstate_sample(cpu);

	add_timer_on(&cpu->timer, cpunum);

	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);

	return 0;
}
1103 
1104 static unsigned int intel_pstate_get(unsigned int cpu_num)
1105 {
1106 	struct sample *sample;
1107 	struct cpudata *cpu;
1108 
1109 	cpu = all_cpu_data[cpu_num];
1110 	if (!cpu)
1111 		return 0;
1112 	sample = &cpu->sample;
1113 	return sample->freq;
1114 }
1115 
1116 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
1117 {
1118 	if (!policy->cpuinfo.max_freq)
1119 		return -ENODEV;
1120 
1121 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
1122 	    policy->max >= policy->cpuinfo.max_freq) {
1123 		pr_debug("intel_pstate: set performance\n");
1124 		limits = &performance_limits;
1125 		if (hwp_active)
1126 			intel_pstate_hwp_set();
1127 		return 0;
1128 	}
1129 
1130 	pr_debug("intel_pstate: set powersave\n");
1131 	limits = &powersave_limits;
1132 	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
1133 	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
1134 	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
1135 					      policy->cpuinfo.max_freq);
1136 	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
1137 
1138 	/* Normalize user input to [min_policy_pct, max_policy_pct] */
1139 	limits->min_perf_pct = max(limits->min_policy_pct,
1140 				   limits->min_sysfs_pct);
1141 	limits->min_perf_pct = min(limits->max_policy_pct,
1142 				   limits->min_perf_pct);
1143 	limits->max_perf_pct = min(limits->max_policy_pct,
1144 				   limits->max_sysfs_pct);
1145 	limits->max_perf_pct = max(limits->min_policy_pct,
1146 				   limits->max_perf_pct);
1147 	limits->max_perf = round_up(limits->max_perf, 8);
1148 
1149 	/* Make sure min_perf_pct <= max_perf_pct */
1150 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
1151 
1152 	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
1153 				  int_tofp(100));
1154 	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
1155 				  int_tofp(100));
1156 
1157 	if (hwp_active)
1158 		intel_pstate_hwp_set();
1159 
1160 	return 0;
1161 }
1162 
1163 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
1164 {
1165 	cpufreq_verify_within_cpu_limits(policy);
1166 
1167 	if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
1168 	    policy->policy != CPUFREQ_POLICY_PERFORMANCE)
1169 		return -EINVAL;
1170 
1171 	return 0;
1172 }
1173 
1174 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1175 {
1176 	int cpu_num = policy->cpu;
1177 	struct cpudata *cpu = all_cpu_data[cpu_num];
1178 
1179 	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
1180 
1181 	del_timer_sync(&all_cpu_data[cpu_num]->timer);
1182 	if (hwp_active)
1183 		return;
1184 
1185 	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1186 }
1187 
1188 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1189 {
1190 	struct cpudata *cpu;
1191 	int rc;
1192 
1193 	rc = intel_pstate_init_cpu(policy->cpu);
1194 	if (rc)
1195 		return rc;
1196 
1197 	cpu = all_cpu_data[policy->cpu];
1198 
1199 	if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1200 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1201 	else
1202 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
1203 
1204 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
1205 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1206 
1207 	/* cpuinfo and default policy values */
1208 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
1209 	policy->cpuinfo.max_freq =
1210 		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1211 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
1212 	cpumask_set_cpu(policy->cpu, policy->cpus);
1213 
1214 	return 0;
1215 }
1216 
/*
 * cpufreq driver hooks.  This driver implements ->setpolicy (not ->target),
 * so only the "performance" and "powersave" policies are selectable and
 * P-state selection happens internally via the sampling timer.
 */
static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.stop_cpu	= intel_pstate_stop_cpu,
	.name		= "intel_pstate",
};
1226 
/* Boot-line flags parsed by intel_pstate_setup() below. */
static int __initdata no_load;		/* "disable": do not register driver */
static int __initdata no_hwp;		/* "no_hwp": do not enable HWP */
static int __initdata hwp_only;		/* "hwp_only": load only when HWP is active */
/*
 * "force": load even when the platform exposes ACPI _PPC.  Not __initdata
 * because it is read by intel_pstate_platform_pwr_mgmt_exists(), which is
 * not marked __init.
 */
static unsigned int force_load;
1231 
1232 static int intel_pstate_msrs_not_valid(void)
1233 {
1234 	if (!pstate_funcs.get_max() ||
1235 	    !pstate_funcs.get_min() ||
1236 	    !pstate_funcs.get_turbo())
1237 		return -ENODEV;
1238 
1239 	return 0;
1240 }
1241 
1242 static void copy_pid_params(struct pstate_adjust_policy *policy)
1243 {
1244 	pid_params.sample_rate_ms = policy->sample_rate_ms;
1245 	pid_params.p_gain_pct = policy->p_gain_pct;
1246 	pid_params.i_gain_pct = policy->i_gain_pct;
1247 	pid_params.d_gain_pct = policy->d_gain_pct;
1248 	pid_params.deadband = policy->deadband;
1249 	pid_params.setpoint = policy->setpoint;
1250 }
1251 
1252 static void copy_cpu_funcs(struct pstate_funcs *funcs)
1253 {
1254 	pstate_funcs.get_max   = funcs->get_max;
1255 	pstate_funcs.get_max_physical = funcs->get_max_physical;
1256 	pstate_funcs.get_min   = funcs->get_min;
1257 	pstate_funcs.get_turbo = funcs->get_turbo;
1258 	pstate_funcs.get_scaling = funcs->get_scaling;
1259 	pstate_funcs.set       = funcs->set;
1260 	pstate_funcs.get_vid   = funcs->get_vid;
1261 	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
1262 
1263 }
1264 
1265 #if IS_ENABLED(CONFIG_ACPI)
1266 #include <acpi/processor.h>
1267 
1268 static bool intel_pstate_no_acpi_pss(void)
1269 {
1270 	int i;
1271 
1272 	for_each_possible_cpu(i) {
1273 		acpi_status status;
1274 		union acpi_object *pss;
1275 		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
1276 		struct acpi_processor *pr = per_cpu(processors, i);
1277 
1278 		if (!pr)
1279 			continue;
1280 
1281 		status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
1282 		if (ACPI_FAILURE(status))
1283 			continue;
1284 
1285 		pss = buffer.pointer;
1286 		if (pss && pss->type == ACPI_TYPE_PACKAGE) {
1287 			kfree(pss);
1288 			return false;
1289 		}
1290 
1291 		kfree(pss);
1292 	}
1293 
1294 	return true;
1295 }
1296 
1297 static bool intel_pstate_has_acpi_ppc(void)
1298 {
1299 	int i;
1300 
1301 	for_each_possible_cpu(i) {
1302 		struct acpi_processor *pr = per_cpu(processors, i);
1303 
1304 		if (!pr)
1305 			continue;
1306 		if (acpi_has_method(pr->handle, "_PPC"))
1307 			return true;
1308 	}
1309 	return false;
1310 }
1311 
/* Which ACPI object identifies platform power management for a vendor. */
enum {
	PSS,	/* check _PSS packages via intel_pstate_no_acpi_pss() */
	PPC,	/* check _PPC method via intel_pstate_has_acpi_ppc() */
};
1316 
/* One FADT OEM id/table-id pair whose platform does its own power mgmt. */
struct hw_vendor_info {
	u16  valid;				/* 0 terminates the table */
	char oem_id[ACPI_OEM_ID_SIZE];		/* FADT OEM ID to match */
	char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* FADT OEM table ID */
	int  oem_pwr_table;			/* PSS or PPC check to apply */
};
1323 
1324 /* Hardware vendor-specific info that has its own power management modes */
1325 static struct hw_vendor_info vendor_info[] = {
1326 	{1, "HP    ", "ProLiant", PSS},
1327 	{1, "ORACLE", "X4-2    ", PPC},
1328 	{1, "ORACLE", "X4-2L   ", PPC},
1329 	{1, "ORACLE", "X4-2B   ", PPC},
1330 	{1, "ORACLE", "X3-2    ", PPC},
1331 	{1, "ORACLE", "X3-2L   ", PPC},
1332 	{1, "ORACLE", "X3-2B   ", PPC},
1333 	{1, "ORACLE", "X4470M2 ", PPC},
1334 	{1, "ORACLE", "X4270M3 ", PPC},
1335 	{1, "ORACLE", "X4270M2 ", PPC},
1336 	{1, "ORACLE", "X4170M2 ", PPC},
1337 	{1, "ORACLE", "X4170 M3", PPC},
1338 	{1, "ORACLE", "X4275 M3", PPC},
1339 	{1, "ORACLE", "X6-2    ", PPC},
1340 	{1, "ORACLE", "Sudbury ", PPC},
1341 	{0, "", ""},
1342 };
1343 
1344 static bool intel_pstate_platform_pwr_mgmt_exists(void)
1345 {
1346 	struct acpi_table_header hdr;
1347 	struct hw_vendor_info *v_info;
1348 	const struct x86_cpu_id *id;
1349 	u64 misc_pwr;
1350 
1351 	id = x86_match_cpu(intel_pstate_cpu_oob_ids);
1352 	if (id) {
1353 		rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
1354 		if ( misc_pwr & (1 << 8))
1355 			return true;
1356 	}
1357 
1358 	if (acpi_disabled ||
1359 	    ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
1360 		return false;
1361 
1362 	for (v_info = vendor_info; v_info->valid; v_info++) {
1363 		if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
1364 			!strncmp(hdr.oem_table_id, v_info->oem_table_id,
1365 						ACPI_OEM_TABLE_ID_SIZE))
1366 			switch (v_info->oem_pwr_table) {
1367 			case PSS:
1368 				return intel_pstate_no_acpi_pss();
1369 			case PPC:
1370 				return intel_pstate_has_acpi_ppc() &&
1371 					(!force_load);
1372 			}
1373 	}
1374 
1375 	return false;
1376 }
#else /* CONFIG_ACPI not enabled */
/* Without ACPI there is no platform _PSS/_PPC to defer to. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */
1381 
/*
 * Driver entry point.  Ordering matters here:
 *  1. bail out for "intel_pstate=disable", unknown CPUs, or platforms
 *     that manage P-states in firmware;
 *  2. install the per-model PID parameters and MSR accessors, then
 *     sanity-check the MSRs;
 *  3. allocate the per-CPU pointer array;
 *  4. detect/enable HWP (unless "no_hwp");
 *  5. register with cpufreq and expose debugfs/sysfs knobs.
 */
static int __init intel_pstate_init(void)
{
	int cpu, rc = 0;
	const struct x86_cpu_id *id;
	struct cpu_defaults *cpu_def;

	if (no_load)
		return -ENODEV;

	id = x86_match_cpu(intel_pstate_cpu_ids);
	if (!id)
		return -ENODEV;

	/*
	 * The Intel pstate driver will be ignored if the platform
	 * firmware has its own power management modes.
	 */
	if (intel_pstate_platform_pwr_mgmt_exists())
		return -ENODEV;

	/* driver_data carries the model's struct cpu_defaults. */
	cpu_def = (struct cpu_defaults *)id->driver_data;

	copy_pid_params(&cpu_def->pid_policy);
	copy_cpu_funcs(&cpu_def->funcs);

	/* Must run after copy_cpu_funcs(): it calls through pstate_funcs. */
	if (intel_pstate_msrs_not_valid())
		return -ENODEV;

	pr_info("Intel P-state driver initializing.\n");

	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
	if (!all_cpu_data)
		return -ENOMEM;

	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
		pr_info("intel_pstate: HWP enabled\n");
		hwp_active++;
	}

	/* "hwp_only" means: refuse to load without hardware P-states. */
	if (!hwp_active && hwp_only)
		goto out;

	rc = cpufreq_register_driver(&intel_pstate_driver);
	if (rc)
		goto out;

	intel_pstate_debug_expose_params();
	intel_pstate_sysfs_expose_params();

	return rc;
out:
	/* Tear down any per-CPU state created before the failure. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (all_cpu_data[cpu]) {
			del_timer_sync(&all_cpu_data[cpu]->timer);
			kfree(all_cpu_data[cpu]);
		}
	}

	put_online_cpus();
	vfree(all_cpu_data);
	/*
	 * NOTE(review): this discards a non-ENODEV rc from
	 * cpufreq_register_driver(); callers only see -ENODEV.
	 */
	return -ENODEV;
}
device_initcall(intel_pstate_init);
1446 
1447 static int __init intel_pstate_setup(char *str)
1448 {
1449 	if (!str)
1450 		return -EINVAL;
1451 
1452 	if (!strcmp(str, "disable"))
1453 		no_load = 1;
1454 	if (!strcmp(str, "no_hwp")) {
1455 		pr_info("intel_pstate: HWP disabled\n");
1456 		no_hwp = 1;
1457 	}
1458 	if (!strcmp(str, "force"))
1459 		force_load = 1;
1460 	if (!strcmp(str, "hwp_only"))
1461 		hwp_only = 1;
1462 	return 0;
1463 }
1464 early_param("intel_pstate", intel_pstate_setup);
1465 
1466 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
1467 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
1468 MODULE_LICENSE("GPL");
1469