1 /*
2  * intel_pstate.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <trace/events/power.h>
29 
30 #include <asm/div64.h>
31 #include <asm/msr.h>
32 #include <asm/cpu_device_id.h>
33 
/* Number of slots in the per-CPU sample ring (cpudata.samples[]). */
#define SAMPLE_COUNT		3
35 
/*
 * Fixed-point helpers.  Values carry FRAC_BITS fractional bits:
 * int_tofp() scales an integer up, fp_toint() drops the fraction.
 */
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

/*
 * Multiply two fixed-point values.  The product is formed in 64 bits and
 * shifted back down by FRAC_BITS so the result has the same scaling as the
 * operands.
 */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	int64_t product = (int64_t)x * (int64_t)y;

	return product >> FRAC_BITS;
}
44 
45 static inline int32_t div_fp(int32_t x, int32_t y)
46 {
47 	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
48 }
49 
/*
 * One measurement interval for a CPU.  The raw fields are filled in by
 * intel_pstate_sample(); the derived ones by intel_pstate_calc_busy().
 */
struct sample {
	ktime_t start_time;	/* timestamp of the previous sample */
	ktime_t end_time;	/* timestamp when this sample was taken */
	int core_pct_busy;	/* pstate_pct_busy * (aperf * 100 / mperf) / 100 */
	int pstate_pct_busy;	/* 100 - idletime_us * 100 / duration_us */
	u64 duration_us;	/* end_time - start_time in microseconds */
	u64 idletime_us;	/* idle time delta over the interval */
	u64 aperf;		/* MSR_IA32_APERF delta over the interval */
	u64 mperf;		/* MSR_IA32_MPERF delta over the interval */
	int freq;		/* max_pstate * (aperf * 100 / mperf) * 1000 */
};
61 
/* P-state values tracked per CPU. */
struct pstate_data {
	int	current_pstate;	/* last value recorded by intel_pstate_set_pstate() */
	int	min_pstate;	/* MSR_PLATFORM_INFO bits 47:40 */
	int	max_pstate;	/* MSR_PLATFORM_INFO bits 15:8 */
	int	turbo_pstate;	/* low byte of MSR_NHM_TURBO_RATIO_LIMIT, never below max_pstate */
};
68 
/* State and tuning of one PID controller (see pid_calc()). */
struct _pid {
	int setpoint;		/* target value the input is compared against */
	int32_t integral;	/* accumulated error, fixed-point, clamped in pid_calc() */
	int32_t p_gain;		/* proportional gain, fixed-point (percent / 100) */
	int32_t i_gain;		/* integral gain, fixed-point (percent / 100) */
	int32_t d_gain;		/* derivative gain, fixed-point (percent / 100) */
	int deadband;		/* errors with |err| <= deadband produce no output */
	int last_err;		/* integer error from the previous pid_calc() */
};
78 
/* Per-CPU driver state; one instance per entry of all_cpu_data[]. */
struct cpudata {
	int cpu;			/* CPU number this state belongs to */

	char name[64];			/* descriptive string set in intel_pstate_get_cpu_pstates() */

	struct timer_list timer;	/* periodic sampling timer (intel_pstate_timer_func) */

	struct pstate_adjust_policy *pstate_policy;	/* tuning taken from the matched CPU id entry */
	struct pstate_data pstate;
	struct _pid pid;		/* controller used when idle_mode == 0 */
	struct _pid idle_pid;		/* controller used when idle_mode != 0 */

	int min_pstate_count;		/* only touched under XPERF_FIX in the timer handler */
	int idle_mode;			/* selects which adjust routine the timer handler runs */

	/* Baseline values captured at the previous sample. */
	ktime_t prev_sample;
	u64	prev_idle_time_us;
	u64	prev_aperf;
	u64	prev_mperf;
	int	sample_ptr;		/* index of the most recent entry in samples[] */
	struct sample samples[SAMPLE_COUNT];
};
101 
/*
 * Table of per-CPU state, indexed by CPU number.  Allocated zeroed in
 * intel_pstate_init(); an entry is NULL until intel_pstate_init_cpu()
 * allocates it and again after intel_pstate_cpu_exit() frees it.
 */
static struct cpudata **all_cpu_data;
/* Tunables for the sampling timer and the PID controller. */
struct pstate_adjust_policy {
	int sample_rate_ms;	/* timer period in milliseconds */
	int deadband;		/* passed to pid_reset() as the controller deadband */
	int setpoint;		/* setpoint of the busy PID controller */
	int p_gain_pct;		/* proportional gain in percent */
	int d_gain_pct;		/* derivative gain in percent */
	int i_gain_pct;		/* integral gain in percent */
};
111 
/*
 * Tuning used by every entry of intel_pstate_cpu_ids[].  The individual
 * fields are also exposed for runtime modification through pid_files[].
 */
static struct pstate_adjust_policy default_policy = {
	.sample_rate_ms = 10,
	.deadband = 0,
	.setpoint = 109,
	.p_gain_pct = 17,
	.d_gain_pct = 0,
	.i_gain_pct = 4,
};
120 
/* Global performance limits applied in intel_pstate_get_min_max(). */
struct perf_limits {
	int no_turbo;		/* non-zero: cap at max_pstate instead of turbo_pstate */
	int max_perf_pct;	/* upper limit in percent, 0..100 */
	int min_perf_pct;	/* lower limit in percent, 0..100 */
	int32_t max_perf;	/* max_perf_pct / 100 as a fixed-point fraction */
	int32_t min_perf;	/* min_perf_pct / 100 as a fixed-point fraction */
};
128 
/*
 * Initial limits: turbo allowed, maximum at 100% (fixed-point 1.0) and
 * minimum at 0%.  Updated from sysfs and from intel_pstate_set_policy().
 */
static struct perf_limits limits = {
	.no_turbo = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
};
136 
137 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
138 			int deadband, int integral) {
139 	pid->setpoint = setpoint;
140 	pid->deadband  = deadband;
141 	pid->integral  = int_tofp(integral);
142 	pid->last_err  = setpoint - busy;
143 }
144 
145 static inline void pid_p_gain_set(struct _pid *pid, int percent)
146 {
147 	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
148 }
149 
150 static inline void pid_i_gain_set(struct _pid *pid, int percent)
151 {
152 	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
153 }
154 
155 static inline void pid_d_gain_set(struct _pid *pid, int percent)
156 {
157 
158 	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
159 }
160 
161 static signed int pid_calc(struct _pid *pid, int busy)
162 {
163 	signed int err, result;
164 	int32_t pterm, dterm, fp_error;
165 	int32_t integral_limit;
166 
167 	err = pid->setpoint - busy;
168 	fp_error = int_tofp(err);
169 
170 	if (abs(err) <= pid->deadband)
171 		return 0;
172 
173 	pterm = mul_fp(pid->p_gain, fp_error);
174 
175 	pid->integral += fp_error;
176 
177 	/* limit the integral term */
178 	integral_limit = int_tofp(30);
179 	if (pid->integral > integral_limit)
180 		pid->integral = integral_limit;
181 	if (pid->integral < -integral_limit)
182 		pid->integral = -integral_limit;
183 
184 	dterm = mul_fp(pid->d_gain, (err - pid->last_err));
185 	pid->last_err = err;
186 
187 	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
188 
189 	return (signed int)fp_toint(result);
190 }
191 
192 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
193 {
194 	pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct);
195 	pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct);
196 	pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct);
197 
198 	pid_reset(&cpu->pid,
199 		cpu->pstate_policy->setpoint,
200 		100,
201 		cpu->pstate_policy->deadband,
202 		0);
203 }
204 
205 static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu)
206 {
207 	pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct);
208 	pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct);
209 	pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct);
210 
211 	pid_reset(&cpu->idle_pid,
212 		75,
213 		50,
214 		cpu->pstate_policy->deadband,
215 		0);
216 }
217 
218 static inline void intel_pstate_reset_all_pid(void)
219 {
220 	unsigned int cpu;
221 	for_each_online_cpu(cpu) {
222 		if (all_cpu_data[cpu])
223 			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
224 	}
225 }
226 
227 /************************** debugfs begin ************************/
228 static int pid_param_set(void *data, u64 val)
229 {
230 	*(u32 *)data = val;
231 	intel_pstate_reset_all_pid();
232 	return 0;
233 }
234 static int pid_param_get(void *data, u64 *val)
235 {
236 	*val = *(u32 *)data;
237 	return 0;
238 }
/*
 * File operations for the tunable files created in
 * intel_pstate_debug_expose_params(); values are formatted as "%llu\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
			pid_param_set, "%llu\n");
241 
/* Associates a debugfs file name with the variable it exposes. */
struct pid_param {
	char *name;	/* file name; NULL terminates a table */
	void *value;	/* handed to debugfs_create_file() as the file's data */
};
246 
/*
 * Tunables exposed through debugfs, one file per default_policy field.
 * The table is terminated by an entry with a NULL name.
 */
static struct pid_param pid_files[] = {
	{"sample_rate_ms", &default_policy.sample_rate_ms},
	{"d_gain_pct", &default_policy.d_gain_pct},
	{"i_gain_pct", &default_policy.i_gain_pct},
	{"deadband", &default_policy.deadband},
	{"setpoint", &default_policy.setpoint},
	{"p_gain_pct", &default_policy.p_gain_pct},
	{NULL, NULL}
};
256 
257 static struct dentry *debugfs_parent;
258 static void intel_pstate_debug_expose_params(void)
259 {
260 	int i = 0;
261 
262 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
263 	if (IS_ERR_OR_NULL(debugfs_parent))
264 		return;
265 	while (pid_files[i].name) {
266 		debugfs_create_file(pid_files[i].name, 0660,
267 				debugfs_parent, pid_files[i].value,
268 				&fops_pid_param);
269 		i++;
270 	}
271 }
272 
273 /************************** debugfs end ************************/
274 
275 /************************** sysfs begin ************************/
/*
 * show_one(file_name, object) - generate show_<file_name>(), a sysfs show
 * handler that prints limits.<object> followed by a newline into buf.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}
282 
283 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
284 				const char *buf, size_t count)
285 {
286 	unsigned int input;
287 	int ret;
288 	ret = sscanf(buf, "%u", &input);
289 	if (ret != 1)
290 		return -EINVAL;
291 	limits.no_turbo = clamp_t(int, input, 0 , 1);
292 
293 	return count;
294 }
295 
296 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
297 				const char *buf, size_t count)
298 {
299 	unsigned int input;
300 	int ret;
301 	ret = sscanf(buf, "%u", &input);
302 	if (ret != 1)
303 		return -EINVAL;
304 
305 	limits.max_perf_pct = clamp_t(int, input, 0 , 100);
306 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
307 	return count;
308 }
309 
310 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
311 				const char *buf, size_t count)
312 {
313 	unsigned int input;
314 	int ret;
315 	ret = sscanf(buf, "%u", &input);
316 	if (ret != 1)
317 		return -EINVAL;
318 	limits.min_perf_pct = clamp_t(int, input, 0 , 100);
319 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
320 
321 	return count;
322 }
323 
/* Generate show_no_turbo(), show_max_perf_pct() and show_min_perf_pct(). */
show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/*
 * NOTE(review): define_one_global_rw() is defined outside this file;
 * presumably it declares the read/write attribute objects whose .attr
 * members are listed in intel_pstate_attributes[], wired to the show_ and
 * store_ handlers of the same name - confirm against its definition.
 */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
331 
/* NULL-terminated list of the attributes in intel_pstate_attr_group. */
static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	NULL
};
338 
/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
/* "intel_pstate" kobject created by intel_pstate_sysfs_expose_params(). */
static struct kobject *intel_pstate_kobject;
343 
344 static void intel_pstate_sysfs_expose_params(void)
345 {
346 	int rc;
347 
348 	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
349 						&cpu_subsys.dev_root->kobj);
350 	BUG_ON(!intel_pstate_kobject);
351 	rc = sysfs_create_group(intel_pstate_kobject,
352 				&intel_pstate_attr_group);
353 	BUG_ON(rc);
354 }
355 
356 /************************** sysfs end ************************/
357 
358 static int intel_pstate_min_pstate(void)
359 {
360 	u64 value;
361 	rdmsrl(MSR_PLATFORM_INFO, value);
362 	return (value >> 40) & 0xFF;
363 }
364 
365 static int intel_pstate_max_pstate(void)
366 {
367 	u64 value;
368 	rdmsrl(MSR_PLATFORM_INFO, value);
369 	return (value >> 8) & 0xFF;
370 }
371 
372 static int intel_pstate_turbo_pstate(void)
373 {
374 	u64 value;
375 	int nont, ret;
376 	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
377 	nont = intel_pstate_max_pstate();
378 	ret = ((value) & 255);
379 	if (ret <= nont)
380 		ret = nont;
381 	return ret;
382 }
383 
384 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
385 {
386 	int max_perf = cpu->pstate.turbo_pstate;
387 	int min_perf;
388 	if (limits.no_turbo)
389 		max_perf = cpu->pstate.max_pstate;
390 
391 	max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
392 	*max = clamp_t(int, max_perf,
393 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
394 
395 	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
396 	*min = clamp_t(int, min_perf,
397 			cpu->pstate.min_pstate, max_perf);
398 }
399 
400 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
401 {
402 	int max_perf, min_perf;
403 
404 	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
405 
406 	pstate = clamp_t(int, pstate, min_perf, max_perf);
407 
408 	if (pstate == cpu->pstate.current_pstate)
409 		return;
410 
411 #ifndef MODULE
412 	trace_cpu_frequency(pstate * 100000, cpu->cpu);
413 #endif
414 	cpu->pstate.current_pstate = pstate;
415 	wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
416 
417 }
418 
419 static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
420 {
421 	int target;
422 	target = cpu->pstate.current_pstate + steps;
423 
424 	intel_pstate_set_pstate(cpu, target);
425 }
426 
427 static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
428 {
429 	int target;
430 	target = cpu->pstate.current_pstate - steps;
431 	intel_pstate_set_pstate(cpu, target);
432 }
433 
434 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
435 {
436 	sprintf(cpu->name, "Intel 2nd generation core");
437 
438 	cpu->pstate.min_pstate = intel_pstate_min_pstate();
439 	cpu->pstate.max_pstate = intel_pstate_max_pstate();
440 	cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate();
441 
442 	/*
443 	 * goto max pstate so we don't slow up boot if we are built-in if we are
444 	 * a module we will take care of it during normal operation
445 	 */
446 	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
447 }
448 
449 static inline void intel_pstate_calc_busy(struct cpudata *cpu,
450 					struct sample *sample)
451 {
452 	u64 core_pct;
453 	sample->pstate_pct_busy = 100 - div64_u64(
454 					sample->idletime_us * 100,
455 					sample->duration_us);
456 	core_pct = div64_u64(sample->aperf * 100, sample->mperf);
457 	sample->freq = cpu->pstate.max_pstate * core_pct * 1000;
458 
459 	sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct),
460 					100);
461 }
462 
463 static inline void intel_pstate_sample(struct cpudata *cpu)
464 {
465 	ktime_t now;
466 	u64 idle_time_us;
467 	u64 aperf, mperf;
468 
469 	now = ktime_get();
470 	idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL);
471 
472 	rdmsrl(MSR_IA32_APERF, aperf);
473 	rdmsrl(MSR_IA32_MPERF, mperf);
474 	/* for the first sample, don't actually record a sample, just
475 	 * set the baseline */
476 	if (cpu->prev_idle_time_us > 0) {
477 		cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
478 		cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample;
479 		cpu->samples[cpu->sample_ptr].end_time = now;
480 		cpu->samples[cpu->sample_ptr].duration_us =
481 			ktime_us_delta(now, cpu->prev_sample);
482 		cpu->samples[cpu->sample_ptr].idletime_us =
483 			idle_time_us - cpu->prev_idle_time_us;
484 
485 		cpu->samples[cpu->sample_ptr].aperf = aperf;
486 		cpu->samples[cpu->sample_ptr].mperf = mperf;
487 		cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
488 		cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
489 
490 		intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
491 	}
492 
493 	cpu->prev_sample = now;
494 	cpu->prev_idle_time_us = idle_time_us;
495 	cpu->prev_aperf = aperf;
496 	cpu->prev_mperf = mperf;
497 }
498 
499 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
500 {
501 	int sample_time, delay;
502 
503 	sample_time = cpu->pstate_policy->sample_rate_ms;
504 	delay = msecs_to_jiffies(sample_time);
505 	mod_timer_pinned(&cpu->timer, jiffies + delay);
506 }
507 
/*
 * Switch @cpu to idle mode: the timer handler will call
 * intel_pstate_adjust_idle_pstate() on subsequent ticks.
 */
static inline void intel_pstate_idle_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 1;
}
512 
/*
 * Switch @cpu back to normal mode: the timer handler will call
 * intel_pstate_adjust_busy_pstate() on subsequent ticks.
 */
static inline void intel_pstate_normal_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 0;
}
517 
518 static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
519 {
520 	int32_t busy_scaled;
521 	int32_t core_busy, turbo_pstate, current_pstate;
522 
523 	core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
524 	turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
525 	current_pstate = int_tofp(cpu->pstate.current_pstate);
526 	busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));
527 
528 	return fp_toint(busy_scaled);
529 }
530 
531 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
532 {
533 	int busy_scaled;
534 	struct _pid *pid;
535 	signed int ctl = 0;
536 	int steps;
537 
538 	pid = &cpu->pid;
539 	busy_scaled = intel_pstate_get_scaled_busy(cpu);
540 
541 	ctl = pid_calc(pid, busy_scaled);
542 
543 	steps = abs(ctl);
544 	if (ctl < 0)
545 		intel_pstate_pstate_increase(cpu, steps);
546 	else
547 		intel_pstate_pstate_decrease(cpu, steps);
548 }
549 
550 static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu)
551 {
552 	int busy_scaled;
553 	struct _pid *pid;
554 	int ctl = 0;
555 	int steps;
556 
557 	pid = &cpu->idle_pid;
558 
559 	busy_scaled = intel_pstate_get_scaled_busy(cpu);
560 
561 	ctl = pid_calc(pid, 100 - busy_scaled);
562 
563 	steps = abs(ctl);
564 	if (ctl < 0)
565 		intel_pstate_pstate_decrease(cpu, steps);
566 	else
567 		intel_pstate_pstate_increase(cpu, steps);
568 
569 	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate)
570 		intel_pstate_normal_mode(cpu);
571 }
572 
573 static void intel_pstate_timer_func(unsigned long __data)
574 {
575 	struct cpudata *cpu = (struct cpudata *) __data;
576 
577 	intel_pstate_sample(cpu);
578 
579 	if (!cpu->idle_mode)
580 		intel_pstate_adjust_busy_pstate(cpu);
581 	else
582 		intel_pstate_adjust_idle_pstate(cpu);
583 
584 #if defined(XPERF_FIX)
585 	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
586 		cpu->min_pstate_count++;
587 		if (!(cpu->min_pstate_count % 5)) {
588 			intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
589 			intel_pstate_idle_mode(cpu);
590 		}
591 	} else
592 		cpu->min_pstate_count = 0;
593 #endif
594 	intel_pstate_set_sample_time(cpu);
595 }
596 
/*
 * ICPU(model, policy) - positional initializer for a struct x86_cpu_id
 * entry with vendor X86_VENDOR_INTEL, the value 6, the given model and any
 * feature; the address of @policy is carried in the last field, which
 * intel_pstate_init_cpu() reads back as id->driver_data.
 */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy }

/* CPU models handled by this driver; terminated by an empty entry. */
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, default_policy),
	ICPU(0x2d, default_policy),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
606 
607 static int intel_pstate_init_cpu(unsigned int cpunum)
608 {
609 
610 	const struct x86_cpu_id *id;
611 	struct cpudata *cpu;
612 
613 	id = x86_match_cpu(intel_pstate_cpu_ids);
614 	if (!id)
615 		return -ENODEV;
616 
617 	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
618 	if (!all_cpu_data[cpunum])
619 		return -ENOMEM;
620 
621 	cpu = all_cpu_data[cpunum];
622 
623 	intel_pstate_get_cpu_pstates(cpu);
624 
625 	cpu->cpu = cpunum;
626 	cpu->pstate_policy =
627 		(struct pstate_adjust_policy *)id->driver_data;
628 	init_timer_deferrable(&cpu->timer);
629 	cpu->timer.function = intel_pstate_timer_func;
630 	cpu->timer.data =
631 		(unsigned long)cpu;
632 	cpu->timer.expires = jiffies + HZ/100;
633 	intel_pstate_busy_pid_reset(cpu);
634 	intel_pstate_idle_pid_reset(cpu);
635 	intel_pstate_sample(cpu);
636 	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
637 
638 	add_timer_on(&cpu->timer, cpunum);
639 
640 	pr_info("Intel pstate controlling: cpu %d\n", cpunum);
641 
642 	return 0;
643 }
644 
645 static unsigned int intel_pstate_get(unsigned int cpu_num)
646 {
647 	struct sample *sample;
648 	struct cpudata *cpu;
649 
650 	cpu = all_cpu_data[cpu_num];
651 	if (!cpu)
652 		return 0;
653 	sample = &cpu->samples[cpu->sample_ptr];
654 	return sample->freq;
655 }
656 
657 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
658 {
659 	struct cpudata *cpu;
660 
661 	cpu = all_cpu_data[policy->cpu];
662 
663 	if (!policy->cpuinfo.max_freq)
664 		return -ENODEV;
665 
666 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
667 		limits.min_perf_pct = 100;
668 		limits.min_perf = int_tofp(1);
669 		limits.max_perf_pct = 100;
670 		limits.max_perf = int_tofp(1);
671 		limits.no_turbo = 0;
672 		return 0;
673 	}
674 	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
675 	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
676 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
677 
678 	limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq;
679 	limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100);
680 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
681 
682 	return 0;
683 }
684 
685 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
686 {
687 	cpufreq_verify_within_limits(policy,
688 				policy->cpuinfo.min_freq,
689 				policy->cpuinfo.max_freq);
690 
691 	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
692 		(policy->policy != CPUFREQ_POLICY_PERFORMANCE))
693 		return -EINVAL;
694 
695 	return 0;
696 }
697 
698 static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy)
699 {
700 	int cpu = policy->cpu;
701 
702 	del_timer(&all_cpu_data[cpu]->timer);
703 	kfree(all_cpu_data[cpu]);
704 	all_cpu_data[cpu] = NULL;
705 	return 0;
706 }
707 
708 static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy)
709 {
710 	int rc, min_pstate, max_pstate;
711 	struct cpudata *cpu;
712 
713 	rc = intel_pstate_init_cpu(policy->cpu);
714 	if (rc)
715 		return rc;
716 
717 	cpu = all_cpu_data[policy->cpu];
718 
719 	if (!limits.no_turbo &&
720 		limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
721 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
722 	else
723 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
724 
725 	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
726 	policy->min = min_pstate * 100000;
727 	policy->max = max_pstate * 100000;
728 
729 	/* cpuinfo and default policy values */
730 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
731 	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
732 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
733 	cpumask_set_cpu(policy->cpu, policy->cpus);
734 
735 	return 0;
736 }
737 
/* cpufreq driver description registered in intel_pstate_init(). */
static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.name		= "intel_pstate",
	.owner		= THIS_MODULE,
};
748 
/* Set by "intel_pstate=disable"; makes intel_pstate_init() return -ENODEV. */
static int __initdata no_load;
750 
751 static int intel_pstate_msrs_not_valid(void)
752 {
753 	/* Check that all the msr's we are using are valid. */
754 	u64 aperf, mperf, tmp;
755 
756 	rdmsrl(MSR_IA32_APERF, aperf);
757 	rdmsrl(MSR_IA32_MPERF, mperf);
758 
759 	if (!intel_pstate_min_pstate() ||
760 		!intel_pstate_max_pstate() ||
761 		!intel_pstate_turbo_pstate())
762 		return -ENODEV;
763 
764 	rdmsrl(MSR_IA32_APERF, tmp);
765 	if (!(tmp - aperf))
766 		return -ENODEV;
767 
768 	rdmsrl(MSR_IA32_MPERF, tmp);
769 	if (!(tmp - mperf))
770 		return -ENODEV;
771 
772 	return 0;
773 }
774 static int __init intel_pstate_init(void)
775 {
776 	int cpu, rc = 0;
777 	const struct x86_cpu_id *id;
778 
779 	if (no_load)
780 		return -ENODEV;
781 
782 	id = x86_match_cpu(intel_pstate_cpu_ids);
783 	if (!id)
784 		return -ENODEV;
785 
786 	if (intel_pstate_msrs_not_valid())
787 		return -ENODEV;
788 
789 	pr_info("Intel P-state driver initializing.\n");
790 
791 	all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus());
792 	if (!all_cpu_data)
793 		return -ENOMEM;
794 	memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus());
795 
796 	rc = cpufreq_register_driver(&intel_pstate_driver);
797 	if (rc)
798 		goto out;
799 
800 	intel_pstate_debug_expose_params();
801 	intel_pstate_sysfs_expose_params();
802 	return rc;
803 out:
804 	get_online_cpus();
805 	for_each_online_cpu(cpu) {
806 		if (all_cpu_data[cpu]) {
807 			del_timer_sync(&all_cpu_data[cpu]->timer);
808 			kfree(all_cpu_data[cpu]);
809 		}
810 	}
811 
812 	put_online_cpus();
813 	vfree(all_cpu_data);
814 	return -ENODEV;
815 }
816 device_initcall(intel_pstate_init);
817 
818 static int __init intel_pstate_setup(char *str)
819 {
820 	if (!str)
821 		return -EINVAL;
822 
823 	if (!strcmp(str, "disable"))
824 		no_load = 1;
825 	return 0;
826 }
827 early_param("intel_pstate", intel_pstate_setup);
828 
/* Module metadata. */
MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");
832