1 /*
2  * cpufreq_snb.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <trace/events/power.h>
29 
30 #include <asm/div64.h>
31 #include <asm/msr.h>
32 #include <asm/cpu_device_id.h>
33 
/* Number of slots in the per-CPU samples[] ring (indexed by sample_ptr). */
#define SAMPLE_COUNT		3
35 
/*
 * Signed fixed-point helpers: values carry FRAC_BITS fractional bits, so
 * int_tofp(1) represents 1.0 and fp_toint() drops the fractional part.
 */
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)

/*
 * mul_fp - multiply two fixed-point values.
 *
 * The product is formed in 64 bits and shifted back down by FRAC_BITS so
 * the result is again a fixed-point value.
 */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	int64_t product = (int64_t)x * y;

	return product >> FRAC_BITS;
}
44 
45 static inline int32_t div_fp(int32_t x, int32_t y)
46 {
47 	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
48 }
49 
/*
 * One measurement interval, filled in by intel_pstate_sample() and
 * intel_pstate_calc_busy().
 */
struct sample {
	/* timestamp of the previous sampling point */
	ktime_t start_time;
	/* timestamp taken when this sample was recorded */
	ktime_t end_time;
	/* pstate_pct_busy * (aperf * 100 / mperf) / 100 */
	int core_pct_busy;
	/* 100 - idletime_us * 100 / duration_us */
	int pstate_pct_busy;
	/* microseconds between start_time and end_time */
	u64 duration_us;
	/* growth of the CPU idle-time counter over the interval */
	u64 idletime_us;
	/* APERF / MPERF MSR deltas over the interval */
	u64 aperf;
	u64 mperf;
	/* turbo_pstate * (aperf * 100 / mperf) * 1000; returned by ->get() */
	int freq;
};
61 
/* P-state values of one CPU. */
struct pstate_data {
	/* last value programmed by intel_pstate_set_pstate() */
	int	current_pstate;
	/* bits 47:40 of MSR 0xCE */
	int	min_pstate;
	/* bits 15:8 of MSR 0xCE */
	int	max_pstate;
	/* low byte of MSR 0x1AD, never lower than max_pstate */
	int	turbo_pstate;
};
68 
/* State of one PID controller instance (see pid_calc()). */
struct _pid {
	/* target value the input is compared against */
	int setpoint;
	/* accumulated error, fixed point, clamped to +/- int_tofp(30) */
	int32_t integral;
	/* gains, fixed point, set as percent / 100 */
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	/* errors with magnitude <= deadband produce no output */
	int deadband;
	/* error seen by the previous non-deadband pid_calc() (or pid_reset()) */
	int last_err;
};
78 
/* Per-CPU driver state; one instance per entry of all_cpu_data[]. */
struct cpudata {
	/* CPU number this instance controls */
	int cpu;

	/* descriptive string set in intel_pstate_get_cpu_pstates() */
	char name[64];

	/* periodic sampling timer, handled by intel_pstate_timer_func() */
	struct timer_list timer;

	/* tuning parameters taken from the matched x86_cpu_id entry */
	struct pstate_adjust_policy *pstate_policy;
	struct pstate_data pstate;
	/* controller used while idle_mode == 0 */
	struct _pid pid;
	/* controller used while idle_mode != 0 */
	struct _pid idle_pid;

	/* only touched when XPERF_FIX is defined */
	int min_pstate_count;
	/* selects which controller the timer handler runs */
	int idle_mode;

	/* readings captured at the previous sampling point */
	ktime_t prev_sample;
	u64	prev_idle_time_us;
	u64	prev_aperf;
	u64	prev_mperf;
	/* index of the most recently written slot of samples[] */
	int	sample_ptr;
	struct sample samples[SAMPLE_COUNT];
};
101 
/* Table of per-CPU state, indexed by CPU number; NULL for unmanaged CPUs. */
static struct cpudata **all_cpu_data;
/* Tunables for the sampling timer and the PID controllers. */
struct pstate_adjust_policy {
	/* sampling period in milliseconds */
	int sample_rate_ms;
	/* passed to pid_reset() as the controller deadband */
	int deadband;
	/* setpoint of the busy controller */
	int setpoint;
	/* controller gains expressed in percent */
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};
111 
/*
 * Policy referenced by every entry of intel_pstate_cpu_ids[]; its fields
 * are also exposed read/write through debugfs (see pid_files[]).
 */
static struct pstate_adjust_policy default_policy = {
	.sample_rate_ms = 10,
	.deadband = 0,
	.setpoint = 109,
	.p_gain_pct = 17,
	.d_gain_pct = 0,
	.i_gain_pct = 4,
};
120 
/* Global performance limits applied when choosing a P-state. */
struct perf_limits {
	/* non-zero: cap at max_pstate instead of turbo_pstate */
	int no_turbo;
	/* limits in percent, 0..100 */
	int max_perf_pct;
	int min_perf_pct;
	/* the same limits as fixed-point fractions (pct / 100) */
	int32_t max_perf;
	int32_t min_perf;
};
128 
/* Initial limits: turbo allowed, maximum 100% (1.0), minimum 0%. */
static struct perf_limits limits = {
	.no_turbo = 0,
	.max_perf_pct = 100,
	.max_perf = int_tofp(1),
	.min_perf_pct = 0,
	.min_perf = 0,
};
136 
137 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
138 			int deadband, int integral) {
139 	pid->setpoint = setpoint;
140 	pid->deadband  = deadband;
141 	pid->integral  = int_tofp(integral);
142 	pid->last_err  = setpoint - busy;
143 }
144 
145 static inline void pid_p_gain_set(struct _pid *pid, int percent)
146 {
147 	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
148 }
149 
150 static inline void pid_i_gain_set(struct _pid *pid, int percent)
151 {
152 	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
153 }
154 
155 static inline void pid_d_gain_set(struct _pid *pid, int percent)
156 {
157 
158 	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
159 }
160 
161 static signed int pid_calc(struct _pid *pid, int busy)
162 {
163 	signed int err, result;
164 	int32_t pterm, dterm, fp_error;
165 	int32_t integral_limit;
166 
167 	err = pid->setpoint - busy;
168 	fp_error = int_tofp(err);
169 
170 	if (abs(err) <= pid->deadband)
171 		return 0;
172 
173 	pterm = mul_fp(pid->p_gain, fp_error);
174 
175 	pid->integral += fp_error;
176 
177 	/* limit the integral term */
178 	integral_limit = int_tofp(30);
179 	if (pid->integral > integral_limit)
180 		pid->integral = integral_limit;
181 	if (pid->integral < -integral_limit)
182 		pid->integral = -integral_limit;
183 
184 	dterm = mul_fp(pid->d_gain, (err - pid->last_err));
185 	pid->last_err = err;
186 
187 	result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
188 
189 	return (signed int)fp_toint(result);
190 }
191 
192 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
193 {
194 	pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct);
195 	pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct);
196 	pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct);
197 
198 	pid_reset(&cpu->pid,
199 		cpu->pstate_policy->setpoint,
200 		100,
201 		cpu->pstate_policy->deadband,
202 		0);
203 }
204 
205 static inline void intel_pstate_idle_pid_reset(struct cpudata *cpu)
206 {
207 	pid_p_gain_set(&cpu->idle_pid, cpu->pstate_policy->p_gain_pct);
208 	pid_d_gain_set(&cpu->idle_pid, cpu->pstate_policy->d_gain_pct);
209 	pid_i_gain_set(&cpu->idle_pid, cpu->pstate_policy->i_gain_pct);
210 
211 	pid_reset(&cpu->idle_pid,
212 		75,
213 		50,
214 		cpu->pstate_policy->deadband,
215 		0);
216 }
217 
218 static inline void intel_pstate_reset_all_pid(void)
219 {
220 	unsigned int cpu;
221 	for_each_online_cpu(cpu) {
222 		if (all_cpu_data[cpu])
223 			intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
224 	}
225 }
226 
227 /************************** debugfs begin ************************/
228 static int pid_param_set(void *data, u64 val)
229 {
230 	*(u32 *)data = val;
231 	intel_pstate_reset_all_pid();
232 	return 0;
233 }
234 static int pid_param_get(void *data, u64 *val)
235 {
236 	*val = *(u32 *)data;
237 	return 0;
238 }
/* File operations for the tunables; values are formatted as "%llu\n". */
DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get,
			pid_param_set, "%llu\n");
241 
/* Maps a debugfs file name to the variable it reads and writes. */
struct pid_param {
	char *name;
	void *value;
};
246 
/*
 * Tunables exported through debugfs; each entry points into
 * default_policy.  The list is terminated by a NULL name.
 */
static struct pid_param pid_files[] = {
	{"sample_rate_ms", &default_policy.sample_rate_ms},
	{"d_gain_pct", &default_policy.d_gain_pct},
	{"i_gain_pct", &default_policy.i_gain_pct},
	{"deadband", &default_policy.deadband},
	{"setpoint", &default_policy.setpoint},
	{"p_gain_pct", &default_policy.p_gain_pct},
	{NULL, NULL}
};
256 
257 static struct dentry *debugfs_parent;
258 static void intel_pstate_debug_expose_params(void)
259 {
260 	int i = 0;
261 
262 	debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
263 	if (IS_ERR_OR_NULL(debugfs_parent))
264 		return;
265 	while (pid_files[i].name) {
266 		debugfs_create_file(pid_files[i].name, 0660,
267 				debugfs_parent, pid_files[i].value,
268 				&fops_pid_param);
269 		i++;
270 	}
271 }
272 
273 /************************** debugfs end ************************/
274 
275 /************************** sysfs begin ************************/
/*
 * show_one - generate the sysfs show handler show_<file_name>(), which
 * prints limits.<object> followed by a newline using the "%u" format.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}
282 
283 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
284 				const char *buf, size_t count)
285 {
286 	unsigned int input;
287 	int ret;
288 	ret = sscanf(buf, "%u", &input);
289 	if (ret != 1)
290 		return -EINVAL;
291 	limits.no_turbo = clamp_t(int, input, 0 , 1);
292 
293 	return count;
294 }
295 
296 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
297 				const char *buf, size_t count)
298 {
299 	unsigned int input;
300 	int ret;
301 	ret = sscanf(buf, "%u", &input);
302 	if (ret != 1)
303 		return -EINVAL;
304 
305 	limits.max_perf_pct = clamp_t(int, input, 0 , 100);
306 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
307 	return count;
308 }
309 
310 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
311 				const char *buf, size_t count)
312 {
313 	unsigned int input;
314 	int ret;
315 	ret = sscanf(buf, "%u", &input);
316 	if (ret != 1)
317 		return -EINVAL;
318 	limits.min_perf_pct = clamp_t(int, input, 0 , 100);
319 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
320 
321 	return count;
322 }
323 
/* Show handlers for the three limits fields. */
show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

/* Read/write attribute objects pairing the show_ and store_ handlers. */
define_one_global_rw(no_turbo);
define_one_global_rw(max_perf_pct);
define_one_global_rw(min_perf_pct);
331 
/* NULL-terminated list of the attributes placed in the sysfs group. */
static struct attribute *intel_pstate_attributes[] = {
	&no_turbo.attr,
	&max_perf_pct.attr,
	&min_perf_pct.attr,
	NULL
};
338 
/* Attribute group registered by intel_pstate_sysfs_expose_params(). */
static struct attribute_group intel_pstate_attr_group = {
	.attrs = intel_pstate_attributes,
};
342 static struct kobject *intel_pstate_kobject;
343 
344 static void intel_pstate_sysfs_expose_params(void)
345 {
346 	int rc;
347 
348 	intel_pstate_kobject = kobject_create_and_add("intel_pstate",
349 						&cpu_subsys.dev_root->kobj);
350 	BUG_ON(!intel_pstate_kobject);
351 	rc = sysfs_create_group(intel_pstate_kobject,
352 				&intel_pstate_attr_group);
353 	BUG_ON(rc);
354 }
355 
356 /************************** sysfs end ************************/
357 
358 static int intel_pstate_min_pstate(void)
359 {
360 	u64 value;
361 	rdmsrl(0xCE, value);
362 	return (value >> 40) & 0xFF;
363 }
364 
365 static int intel_pstate_max_pstate(void)
366 {
367 	u64 value;
368 	rdmsrl(0xCE, value);
369 	return (value >> 8) & 0xFF;
370 }
371 
372 static int intel_pstate_turbo_pstate(void)
373 {
374 	u64 value;
375 	int nont, ret;
376 	rdmsrl(0x1AD, value);
377 	nont = intel_pstate_max_pstate();
378 	ret = ((value) & 255);
379 	if (ret <= nont)
380 		ret = nont;
381 	return ret;
382 }
383 
384 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
385 {
386 	int max_perf = cpu->pstate.turbo_pstate;
387 	int min_perf;
388 	if (limits.no_turbo)
389 		max_perf = cpu->pstate.max_pstate;
390 
391 	max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
392 	*max = clamp_t(int, max_perf,
393 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
394 
395 	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
396 	*min = clamp_t(int, min_perf,
397 			cpu->pstate.min_pstate, max_perf);
398 }
399 
400 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
401 {
402 	int max_perf, min_perf;
403 
404 	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
405 
406 	pstate = clamp_t(int, pstate, min_perf, max_perf);
407 
408 	if (pstate == cpu->pstate.current_pstate)
409 		return;
410 
411 #ifndef MODULE
412 	trace_cpu_frequency(pstate * 100000, cpu->cpu);
413 #endif
414 	cpu->pstate.current_pstate = pstate;
415 	wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
416 
417 }
418 
419 static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps)
420 {
421 	int target;
422 	target = cpu->pstate.current_pstate + steps;
423 
424 	intel_pstate_set_pstate(cpu, target);
425 }
426 
427 static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps)
428 {
429 	int target;
430 	target = cpu->pstate.current_pstate - steps;
431 	intel_pstate_set_pstate(cpu, target);
432 }
433 
434 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
435 {
436 	sprintf(cpu->name, "Intel 2nd generation core");
437 
438 	cpu->pstate.min_pstate = intel_pstate_min_pstate();
439 	cpu->pstate.max_pstate = intel_pstate_max_pstate();
440 	cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate();
441 
442 	/*
443 	 * goto max pstate so we don't slow up boot if we are built-in if we are
444 	 * a module we will take care of it during normal operation
445 	 */
446 	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
447 }
448 
449 static inline void intel_pstate_calc_busy(struct cpudata *cpu,
450 					struct sample *sample)
451 {
452 	u64 core_pct;
453 	sample->pstate_pct_busy = 100 - div64_u64(
454 					sample->idletime_us * 100,
455 					sample->duration_us);
456 	core_pct = div64_u64(sample->aperf * 100, sample->mperf);
457 	sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000;
458 
459 	sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct),
460 					100);
461 }
462 
463 static inline void intel_pstate_sample(struct cpudata *cpu)
464 {
465 	ktime_t now;
466 	u64 idle_time_us;
467 	u64 aperf, mperf;
468 
469 	now = ktime_get();
470 	idle_time_us = get_cpu_idle_time_us(cpu->cpu, NULL);
471 
472 	rdmsrl(MSR_IA32_APERF, aperf);
473 	rdmsrl(MSR_IA32_MPERF, mperf);
474 	/* for the first sample, don't actually record a sample, just
475 	 * set the baseline */
476 	if (cpu->prev_idle_time_us > 0) {
477 		cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
478 		cpu->samples[cpu->sample_ptr].start_time = cpu->prev_sample;
479 		cpu->samples[cpu->sample_ptr].end_time = now;
480 		cpu->samples[cpu->sample_ptr].duration_us =
481 			ktime_us_delta(now, cpu->prev_sample);
482 		cpu->samples[cpu->sample_ptr].idletime_us =
483 			idle_time_us - cpu->prev_idle_time_us;
484 
485 		cpu->samples[cpu->sample_ptr].aperf = aperf;
486 		cpu->samples[cpu->sample_ptr].mperf = mperf;
487 		cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf;
488 		cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf;
489 
490 		intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]);
491 	}
492 
493 	cpu->prev_sample = now;
494 	cpu->prev_idle_time_us = idle_time_us;
495 	cpu->prev_aperf = aperf;
496 	cpu->prev_mperf = mperf;
497 }
498 
499 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
500 {
501 	int sample_time, delay;
502 
503 	sample_time = cpu->pstate_policy->sample_rate_ms;
504 	delay = msecs_to_jiffies(sample_time);
505 	delay -= jiffies % delay;
506 	mod_timer_pinned(&cpu->timer, jiffies + delay);
507 }
508 
/* Switch @cpu to idle mode: the timer handler then runs the idle PID. */
static inline void intel_pstate_idle_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 1;
}
513 
/* Switch @cpu to normal mode: the timer handler then runs the busy PID. */
static inline void intel_pstate_normal_mode(struct cpudata *cpu)
{
	cpu->idle_mode = 0;
}
518 
519 static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
520 {
521 	int32_t busy_scaled;
522 	int32_t core_busy, turbo_pstate, current_pstate;
523 
524 	core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
525 	turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
526 	current_pstate = int_tofp(cpu->pstate.current_pstate);
527 	busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));
528 
529 	return fp_toint(busy_scaled);
530 }
531 
532 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
533 {
534 	int busy_scaled;
535 	struct _pid *pid;
536 	signed int ctl = 0;
537 	int steps;
538 
539 	pid = &cpu->pid;
540 	busy_scaled = intel_pstate_get_scaled_busy(cpu);
541 
542 	ctl = pid_calc(pid, busy_scaled);
543 
544 	steps = abs(ctl);
545 	if (ctl < 0)
546 		intel_pstate_pstate_increase(cpu, steps);
547 	else
548 		intel_pstate_pstate_decrease(cpu, steps);
549 }
550 
551 static inline void intel_pstate_adjust_idle_pstate(struct cpudata *cpu)
552 {
553 	int busy_scaled;
554 	struct _pid *pid;
555 	int ctl = 0;
556 	int steps;
557 
558 	pid = &cpu->idle_pid;
559 
560 	busy_scaled = intel_pstate_get_scaled_busy(cpu);
561 
562 	ctl = pid_calc(pid, 100 - busy_scaled);
563 
564 	steps = abs(ctl);
565 	if (ctl < 0)
566 		intel_pstate_pstate_decrease(cpu, steps);
567 	else
568 		intel_pstate_pstate_increase(cpu, steps);
569 
570 	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate)
571 		intel_pstate_normal_mode(cpu);
572 }
573 
574 static void intel_pstate_timer_func(unsigned long __data)
575 {
576 	struct cpudata *cpu = (struct cpudata *) __data;
577 
578 	intel_pstate_sample(cpu);
579 
580 	if (!cpu->idle_mode)
581 		intel_pstate_adjust_busy_pstate(cpu);
582 	else
583 		intel_pstate_adjust_idle_pstate(cpu);
584 
585 #if defined(XPERF_FIX)
586 	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
587 		cpu->min_pstate_count++;
588 		if (!(cpu->min_pstate_count % 5)) {
589 			intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
590 			intel_pstate_idle_mode(cpu);
591 		}
592 	} else
593 		cpu->min_pstate_count = 0;
594 #endif
595 	intel_pstate_set_sample_time(cpu);
596 }
597 
/*
 * Build an x86_cpu_id entry for an Intel family-6 CPU of the given model,
 * any feature, with the address of @policy stored as driver data.
 */
#define ICPU(model, policy) \
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&policy }
600 
/*
 * CPU models handled by this driver; all use default_policy.
 * NOTE(review): models 0x2a / 0x2d are presumably the Sandy Bridge
 * client and server parts - confirm.
 */
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
	ICPU(0x2a, default_policy),
	ICPU(0x2d, default_policy),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
607 
608 static int intel_pstate_init_cpu(unsigned int cpunum)
609 {
610 
611 	const struct x86_cpu_id *id;
612 	struct cpudata *cpu;
613 
614 	id = x86_match_cpu(intel_pstate_cpu_ids);
615 	if (!id)
616 		return -ENODEV;
617 
618 	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
619 	if (!all_cpu_data[cpunum])
620 		return -ENOMEM;
621 
622 	cpu = all_cpu_data[cpunum];
623 
624 	intel_pstate_get_cpu_pstates(cpu);
625 
626 	cpu->cpu = cpunum;
627 	cpu->pstate_policy =
628 		(struct pstate_adjust_policy *)id->driver_data;
629 	init_timer_deferrable(&cpu->timer);
630 	cpu->timer.function = intel_pstate_timer_func;
631 	cpu->timer.data =
632 		(unsigned long)cpu;
633 	cpu->timer.expires = jiffies + HZ/100;
634 	intel_pstate_busy_pid_reset(cpu);
635 	intel_pstate_idle_pid_reset(cpu);
636 	intel_pstate_sample(cpu);
637 	intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate);
638 
639 	add_timer_on(&cpu->timer, cpunum);
640 
641 	pr_info("Intel pstate controlling: cpu %d\n", cpunum);
642 
643 	return 0;
644 }
645 
646 static unsigned int intel_pstate_get(unsigned int cpu_num)
647 {
648 	struct sample *sample;
649 	struct cpudata *cpu;
650 
651 	cpu = all_cpu_data[cpu_num];
652 	if (!cpu)
653 		return 0;
654 	sample = &cpu->samples[cpu->sample_ptr];
655 	return sample->freq;
656 }
657 
658 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
659 {
660 	struct cpudata *cpu;
661 	int min, max;
662 
663 	cpu = all_cpu_data[policy->cpu];
664 
665 	if (!policy->cpuinfo.max_freq)
666 		return -ENODEV;
667 
668 	intel_pstate_get_min_max(cpu, &min, &max);
669 
670 	limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
671 	limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
672 	limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
673 
674 	limits.max_perf_pct = policy->max * 100 / policy->cpuinfo.max_freq;
675 	limits.max_perf_pct = clamp_t(int, limits.max_perf_pct, 0 , 100);
676 	limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
677 
678 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
679 		limits.min_perf_pct = 100;
680 		limits.min_perf = int_tofp(1);
681 		limits.max_perf_pct = 100;
682 		limits.max_perf = int_tofp(1);
683 		limits.no_turbo = 0;
684 	}
685 
686 	return 0;
687 }
688 
689 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
690 {
691 	cpufreq_verify_within_limits(policy,
692 				policy->cpuinfo.min_freq,
693 				policy->cpuinfo.max_freq);
694 
695 	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
696 		(policy->policy != CPUFREQ_POLICY_PERFORMANCE))
697 		return -EINVAL;
698 
699 	return 0;
700 }
701 
702 static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy)
703 {
704 	int cpu = policy->cpu;
705 
706 	del_timer(&all_cpu_data[cpu]->timer);
707 	kfree(all_cpu_data[cpu]);
708 	all_cpu_data[cpu] = NULL;
709 	return 0;
710 }
711 
712 static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy)
713 {
714 	int rc, min_pstate, max_pstate;
715 	struct cpudata *cpu;
716 
717 	rc = intel_pstate_init_cpu(policy->cpu);
718 	if (rc)
719 		return rc;
720 
721 	cpu = all_cpu_data[policy->cpu];
722 
723 	if (!limits.no_turbo &&
724 		limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
725 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
726 	else
727 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
728 
729 	intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
730 	policy->min = min_pstate * 100000;
731 	policy->max = max_pstate * 100000;
732 
733 	/* cpuinfo and default policy values */
734 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
735 	policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
736 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
737 	cpumask_set_cpu(policy->cpu, policy->cpus);
738 
739 	return 0;
740 }
741 
/* cpufreq driver description: a setpolicy-style driver. */
static struct cpufreq_driver intel_pstate_driver = {
	.flags		= CPUFREQ_CONST_LOOPS,
	.verify		= intel_pstate_verify_policy,
	.setpolicy	= intel_pstate_set_policy,
	.get		= intel_pstate_get,
	.init		= intel_pstate_cpu_init,
	.exit		= intel_pstate_cpu_exit,
	.name		= "intel_pstate",
	.owner		= THIS_MODULE,
};
752 
/* Set to 1 by the "intel_pstate=disable" boot parameter. */
static int __initdata no_load;
754 
755 static int __init intel_pstate_init(void)
756 {
757 	int cpu, rc = 0;
758 	const struct x86_cpu_id *id;
759 
760 	if (no_load)
761 		return -ENODEV;
762 
763 	id = x86_match_cpu(intel_pstate_cpu_ids);
764 	if (!id)
765 		return -ENODEV;
766 
767 	pr_info("Intel P-state driver initializing.\n");
768 
769 	all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus());
770 	if (!all_cpu_data)
771 		return -ENOMEM;
772 	memset(all_cpu_data, 0, sizeof(void *) * num_possible_cpus());
773 
774 	rc = cpufreq_register_driver(&intel_pstate_driver);
775 	if (rc)
776 		goto out;
777 
778 	intel_pstate_debug_expose_params();
779 	intel_pstate_sysfs_expose_params();
780 	return rc;
781 out:
782 	get_online_cpus();
783 	for_each_online_cpu(cpu) {
784 		if (all_cpu_data[cpu]) {
785 			del_timer_sync(&all_cpu_data[cpu]->timer);
786 			kfree(all_cpu_data[cpu]);
787 		}
788 	}
789 
790 	put_online_cpus();
791 	vfree(all_cpu_data);
792 	return -ENODEV;
793 }
794 device_initcall(intel_pstate_init);
795 
796 static int __init intel_pstate_setup(char *str)
797 {
798 	if (!str)
799 		return -EINVAL;
800 
801 	if (!strcmp(str, "disable"))
802 		no_load = 1;
803 	return 0;
804 }
805 early_param("intel_pstate", intel_pstate_setup);
806 
807 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
808 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
809 MODULE_LICENSE("GPL");
810