1 /*
2  * acpi-cpufreq.c - ACPI Processor P-States Driver
3  *
4  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
6  *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
7  *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
8  *
9  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10  *
11  *  This program is free software; you can redistribute it and/or modify
12  *  it under the terms of the GNU General Public License as published by
13  *  the Free Software Foundation; either version 2 of the License, or (at
14  *  your option) any later version.
15  *
16  *  This program is distributed in the hope that it will be useful, but
17  *  WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License along
22  *  with this program; if not, write to the Free Software Foundation, Inc.,
23  *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
24  *
25  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
26  */
27 
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/init.h>
31 #include <linux/smp.h>
32 #include <linux/sched.h>
33 #include <linux/cpufreq.h>
34 #include <linux/compiler.h>
35 #include <linux/dmi.h>
36 #include <linux/slab.h>
37 
38 #include <linux/acpi.h>
39 #include <linux/io.h>
40 #include <linux/delay.h>
41 #include <linux/uaccess.h>
42 
43 #include <acpi/processor.h>
44 
45 #include <asm/msr.h>
46 #include <asm/processor.h>
47 #include <asm/cpufeature.h>
48 
MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

/* Prefix prepended to this driver's log messages. */
#define PFX "acpi-cpufreq: "

/*
 * How P-state control/status registers are accessed on a given CPU.
 * Stored in acpi_cpufreq_data.cpu_feature and copied into drv_cmd.type
 * to select the read/write path.
 */
enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

/*
 * Bit masks applied to the value read from / written to the perf control
 * MSR before it is compared against the ACPI status values.
 */
#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

/* Boost-disable bit tested and toggled in MSR_K7_HWCR on AMD CPUs. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
66 
/*
 * Per-policy driver state, reachable through the acfreq_data per-cpu
 * pointer of the policy's CPU.
 */
struct acpi_cpufreq_data {
	/* This CPU's slot in the acpi_perf_data per-cpu area. */
	struct acpi_processor_performance *acpi_data;
	/*
	 * Frequency table built in acpi_cpufreq_cpu_init(); each entry's
	 * driver_data holds the index of the matching ACPI P-state.
	 */
	struct cpufreq_frequency_table *freq_table;
	/* Non-zero forces the next ->target call to write the registers. */
	unsigned int resume;
	/* One of the *_CAPABLE access-method constants. */
	unsigned int cpu_feature;
	/* CPUs reported through the freqdomain_cpus sysfs attribute. */
	cpumask_var_t freqdomain_cpus;
};
74 
/* Per-cpu pointer to the driver state allocated in acpi_cpufreq_cpu_init(). */
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

/* Forward declaration; the initializer appears further down in this file. */
static struct cpufreq_driver acpi_cpufreq_driver;

/*
 * Module parameter: when non-zero, acpi_cpufreq_target() re-reads the
 * hardware after a write to verify that the transition took effect.
 */
static unsigned int acpi_pstate_strict;
/* Per-cpu MSR scratch buffer used by boost_set_msrs(); NULL if unallocated. */
static struct msr __percpu *msrs;
84 
85 static bool boost_state(unsigned int cpu)
86 {
87 	u32 lo, hi;
88 	u64 msr;
89 
90 	switch (boot_cpu_data.x86_vendor) {
91 	case X86_VENDOR_INTEL:
92 		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
93 		msr = lo | ((u64)hi << 32);
94 		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
95 	case X86_VENDOR_AMD:
96 		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
97 		msr = lo | ((u64)hi << 32);
98 		return !(msr & MSR_K7_HWCR_CPB_DIS);
99 	}
100 	return false;
101 }
102 
103 static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
104 {
105 	u32 cpu;
106 	u32 msr_addr;
107 	u64 msr_mask;
108 
109 	switch (boot_cpu_data.x86_vendor) {
110 	case X86_VENDOR_INTEL:
111 		msr_addr = MSR_IA32_MISC_ENABLE;
112 		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
113 		break;
114 	case X86_VENDOR_AMD:
115 		msr_addr = MSR_K7_HWCR;
116 		msr_mask = MSR_K7_HWCR_CPB_DIS;
117 		break;
118 	default:
119 		return;
120 	}
121 
122 	rdmsr_on_cpus(cpumask, msr_addr, msrs);
123 
124 	for_each_cpu(cpu, cpumask) {
125 		struct msr *reg = per_cpu_ptr(msrs, cpu);
126 		if (enable)
127 			reg->q &= ~msr_mask;
128 		else
129 			reg->q |= msr_mask;
130 	}
131 
132 	wrmsr_on_cpus(cpumask, msr_addr, msrs);
133 }
134 
135 static int _store_boost(int val)
136 {
137 	get_online_cpus();
138 	boost_set_msrs(val, cpu_online_mask);
139 	put_online_cpus();
140 	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");
141 
142 	return 0;
143 }
144 
145 static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
146 {
147 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
148 
149 	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
150 }
151 
152 cpufreq_freq_attr_ro(freqdomain_cpus);
153 
154 #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
155 static ssize_t store_boost(const char *buf, size_t count)
156 {
157 	int ret;
158 	unsigned long val = 0;
159 
160 	if (!acpi_cpufreq_driver.boost_supported)
161 		return -EINVAL;
162 
163 	ret = kstrtoul(buf, 10, &val);
164 	if (ret || (val > 1))
165 		return -EINVAL;
166 
167 	_store_boost((int) val);
168 
169 	return count;
170 }
171 
172 static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
173 			 size_t count)
174 {
175 	return store_boost(buf, count);
176 }
177 
178 static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
179 {
180 	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
181 }
182 
183 cpufreq_freq_attr_rw(cpb);
184 #endif
185 
186 static int check_est_cpu(unsigned int cpuid)
187 {
188 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
189 
190 	return cpu_has(cpu, X86_FEATURE_EST);
191 }
192 
193 static int check_amd_hwpstate_cpu(unsigned int cpuid)
194 {
195 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
196 
197 	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
198 }
199 
200 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
201 {
202 	struct acpi_processor_performance *perf;
203 	int i;
204 
205 	perf = data->acpi_data;
206 
207 	for (i = 0; i < perf->state_count; i++) {
208 		if (value == perf->states[i].status)
209 			return data->freq_table[i].frequency;
210 	}
211 	return 0;
212 }
213 
214 static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
215 {
216 	struct cpufreq_frequency_table *pos;
217 	struct acpi_processor_performance *perf;
218 
219 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
220 		msr &= AMD_MSR_RANGE;
221 	else
222 		msr &= INTEL_MSR_RANGE;
223 
224 	perf = data->acpi_data;
225 
226 	cpufreq_for_each_entry(pos, data->freq_table)
227 		if (msr == perf->states[pos->driver_data].status)
228 			return pos->frequency;
229 	return data->freq_table[0].frequency;
230 }
231 
232 static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
233 {
234 	switch (data->cpu_feature) {
235 	case SYSTEM_INTEL_MSR_CAPABLE:
236 	case SYSTEM_AMD_MSR_CAPABLE:
237 		return extract_msr(val, data);
238 	case SYSTEM_IO_CAPABLE:
239 		return extract_io(val, data);
240 	default:
241 		return 0;
242 	}
243 }
244 
/* Target of an MSR based access: the MSR number. */
struct msr_addr {
	u32 reg;
};

/* Target of an I/O port based access: port number and access width. */
struct io_addr {
	u16 port;
	u8 bit_width;
};

/*
 * One register read or write request handed to do_drv_read() /
 * do_drv_write().
 */
struct drv_cmd {
	/* One of the *_CAPABLE constants; selects which 'addr' member is used. */
	unsigned int type;
	/* CPUs on which the access is performed. */
	const struct cpumask *mask;
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
	/* Value to write, or the value obtained by a read. */
	u32 val;
};
263 
264 /* Called via smp_call_function_single(), on the target CPU */
265 static void do_drv_read(void *_cmd)
266 {
267 	struct drv_cmd *cmd = _cmd;
268 	u32 h;
269 
270 	switch (cmd->type) {
271 	case SYSTEM_INTEL_MSR_CAPABLE:
272 	case SYSTEM_AMD_MSR_CAPABLE:
273 		rdmsr(cmd->addr.msr.reg, cmd->val, h);
274 		break;
275 	case SYSTEM_IO_CAPABLE:
276 		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
277 				&cmd->val,
278 				(u32)cmd->addr.io.bit_width);
279 		break;
280 	default:
281 		break;
282 	}
283 }
284 
285 /* Called via smp_call_function_many(), on the target CPUs */
286 static void do_drv_write(void *_cmd)
287 {
288 	struct drv_cmd *cmd = _cmd;
289 	u32 lo, hi;
290 
291 	switch (cmd->type) {
292 	case SYSTEM_INTEL_MSR_CAPABLE:
293 		rdmsr(cmd->addr.msr.reg, lo, hi);
294 		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
295 		wrmsr(cmd->addr.msr.reg, lo, hi);
296 		break;
297 	case SYSTEM_AMD_MSR_CAPABLE:
298 		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
299 		break;
300 	case SYSTEM_IO_CAPABLE:
301 		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
302 				cmd->val,
303 				(u32)cmd->addr.io.bit_width);
304 		break;
305 	default:
306 		break;
307 	}
308 }
309 
310 static void drv_read(struct drv_cmd *cmd)
311 {
312 	int err;
313 	cmd->val = 0;
314 
315 	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
316 	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
317 }
318 
319 static void drv_write(struct drv_cmd *cmd)
320 {
321 	int this_cpu;
322 
323 	this_cpu = get_cpu();
324 	if (cpumask_test_cpu(this_cpu, cmd->mask))
325 		do_drv_write(cmd);
326 	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
327 	put_cpu();
328 }
329 
330 static u32 get_cur_val(const struct cpumask *mask)
331 {
332 	struct acpi_processor_performance *perf;
333 	struct drv_cmd cmd;
334 
335 	if (unlikely(cpumask_empty(mask)))
336 		return 0;
337 
338 	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
339 	case SYSTEM_INTEL_MSR_CAPABLE:
340 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
341 		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
342 		break;
343 	case SYSTEM_AMD_MSR_CAPABLE:
344 		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
345 		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
346 		break;
347 	case SYSTEM_IO_CAPABLE:
348 		cmd.type = SYSTEM_IO_CAPABLE;
349 		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
350 		cmd.addr.io.port = perf->control_register.address;
351 		cmd.addr.io.bit_width = perf->control_register.bit_width;
352 		break;
353 	default:
354 		return 0;
355 	}
356 
357 	cmd.mask = mask;
358 	drv_read(&cmd);
359 
360 	pr_debug("get_cur_val = %u\n", cmd.val);
361 
362 	return cmd.val;
363 }
364 
365 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
366 {
367 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
368 	unsigned int freq;
369 	unsigned int cached_freq;
370 
371 	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
372 
373 	if (unlikely(data == NULL ||
374 		     data->acpi_data == NULL || data->freq_table == NULL)) {
375 		return 0;
376 	}
377 
378 	cached_freq = data->freq_table[data->acpi_data->state].frequency;
379 	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
380 	if (freq != cached_freq) {
381 		/*
382 		 * The dreaded BIOS frequency change behind our back.
383 		 * Force set the frequency on next target call.
384 		 */
385 		data->resume = 1;
386 	}
387 
388 	pr_debug("cur freq = %u\n", freq);
389 
390 	return freq;
391 }
392 
/*
 * check_freqs - poll until the CPUs in @mask report @freq.
 *
 * Reads the hardware up to 100 times, delaying 10 microseconds after each
 * unsuccessful attempt. Returns 1 once the frequency matches, 0 otherwise.
 */
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int tries_left;

	for (tries_left = 100; tries_left > 0; tries_left--) {
		if (extract_freq(get_cur_val(mask), data) == freq)
			return 1;
		udelay(10);
	}

	return 0;
}
407 
408 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
409 			       unsigned int index)
410 {
411 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
412 	struct acpi_processor_performance *perf;
413 	struct drv_cmd cmd;
414 	unsigned int next_perf_state = 0; /* Index into perf table */
415 	int result = 0;
416 
417 	if (unlikely(data == NULL ||
418 	     data->acpi_data == NULL || data->freq_table == NULL)) {
419 		return -ENODEV;
420 	}
421 
422 	perf = data->acpi_data;
423 	next_perf_state = data->freq_table[index].driver_data;
424 	if (perf->state == next_perf_state) {
425 		if (unlikely(data->resume)) {
426 			pr_debug("Called after resume, resetting to P%d\n",
427 				next_perf_state);
428 			data->resume = 0;
429 		} else {
430 			pr_debug("Already at target state (P%d)\n",
431 				next_perf_state);
432 			goto out;
433 		}
434 	}
435 
436 	switch (data->cpu_feature) {
437 	case SYSTEM_INTEL_MSR_CAPABLE:
438 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
439 		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
440 		cmd.val = (u32) perf->states[next_perf_state].control;
441 		break;
442 	case SYSTEM_AMD_MSR_CAPABLE:
443 		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
444 		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
445 		cmd.val = (u32) perf->states[next_perf_state].control;
446 		break;
447 	case SYSTEM_IO_CAPABLE:
448 		cmd.type = SYSTEM_IO_CAPABLE;
449 		cmd.addr.io.port = perf->control_register.address;
450 		cmd.addr.io.bit_width = perf->control_register.bit_width;
451 		cmd.val = (u32) perf->states[next_perf_state].control;
452 		break;
453 	default:
454 		result = -ENODEV;
455 		goto out;
456 	}
457 
458 	/* cpufreq holds the hotplug lock, so we are safe from here on */
459 	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
460 		cmd.mask = policy->cpus;
461 	else
462 		cmd.mask = cpumask_of(policy->cpu);
463 
464 	drv_write(&cmd);
465 
466 	if (acpi_pstate_strict) {
467 		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
468 					data)) {
469 			pr_debug("acpi_cpufreq_target failed (%d)\n",
470 				policy->cpu);
471 			result = -EAGAIN;
472 		}
473 	}
474 
475 	if (!result)
476 		perf->state = next_perf_state;
477 
478 out:
479 	return result;
480 }
481 
482 static unsigned long
483 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
484 {
485 	struct acpi_processor_performance *perf = data->acpi_data;
486 
487 	if (cpu_khz) {
488 		/* search the closest match to cpu_khz */
489 		unsigned int i;
490 		unsigned long freq;
491 		unsigned long freqn = perf->states[0].core_frequency * 1000;
492 
493 		for (i = 0; i < (perf->state_count-1); i++) {
494 			freq = freqn;
495 			freqn = perf->states[i+1].core_frequency * 1000;
496 			if ((2 * cpu_khz) > (freqn + freq)) {
497 				perf->state = i;
498 				return freq;
499 			}
500 		}
501 		perf->state = perf->state_count-1;
502 		return freqn;
503 	} else {
504 		/* assume CPU is at P0... */
505 		perf->state = 0;
506 		return perf->states[0].core_frequency * 1000;
507 	}
508 }
509 
510 static void free_acpi_perf_data(void)
511 {
512 	unsigned int i;
513 
514 	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
515 	for_each_possible_cpu(i)
516 		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
517 				 ->shared_cpu_map);
518 	free_percpu(acpi_perf_data);
519 }
520 
521 static int boost_notify(struct notifier_block *nb, unsigned long action,
522 		      void *hcpu)
523 {
524 	unsigned cpu = (long)hcpu;
525 	const struct cpumask *cpumask;
526 
527 	cpumask = get_cpu_mask(cpu);
528 
529 	/*
530 	 * Clear the boost-disable bit on the CPU_DOWN path so that
531 	 * this cpu cannot block the remaining ones from boosting. On
532 	 * the CPU_UP path we simply keep the boost-disable flag in
533 	 * sync with the current global state.
534 	 */
535 
536 	switch (action) {
537 	case CPU_UP_PREPARE:
538 	case CPU_UP_PREPARE_FROZEN:
539 		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
540 		break;
541 
542 	case CPU_DOWN_PREPARE:
543 	case CPU_DOWN_PREPARE_FROZEN:
544 		boost_set_msrs(1, cpumask);
545 		break;
546 
547 	default:
548 		break;
549 	}
550 
551 	return NOTIFY_OK;
552 }
553 
554 
/* Hotplug notifier block; (un)registered by the boost init/exit helpers. */
static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};
558 
559 /*
560  * acpi_cpufreq_early_init - initialize ACPI P-States library
561  *
562  * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
563  * in order to determine correct frequency and voltage pairings. We can
564  * do _PDC and _PSD and find out the processor dependency for the
565  * actual init that will happen later...
566  */
567 static int __init acpi_cpufreq_early_init(void)
568 {
569 	unsigned int i;
570 	pr_debug("acpi_cpufreq_early_init\n");
571 
572 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
573 	if (!acpi_perf_data) {
574 		pr_debug("Memory allocation error for acpi_perf_data.\n");
575 		return -ENOMEM;
576 	}
577 	for_each_possible_cpu(i) {
578 		if (!zalloc_cpumask_var_node(
579 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
580 			GFP_KERNEL, cpu_to_node(i))) {
581 
582 			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
583 			free_acpi_perf_data();
584 			return -ENOMEM;
585 		}
586 	}
587 
588 	/* Do initialization in ACPI core */
589 	acpi_processor_preregister_performance(acpi_perf_data);
590 	return 0;
591 }
592 
593 #ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
/* Set to 1 by sw_any_bug_found(); consulted in acpi_cpufreq_cpu_init(). */
static int bios_with_sw_any_bug;

/*
 * DMI match callback for sw_any_bug_dmi_table: records that the running
 * system is one of the affected machines. Always returns 0.
 */
static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}
607 
/*
 * Systems known to need the SW_ANY workaround; passed to
 * dmi_check_system() in acpi_cpufreq_cpu_init(). Terminated by an empty
 * entry.
 */
static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};
620 
621 static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
622 {
623 	/* Intel Xeon Processor 7100 Series Specification Update
624 	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
625 	 * AL30: A Machine Check Exception (MCE) Occurring during an
626 	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
627 	 * Both Processor Cores to Lock Up. */
628 	if (c->x86_vendor == X86_VENDOR_INTEL) {
629 		if ((c->x86 == 15) &&
630 		    (c->x86_model == 6) &&
631 		    (c->x86_mask == 8)) {
632 			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
633 			    "Xeon(R) 7100 Errata AL30, processors may "
634 			    "lock up on frequency changes: disabling "
635 			    "acpi-cpufreq.\n");
636 			return -ENODEV;
637 		    }
638 		}
639 	return 0;
640 }
641 #endif
642 
643 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
644 {
645 	unsigned int i;
646 	unsigned int valid_states = 0;
647 	unsigned int cpu = policy->cpu;
648 	struct acpi_cpufreq_data *data;
649 	unsigned int result = 0;
650 	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
651 	struct acpi_processor_performance *perf;
652 #ifdef CONFIG_SMP
653 	static int blacklisted;
654 #endif
655 
656 	pr_debug("acpi_cpufreq_cpu_init\n");
657 
658 #ifdef CONFIG_SMP
659 	if (blacklisted)
660 		return blacklisted;
661 	blacklisted = acpi_cpufreq_blacklist(c);
662 	if (blacklisted)
663 		return blacklisted;
664 #endif
665 
666 	data = kzalloc(sizeof(*data), GFP_KERNEL);
667 	if (!data)
668 		return -ENOMEM;
669 
670 	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
671 		result = -ENOMEM;
672 		goto err_free;
673 	}
674 
675 	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
676 	per_cpu(acfreq_data, cpu) = data;
677 
678 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
679 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
680 
681 	result = acpi_processor_register_performance(data->acpi_data, cpu);
682 	if (result)
683 		goto err_free_mask;
684 
685 	perf = data->acpi_data;
686 	policy->shared_type = perf->shared_type;
687 
688 	/*
689 	 * Will let policy->cpus know about dependency only when software
690 	 * coordination is required.
691 	 */
692 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
693 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
694 		cpumask_copy(policy->cpus, perf->shared_cpu_map);
695 	}
696 	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
697 
698 #ifdef CONFIG_SMP
699 	dmi_check_system(sw_any_bug_dmi_table);
700 	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
701 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
702 		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
703 	}
704 
705 	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
706 		cpumask_clear(policy->cpus);
707 		cpumask_set_cpu(cpu, policy->cpus);
708 		cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu));
709 		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
710 		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
711 	}
712 #endif
713 
714 	/* capability check */
715 	if (perf->state_count <= 1) {
716 		pr_debug("No P-States\n");
717 		result = -ENODEV;
718 		goto err_unreg;
719 	}
720 
721 	if (perf->control_register.space_id != perf->status_register.space_id) {
722 		result = -ENODEV;
723 		goto err_unreg;
724 	}
725 
726 	switch (perf->control_register.space_id) {
727 	case ACPI_ADR_SPACE_SYSTEM_IO:
728 		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
729 		    boot_cpu_data.x86 == 0xf) {
730 			pr_debug("AMD K8 systems must use native drivers.\n");
731 			result = -ENODEV;
732 			goto err_unreg;
733 		}
734 		pr_debug("SYSTEM IO addr space\n");
735 		data->cpu_feature = SYSTEM_IO_CAPABLE;
736 		break;
737 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
738 		pr_debug("HARDWARE addr space\n");
739 		if (check_est_cpu(cpu)) {
740 			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
741 			break;
742 		}
743 		if (check_amd_hwpstate_cpu(cpu)) {
744 			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
745 			break;
746 		}
747 		result = -ENODEV;
748 		goto err_unreg;
749 	default:
750 		pr_debug("Unknown addr space %d\n",
751 			(u32) (perf->control_register.space_id));
752 		result = -ENODEV;
753 		goto err_unreg;
754 	}
755 
756 	data->freq_table = kzalloc(sizeof(*data->freq_table) *
757 		    (perf->state_count+1), GFP_KERNEL);
758 	if (!data->freq_table) {
759 		result = -ENOMEM;
760 		goto err_unreg;
761 	}
762 
763 	/* detect transition latency */
764 	policy->cpuinfo.transition_latency = 0;
765 	for (i = 0; i < perf->state_count; i++) {
766 		if ((perf->states[i].transition_latency * 1000) >
767 		    policy->cpuinfo.transition_latency)
768 			policy->cpuinfo.transition_latency =
769 			    perf->states[i].transition_latency * 1000;
770 	}
771 
772 	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
773 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
774 	    policy->cpuinfo.transition_latency > 20 * 1000) {
775 		policy->cpuinfo.transition_latency = 20 * 1000;
776 		printk_once(KERN_INFO
777 			    "P-state transition latency capped at 20 uS\n");
778 	}
779 
780 	/* table init */
781 	for (i = 0; i < perf->state_count; i++) {
782 		if (i > 0 && perf->states[i].core_frequency >=
783 		    data->freq_table[valid_states-1].frequency / 1000)
784 			continue;
785 
786 		data->freq_table[valid_states].driver_data = i;
787 		data->freq_table[valid_states].frequency =
788 		    perf->states[i].core_frequency * 1000;
789 		valid_states++;
790 	}
791 	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
792 	perf->state = 0;
793 
794 	result = cpufreq_table_validate_and_show(policy, data->freq_table);
795 	if (result)
796 		goto err_freqfree;
797 
798 	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
799 		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
800 
801 	switch (perf->control_register.space_id) {
802 	case ACPI_ADR_SPACE_SYSTEM_IO:
803 		/*
804 		 * The core will not set policy->cur, because
805 		 * cpufreq_driver->get is NULL, so we need to set it here.
806 		 * However, we have to guess it, because the current speed is
807 		 * unknown and not detectable via IO ports.
808 		 */
809 		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
810 		break;
811 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
812 		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
813 		break;
814 	default:
815 		break;
816 	}
817 
818 	/* notify BIOS that we exist */
819 	acpi_processor_notify_smm(THIS_MODULE);
820 
821 	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
822 	for (i = 0; i < perf->state_count; i++)
823 		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
824 			(i == perf->state ? '*' : ' '), i,
825 			(u32) perf->states[i].core_frequency,
826 			(u32) perf->states[i].power,
827 			(u32) perf->states[i].transition_latency);
828 
829 	/*
830 	 * the first call to ->target() should result in us actually
831 	 * writing something to the appropriate registers.
832 	 */
833 	data->resume = 1;
834 
835 	return result;
836 
837 err_freqfree:
838 	kfree(data->freq_table);
839 err_unreg:
840 	acpi_processor_unregister_performance(perf, cpu);
841 err_free_mask:
842 	free_cpumask_var(data->freqdomain_cpus);
843 err_free:
844 	kfree(data);
845 	per_cpu(acfreq_data, cpu) = NULL;
846 
847 	return result;
848 }
849 
850 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
851 {
852 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
853 
854 	pr_debug("acpi_cpufreq_cpu_exit\n");
855 
856 	if (data) {
857 		per_cpu(acfreq_data, policy->cpu) = NULL;
858 		acpi_processor_unregister_performance(data->acpi_data,
859 						      policy->cpu);
860 		free_cpumask_var(data->freqdomain_cpus);
861 		kfree(data->freq_table);
862 		kfree(data);
863 	}
864 
865 	return 0;
866 }
867 
868 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
869 {
870 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
871 
872 	pr_debug("acpi_cpufreq_resume\n");
873 
874 	data->resume = 1;
875 
876 	return 0;
877 }
878 
/*
 * sysfs attributes exported per policy. The first NULL slot is reserved:
 * acpi_cpufreq_init() may store the 'cpb' attribute there, and relies on
 * the second NULL remaining as the list terminator.
 */
static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&freqdomain_cpus,
	NULL,	/* this is a placeholder for cpb, do not remove */
	NULL,
};
885 
/*
 * cpufreq driver descriptor. The .get callback, the CPUFREQ_CONST_LOOPS
 * flag and the boost_supported/boost_enabled fields are filled in at
 * run time elsewhere in this file.
 */
static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= acpi_cpufreq_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
	.set_boost      = _store_boost,
};
897 
898 static void __init acpi_cpufreq_boost_init(void)
899 {
900 	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
901 		msrs = msrs_alloc();
902 
903 		if (!msrs)
904 			return;
905 
906 		acpi_cpufreq_driver.boost_supported = true;
907 		acpi_cpufreq_driver.boost_enabled = boost_state(0);
908 
909 		cpu_notifier_register_begin();
910 
911 		/* Force all MSRs to the same value */
912 		boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
913 			       cpu_online_mask);
914 
915 		__register_cpu_notifier(&boost_nb);
916 
917 		cpu_notifier_register_done();
918 	}
919 }
920 
921 static void acpi_cpufreq_boost_exit(void)
922 {
923 	if (msrs) {
924 		unregister_cpu_notifier(&boost_nb);
925 
926 		msrs_free(msrs);
927 		msrs = NULL;
928 	}
929 }
930 
931 static int __init acpi_cpufreq_init(void)
932 {
933 	int ret;
934 
935 	if (acpi_disabled)
936 		return -ENODEV;
937 
938 	/* don't keep reloading if cpufreq_driver exists */
939 	if (cpufreq_get_current_driver())
940 		return -EEXIST;
941 
942 	pr_debug("acpi_cpufreq_init\n");
943 
944 	ret = acpi_cpufreq_early_init();
945 	if (ret)
946 		return ret;
947 
948 #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
949 	/* this is a sysfs file with a strange name and an even stranger
950 	 * semantic - per CPU instantiation, but system global effect.
951 	 * Lets enable it only on AMD CPUs for compatibility reasons and
952 	 * only if configured. This is considered legacy code, which
953 	 * will probably be removed at some point in the future.
954 	 */
955 	if (check_amd_hwpstate_cpu(0)) {
956 		struct freq_attr **iter;
957 
958 		pr_debug("adding sysfs entry for cpb\n");
959 
960 		for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
961 			;
962 
963 		/* make sure there is a terminator behind it */
964 		if (iter[1] == NULL)
965 			*iter = &cpb;
966 	}
967 #endif
968 	acpi_cpufreq_boost_init();
969 
970 	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
971 	if (ret) {
972 		free_acpi_perf_data();
973 		acpi_cpufreq_boost_exit();
974 	}
975 	return ret;
976 }
977 
/*
 * acpi_cpufreq_exit - module exit point.
 *
 * Tears down boost handling, unregisters the cpufreq driver and finally
 * frees the per-cpu ACPI performance data.
 */
static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}
988 
/* Expose acpi_pstate_strict as a module parameter (mode 0644). */
module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

/* Driver init runs as a late initcall; exit on module removal. */
late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);
996 
/* x86 CPU feature matches exported through MODULE_DEVICE_TABLE(x86cpu). */
static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

/* ACPI processor HIDs exported through MODULE_DEVICE_TABLE(acpi). */
static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

MODULE_ALIAS("acpi");
1012