/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>

#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include <acpi/processor.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

#define PFX "acpi-cpufreq: "

enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)

#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

struct acpi_cpufreq_data {
	struct acpi_processor_performance *acpi_data;
	struct cpufreq_frequency_table *freq_table;
	unsigned int resume;
	unsigned int cpu_feature;
	cpumask_var_t freqdomain_cpus;
};

static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;
static struct msr __percpu *msrs;

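/*
 * Read the vendor-specific MSR and report whether boost (Intel turbo /
 * AMD Core Performance Boost) is currently enabled on @cpu.
 */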
static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

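/*
 * Set or clear the boost-disable bit in the vendor-specific MSR on every
 * CPU in @cpumask; @enable == true clears the disable bit.
 */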
static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
{
	u32 cpu;
	u32 msr_addr;
	u64 msr_mask;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
		break;
	case X86_VENDOR_AMD:
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
		break;
	default:
		return;
	}

	rdmsr_on_cpus(cpumask, msr_addr, msrs);

	for_each_cpu(cpu, cpumask) {
		struct msr *reg = per_cpu_ptr(msrs, cpu);
		if (enable)
			reg->q &= ~msr_mask;
		else
			reg->q |= msr_mask;
	}

	wrmsr_on_cpus(cpumask, msr_addr, msrs);
}

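/* Apply the requested boost state to all online CPUs. */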
static int _store_boost(int val)
{
	get_online_cpus();
	boost_set_msrs(val, cpu_online_mask);
	put_online_cpus();
	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");

	return 0;
}

static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
static ssize_t store_boost(const char *buf, size_t count)
{
	int ret;
	unsigned long val = 0;

	if (!acpi_cpufreq_driver.boost_supported)
		return -EINVAL;

	ret = kstrtoul(buf, 10, &val);
	if (ret || (val > 1))
		return -EINVAL;

	_store_boost((int) val);

	return count;
}

static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	return store_boost(buf, count);
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
}

cpufreq_freq_attr_rw(cpb);
#endif

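/* Feature checks: Enhanced Intel SpeedStep (EST) and AMD hardware P-states. */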
static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_EST);
}

static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}

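/*
 * Map a value read from the ACPI status register (SYSTEM_IO case) back to
 * a frequency in the driver's frequency table; returns 0 if no state matches.
 */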
static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
	struct acpi_processor_performance *perf;
	int i;

	perf = data->acpi_data;

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return data->freq_table[i].frequency;
	}
	return 0;
}

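/*
 * Map a PERF_STATUS MSR value back to a frequency in the frequency table,
 * falling back to the first entry when no state matches.
 */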
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
	int i;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = data->acpi_data;

	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
		if (msr == perf->states[data->freq_table[i].driver_data].status)
			return data->freq_table[i].frequency;
	}
	return data->freq_table[0].frequency;
}

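/* Decode a raw control/status value according to the detected interface. */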
static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(val, data);
	case SYSTEM_IO_CAPABLE:
		return extract_io(val, data);
	default:
		return 0;
	}
}

struct msr_addr {
	u32 reg;
};

struct io_addr {
	u16 port;
	u8 bit_width;
};

struct drv_cmd {
	unsigned int type;
	const struct cpumask *mask;
	union {
		struct msr_addr msr;
		struct io_addr io;
	} addr;
	u32 val;
};

/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 h;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, cmd->val, h);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
				&cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;
	u32 lo, hi;

	switch (cmd->type) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		rdmsr(cmd->addr.msr.reg, lo, hi);
		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
		wrmsr(cmd->addr.msr.reg, lo, hi);
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
		break;
	case SYSTEM_IO_CAPABLE:
		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
				cmd->val,
				(u32)cmd->addr.io.bit_width);
		break;
	default:
		break;
	}
}

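/* Read the current P-state control value on any CPU of cmd->mask. */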
static void drv_read(struct drv_cmd *cmd)
{
	int err;
	cmd->val = 0;

	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
}

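/* Write the new P-state control value on every CPU of cmd->mask. */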
static void drv_write(struct drv_cmd *cmd)
{
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, cmd->mask))
		do_drv_write(cmd);
	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
	put_cpu();
}

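/*
 * Fetch the raw PERF_CTL MSR or ACPI control-port value for the frequency
 * domain described by @mask.
 */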
static u32 get_cur_val(const struct cpumask *mask)
{
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		break;
	default:
		return 0;
	}

	cmd.mask = mask;
	drv_read(&cmd);

	pr_debug("get_cur_val = %u\n", cmd.val);

	return cmd.val;
}

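/* ->get() callback: return the current frequency of @cpu in kHz. */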
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
	unsigned int freq;
	unsigned int cached_freq;

	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);

	if (unlikely(data == NULL ||
		     data->acpi_data == NULL || data->freq_table == NULL)) {
		return 0;
	}

	cached_freq = data->freq_table[data->acpi_data->state].frequency;
	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

	pr_debug("cur freq = %u\n", freq);

	return freq;
}

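/*
 * Used with acpi_pstate_strict: poll the domain (up to 100 x 10us) until it
 * reports the requested frequency; returns 1 on success, 0 on timeout.
 */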
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
				struct acpi_cpufreq_data *data)
{
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(get_cur_val(mask), data);
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

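/* ->target_index() callback: switch the policy's domain to freq_table[index]. */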
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
			       unsigned int index)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
	struct acpi_processor_performance *perf;
	struct drv_cmd cmd;
	unsigned int next_perf_state = 0; /* Index into perf table */
	int result = 0;

	if (unlikely(data == NULL ||
	     data->acpi_data == NULL || data->freq_table == NULL)) {
		return -ENODEV;
	}

	perf = data->acpi_data;
	next_perf_state = data->freq_table[index].driver_data;
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume)) {
			pr_debug("Called after resume, resetting to P%d\n",
				next_perf_state);
			data->resume = 0;
		} else {
			pr_debug("Already at target state (P%d)\n",
				next_perf_state);
			goto out;
		}
	}

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_AMD_MSR_CAPABLE:
		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	case SYSTEM_IO_CAPABLE:
		cmd.type = SYSTEM_IO_CAPABLE;
		cmd.addr.io.port = perf->control_register.address;
		cmd.addr.io.bit_width = perf->control_register.bit_width;
		cmd.val = (u32) perf->states[next_perf_state].control;
		break;
	default:
		result = -ENODEV;
		goto out;
	}

	/* cpufreq holds the hotplug lock, so we are safe from here on */
	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
		cmd.mask = policy->cpus;
	else
		cmd.mask = cpumask_of(policy->cpu);

	drv_write(&cmd);

	if (acpi_pstate_strict) {
		if (!check_freqs(cmd.mask, data->freq_table[index].frequency,
					data)) {
			pr_debug("acpi_cpufreq_target failed (%d)\n",
				policy->cpu);
			result = -EAGAIN;
		}
	}

	if (!result)
		perf->state = next_perf_state;

out:
	return result;
}

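/*
 * Guess the current P-state by picking the table entry closest to the
 * measured cpu_khz; used to seed policy->cur when the current speed cannot
 * be read back (SYSTEM_IO case).
 */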
static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
	struct acpi_processor_performance *perf = data->acpi_data;

	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
		unsigned long freqn = perf->states[0].core_frequency * 1000;

		for (i = 0; i < (perf->state_count-1); i++) {
			freq = freqn;
			freqn = perf->states[i+1].core_frequency * 1000;
			if ((2 * cpu_khz) > (freqn + freq)) {
				perf->state = i;
				return freq;
			}
		}
		perf->state = perf->state_count-1;
		return freqn;
	} else {
		/* assume CPU is at P0... */
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
}

static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

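/*
 * CPU hotplug notifier: keep the per-CPU boost-disable bit consistent when
 * CPUs come online or go offline.
 */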
static int boost_notify(struct notifier_block *nb, unsigned long action,
		      void *hcpu)
{
	unsigned cpu = (long)hcpu;
	const struct cpumask *cpumask;

	cpumask = get_cpu_mask(cpu);

	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting. On
	 * the CPU_UP path we simply keep the boost-disable flag in
	 * sync with the current global state.
	 */

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
		break;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		boost_set_msrs(1, cpumask);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}


static struct notifier_block boost_nb = {
	.notifier_call          = boost_notify,
};

/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
static int __init acpi_cpufreq_early_init(void)
{
	unsigned int i;
	pr_debug("acpi_cpufreq_early_init\n");

	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		pr_debug("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		if (!zalloc_cpumask_var_node(
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}

	/* Do initialization in ACPI core */
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either set up in hardware
 * or done in BIOS firmware, without informing the OS. If this goes
 * undetected, the CPU may end up running at a speed other than the one the
 * OS requested. Detect the affected systems and handle them cleanly.
 */
static int bios_with_sw_any_bug;

static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}

static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
	/* Intel Xeon Processor 7100 Series Specification Update
	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
	 * Both Processor Cores to Lock Up. */
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
		    (c->x86_mask == 8)) {
			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
			    "Xeon(R) 7100 Errata AL30, processors may "
			    "lock up on frequency changes: disabling "
			    "acpi-cpufreq.\n");
			return -ENODEV;
		}
	}
	return 0;
}
#endif

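/*
 * ->init() callback: register the CPU with the ACPI perflib, pick the
 * control interface (MSR or I/O port) and build the frequency table.
 */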
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	unsigned int i;
	unsigned int valid_states = 0;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	unsigned int result = 0;
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
	struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
	static int blacklisted;
#endif

	pr_debug("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
#endif

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
	per_cpu(acfreq_data, cpu) = data;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

	result = acpi_processor_register_performance(data->acpi_data, cpu);
	if (result)
		goto err_free_mask;

	perf = data->acpi_data;
	policy->shared_type = perf->shared_type;

	/*
	 * Let policy->cpus reflect the frequency-domain dependency only
	 * when software coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
	}
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
	}

	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
		cpumask_copy(data->freqdomain_cpus, cpu_sibling_mask(cpu));
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
	}
#endif

	/* capability check */
	if (perf->state_count <= 1) {
		pr_debug("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}

	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
		pr_debug("SYSTEM IO addr space\n");
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		pr_debug("HARDWARE addr space\n");
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			break;
		}
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
	default:
		pr_debug("Unknown addr space %d\n",
			(u32) (perf->control_register.space_id));
		result = -ENODEV;
		goto err_unreg;
	}

	data->freq_table = kzalloc(sizeof(*data->freq_table) *
		    (perf->state_count+1), GFP_KERNEL);
	if (!data->freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
	for (i = 0; i < perf->state_count; i++) {
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
	}

	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
		printk_once(KERN_INFO
			    "P-state transition latency capped at 20 uS\n");
	}

	/* table init */
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
		    data->freq_table[valid_states-1].frequency / 1000)
			continue;

		data->freq_table[valid_states].driver_data = i;
		data->freq_table[valid_states].frequency =
		    perf->states[i].core_frequency * 1000;
		valid_states++;
	}
	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
	perf->state = 0;

	result = cpufreq_table_validate_and_show(policy, data->freq_table);
	if (result)
		goto err_freqfree;

	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
		break;
	default:
		break;
	}

	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
	for (i = 0; i < perf->state_count; i++)
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
			(i == perf->state ? '*' : ' '), i,
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);

	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;

	return result;

err_freqfree:
	kfree(data->freq_table);
err_unreg:
	acpi_processor_unregister_performance(perf, cpu);
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
err_free:
	kfree(data);
	per_cpu(acfreq_data, cpu) = NULL;

	return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_cpu_exit\n");

	if (data) {
		per_cpu(acfreq_data, policy->cpu) = NULL;
		acpi_processor_unregister_performance(data->acpi_data,
						      policy->cpu);
		free_cpumask_var(data->freqdomain_cpus);
		kfree(data->freq_table);
		kfree(data);
	}

	return 0;
}

static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);

	pr_debug("acpi_cpufreq_resume\n");

	data->resume = 1;

	return 0;
}

static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&freqdomain_cpus,
	NULL,	/* this is a placeholder for cpb, do not remove */
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= acpi_cpufreq_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
	.set_boost      = _store_boost,
};

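/*
 * If the CPU advertises boost support (CPB on AMD, IDA on Intel), enable
 * the boost interface and bring all online CPUs to a consistent state.
 */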
static void __init acpi_cpufreq_boost_init(void)
{
	if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
		msrs = msrs_alloc();

		if (!msrs)
			return;

		acpi_cpufreq_driver.boost_supported = true;
		acpi_cpufreq_driver.boost_enabled = boost_state(0);

		cpu_notifier_register_begin();

		/* Force all MSRs to the same value */
		boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
			       cpu_online_mask);

		__register_cpu_notifier(&boost_nb);

		cpu_notifier_register_done();
	}
}

static void acpi_cpufreq_boost_exit(void)
{
	if (msrs) {
		unregister_cpu_notifier(&boost_nb);

		msrs_free(msrs);
		msrs = NULL;
	}
}

static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("acpi_cpufreq_init\n");

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Let's enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (check_amd_hwpstate_cpu(0)) {
		struct freq_attr **iter;

		pr_debug("adding sysfs entry for cpb\n");

		for (iter = acpi_cpufreq_attr; *iter != NULL; iter++)
			;

		/* make sure there is a terminator behind it */
		if (iter[1] == NULL)
			*iter = &cpb;
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}
	return ret;
}

static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("acpi_cpufreq_exit\n");

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

static const struct x86_cpu_id acpi_cpufreq_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

static const struct acpi_device_id processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

MODULE_ALIAS("acpi");