1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * acpi-cpufreq.c - ACPI Processor P-States Driver
4  *
5  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
6  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
7  *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
8  *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
9  */
10 
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/init.h>
16 #include <linux/smp.h>
17 #include <linux/sched.h>
18 #include <linux/cpufreq.h>
19 #include <linux/compiler.h>
20 #include <linux/dmi.h>
21 #include <linux/slab.h>
22 
23 #include <linux/acpi.h>
24 #include <linux/io.h>
25 #include <linux/delay.h>
26 #include <linux/uaccess.h>
27 
28 #include <acpi/processor.h>
29 
30 #include <asm/msr.h>
31 #include <asm/processor.h>
32 #include <asm/cpufeature.h>
33 
34 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
35 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
36 MODULE_LICENSE("GPL");
37 
/*
 * Register access method selected for a policy; stored in
 * acpi_cpufreq_data.cpu_feature and used by extract_freq() to pick the
 * matching decoder.
 */
enum {
	UNDEFINED_CAPABLE		= 0,	/* nothing selected yet */
	SYSTEM_INTEL_MSR_CAPABLE	= 1,	/* cpu_freq_{read,write}_intel */
	SYSTEM_AMD_MSR_CAPABLE		= 2,	/* cpu_freq_{read,write}_amd */
	SYSTEM_IO_CAPABLE		= 3,	/* cpu_freq_{read,write}_io */
};
44 
/* Per-vendor mask applied to a register value before it is compared. */
#define INTEL_MSR_RANGE		0xffff
#define AMD_MSR_RANGE		0x7
#define HYGON_MSR_RANGE		0x7

/* Bit of MSR_K7_HWCR that disables boosting while it is set. */
#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
50 
51 struct acpi_cpufreq_data {
52 	unsigned int resume;
53 	unsigned int cpu_feature;
54 	unsigned int acpi_perf_cpu;
55 	cpumask_var_t freqdomain_cpus;
56 	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
57 	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
58 };
59 
60 /* acpi_perf_data is a pointer to percpu data. */
61 static struct acpi_processor_performance __percpu *acpi_perf_data;
62 
63 static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
64 {
65 	return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu);
66 }
67 
68 static struct cpufreq_driver acpi_cpufreq_driver;
69 
70 static unsigned int acpi_pstate_strict;
71 
72 static bool boost_state(unsigned int cpu)
73 {
74 	u32 lo, hi;
75 	u64 msr;
76 
77 	switch (boot_cpu_data.x86_vendor) {
78 	case X86_VENDOR_INTEL:
79 		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
80 		msr = lo | ((u64)hi << 32);
81 		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
82 	case X86_VENDOR_HYGON:
83 	case X86_VENDOR_AMD:
84 		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
85 		msr = lo | ((u64)hi << 32);
86 		return !(msr & MSR_K7_HWCR_CPB_DIS);
87 	}
88 	return false;
89 }
90 
91 static int boost_set_msr(bool enable)
92 {
93 	u32 msr_addr;
94 	u64 msr_mask, val;
95 
96 	switch (boot_cpu_data.x86_vendor) {
97 	case X86_VENDOR_INTEL:
98 		msr_addr = MSR_IA32_MISC_ENABLE;
99 		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
100 		break;
101 	case X86_VENDOR_HYGON:
102 	case X86_VENDOR_AMD:
103 		msr_addr = MSR_K7_HWCR;
104 		msr_mask = MSR_K7_HWCR_CPB_DIS;
105 		break;
106 	default:
107 		return -EINVAL;
108 	}
109 
110 	rdmsrl(msr_addr, val);
111 
112 	if (enable)
113 		val &= ~msr_mask;
114 	else
115 		val |= msr_mask;
116 
117 	wrmsrl(msr_addr, val);
118 	return 0;
119 }
120 
121 static void boost_set_msr_each(void *p_en)
122 {
123 	bool enable = (bool) p_en;
124 
125 	boost_set_msr(enable);
126 }
127 
128 static int set_boost(int val)
129 {
130 	get_online_cpus();
131 	on_each_cpu(boost_set_msr_each, (void *)(long)val, 1);
132 	put_online_cpus();
133 	pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");
134 
135 	return 0;
136 }
137 
138 static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
139 {
140 	struct acpi_cpufreq_data *data = policy->driver_data;
141 
142 	if (unlikely(!data))
143 		return -ENODEV;
144 
145 	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
146 }
147 
148 cpufreq_freq_attr_ro(freqdomain_cpus);
149 
150 #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
151 static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
152 			 size_t count)
153 {
154 	int ret;
155 	unsigned int val = 0;
156 
157 	if (!acpi_cpufreq_driver.set_boost)
158 		return -EINVAL;
159 
160 	ret = kstrtouint(buf, 10, &val);
161 	if (ret || val > 1)
162 		return -EINVAL;
163 
164 	set_boost(val);
165 
166 	return count;
167 }
168 
169 static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
170 {
171 	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
172 }
173 
174 cpufreq_freq_attr_rw(cpb);
175 #endif
176 
177 static int check_est_cpu(unsigned int cpuid)
178 {
179 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
180 
181 	return cpu_has(cpu, X86_FEATURE_EST);
182 }
183 
184 static int check_amd_hwpstate_cpu(unsigned int cpuid)
185 {
186 	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
187 
188 	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
189 }
190 
191 static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
192 {
193 	struct acpi_cpufreq_data *data = policy->driver_data;
194 	struct acpi_processor_performance *perf;
195 	int i;
196 
197 	perf = to_perf_data(data);
198 
199 	for (i = 0; i < perf->state_count; i++) {
200 		if (value == perf->states[i].status)
201 			return policy->freq_table[i].frequency;
202 	}
203 	return 0;
204 }
205 
206 static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
207 {
208 	struct acpi_cpufreq_data *data = policy->driver_data;
209 	struct cpufreq_frequency_table *pos;
210 	struct acpi_processor_performance *perf;
211 
212 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
213 		msr &= AMD_MSR_RANGE;
214 	else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
215 		msr &= HYGON_MSR_RANGE;
216 	else
217 		msr &= INTEL_MSR_RANGE;
218 
219 	perf = to_perf_data(data);
220 
221 	cpufreq_for_each_entry(pos, policy->freq_table)
222 		if (msr == perf->states[pos->driver_data].status)
223 			return pos->frequency;
224 	return policy->freq_table[0].frequency;
225 }
226 
227 static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
228 {
229 	struct acpi_cpufreq_data *data = policy->driver_data;
230 
231 	switch (data->cpu_feature) {
232 	case SYSTEM_INTEL_MSR_CAPABLE:
233 	case SYSTEM_AMD_MSR_CAPABLE:
234 		return extract_msr(policy, val);
235 	case SYSTEM_IO_CAPABLE:
236 		return extract_io(policy, val);
237 	default:
238 		return 0;
239 	}
240 }
241 
242 static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
243 {
244 	u32 val, dummy;
245 
246 	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
247 	return val;
248 }
249 
250 static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
251 {
252 	u32 lo, hi;
253 
254 	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
255 	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
256 	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
257 }
258 
259 static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
260 {
261 	u32 val, dummy;
262 
263 	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
264 	return val;
265 }
266 
267 static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
268 {
269 	wrmsr(MSR_AMD_PERF_CTL, val, 0);
270 }
271 
272 static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
273 {
274 	u32 val;
275 
276 	acpi_os_read_port(reg->address, &val, reg->bit_width);
277 	return val;
278 }
279 
280 static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
281 {
282 	acpi_os_write_port(reg->address, val, reg->bit_width);
283 }
284 
285 struct drv_cmd {
286 	struct acpi_pct_register *reg;
287 	u32 val;
288 	union {
289 		void (*write)(struct acpi_pct_register *reg, u32 val);
290 		u32 (*read)(struct acpi_pct_register *reg);
291 	} func;
292 };
293 
294 /* Called via smp_call_function_single(), on the target CPU */
295 static void do_drv_read(void *_cmd)
296 {
297 	struct drv_cmd *cmd = _cmd;
298 
299 	cmd->val = cmd->func.read(cmd->reg);
300 }
301 
302 static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
303 {
304 	struct acpi_processor_performance *perf = to_perf_data(data);
305 	struct drv_cmd cmd = {
306 		.reg = &perf->control_register,
307 		.func.read = data->cpu_freq_read,
308 	};
309 	int err;
310 
311 	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
312 	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
313 	return cmd.val;
314 }
315 
316 /* Called via smp_call_function_many(), on the target CPUs */
317 static void do_drv_write(void *_cmd)
318 {
319 	struct drv_cmd *cmd = _cmd;
320 
321 	cmd->func.write(cmd->reg, cmd->val);
322 }
323 
324 static void drv_write(struct acpi_cpufreq_data *data,
325 		      const struct cpumask *mask, u32 val)
326 {
327 	struct acpi_processor_performance *perf = to_perf_data(data);
328 	struct drv_cmd cmd = {
329 		.reg = &perf->control_register,
330 		.val = val,
331 		.func.write = data->cpu_freq_write,
332 	};
333 	int this_cpu;
334 
335 	this_cpu = get_cpu();
336 	if (cpumask_test_cpu(this_cpu, mask))
337 		do_drv_write(&cmd);
338 
339 	smp_call_function_many(mask, do_drv_write, &cmd, 1);
340 	put_cpu();
341 }
342 
343 static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
344 {
345 	u32 val;
346 
347 	if (unlikely(cpumask_empty(mask)))
348 		return 0;
349 
350 	val = drv_read(data, mask);
351 
352 	pr_debug("%s = %u\n", __func__, val);
353 
354 	return val;
355 }
356 
357 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
358 {
359 	struct acpi_cpufreq_data *data;
360 	struct cpufreq_policy *policy;
361 	unsigned int freq;
362 	unsigned int cached_freq;
363 
364 	pr_debug("%s (%d)\n", __func__, cpu);
365 
366 	policy = cpufreq_cpu_get_raw(cpu);
367 	if (unlikely(!policy))
368 		return 0;
369 
370 	data = policy->driver_data;
371 	if (unlikely(!data || !policy->freq_table))
372 		return 0;
373 
374 	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
375 	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
376 	if (freq != cached_freq) {
377 		/*
378 		 * The dreaded BIOS frequency change behind our back.
379 		 * Force set the frequency on next target call.
380 		 */
381 		data->resume = 1;
382 	}
383 
384 	pr_debug("cur freq = %u\n", freq);
385 
386 	return freq;
387 }
388 
389 static unsigned int check_freqs(struct cpufreq_policy *policy,
390 				const struct cpumask *mask, unsigned int freq)
391 {
392 	struct acpi_cpufreq_data *data = policy->driver_data;
393 	unsigned int cur_freq;
394 	unsigned int i;
395 
396 	for (i = 0; i < 100; i++) {
397 		cur_freq = extract_freq(policy, get_cur_val(mask, data));
398 		if (cur_freq == freq)
399 			return 1;
400 		udelay(10);
401 	}
402 	return 0;
403 }
404 
405 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
406 			       unsigned int index)
407 {
408 	struct acpi_cpufreq_data *data = policy->driver_data;
409 	struct acpi_processor_performance *perf;
410 	const struct cpumask *mask;
411 	unsigned int next_perf_state = 0; /* Index into perf table */
412 	int result = 0;
413 
414 	if (unlikely(!data)) {
415 		return -ENODEV;
416 	}
417 
418 	perf = to_perf_data(data);
419 	next_perf_state = policy->freq_table[index].driver_data;
420 	if (perf->state == next_perf_state) {
421 		if (unlikely(data->resume)) {
422 			pr_debug("Called after resume, resetting to P%d\n",
423 				next_perf_state);
424 			data->resume = 0;
425 		} else {
426 			pr_debug("Already at target state (P%d)\n",
427 				next_perf_state);
428 			return 0;
429 		}
430 	}
431 
432 	/*
433 	 * The core won't allow CPUs to go away until the governor has been
434 	 * stopped, so we can rely on the stability of policy->cpus.
435 	 */
436 	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
437 		cpumask_of(policy->cpu) : policy->cpus;
438 
439 	drv_write(data, mask, perf->states[next_perf_state].control);
440 
441 	if (acpi_pstate_strict) {
442 		if (!check_freqs(policy, mask,
443 				 policy->freq_table[index].frequency)) {
444 			pr_debug("%s (%d)\n", __func__, policy->cpu);
445 			result = -EAGAIN;
446 		}
447 	}
448 
449 	if (!result)
450 		perf->state = next_perf_state;
451 
452 	return result;
453 }
454 
455 static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
456 					     unsigned int target_freq)
457 {
458 	struct acpi_cpufreq_data *data = policy->driver_data;
459 	struct acpi_processor_performance *perf;
460 	struct cpufreq_frequency_table *entry;
461 	unsigned int next_perf_state, next_freq, index;
462 
463 	/*
464 	 * Find the closest frequency above target_freq.
465 	 */
466 	if (policy->cached_target_freq == target_freq)
467 		index = policy->cached_resolved_idx;
468 	else
469 		index = cpufreq_table_find_index_dl(policy, target_freq);
470 
471 	entry = &policy->freq_table[index];
472 	next_freq = entry->frequency;
473 	next_perf_state = entry->driver_data;
474 
475 	perf = to_perf_data(data);
476 	if (perf->state == next_perf_state) {
477 		if (unlikely(data->resume))
478 			data->resume = 0;
479 		else
480 			return next_freq;
481 	}
482 
483 	data->cpu_freq_write(&perf->control_register,
484 			     perf->states[next_perf_state].control);
485 	perf->state = next_perf_state;
486 	return next_freq;
487 }
488 
489 static unsigned long
490 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
491 {
492 	struct acpi_processor_performance *perf;
493 
494 	perf = to_perf_data(data);
495 	if (cpu_khz) {
496 		/* search the closest match to cpu_khz */
497 		unsigned int i;
498 		unsigned long freq;
499 		unsigned long freqn = perf->states[0].core_frequency * 1000;
500 
501 		for (i = 0; i < (perf->state_count-1); i++) {
502 			freq = freqn;
503 			freqn = perf->states[i+1].core_frequency * 1000;
504 			if ((2 * cpu_khz) > (freqn + freq)) {
505 				perf->state = i;
506 				return freq;
507 			}
508 		}
509 		perf->state = perf->state_count-1;
510 		return freqn;
511 	} else {
512 		/* assume CPU is at P0... */
513 		perf->state = 0;
514 		return perf->states[0].core_frequency * 1000;
515 	}
516 }
517 
518 static void free_acpi_perf_data(void)
519 {
520 	unsigned int i;
521 
522 	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
523 	for_each_possible_cpu(i)
524 		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
525 				 ->shared_cpu_map);
526 	free_percpu(acpi_perf_data);
527 }
528 
529 static int cpufreq_boost_online(unsigned int cpu)
530 {
531 	/*
532 	 * On the CPU_UP path we simply keep the boost-disable flag
533 	 * in sync with the current global state.
534 	 */
535 	return boost_set_msr(acpi_cpufreq_driver.boost_enabled);
536 }
537 
538 static int cpufreq_boost_down_prep(unsigned int cpu)
539 {
540 	/*
541 	 * Clear the boost-disable bit on the CPU_DOWN path so that
542 	 * this cpu cannot block the remaining ones from boosting.
543 	 */
544 	return boost_set_msr(1);
545 }
546 
547 /*
548  * acpi_cpufreq_early_init - initialize ACPI P-States library
549  *
550  * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
551  * in order to determine correct frequency and voltage pairings. We can
552  * do _PDC and _PSD and find out the processor dependency for the
553  * actual init that will happen later...
554  */
555 static int __init acpi_cpufreq_early_init(void)
556 {
557 	unsigned int i;
558 	pr_debug("%s\n", __func__);
559 
560 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
561 	if (!acpi_perf_data) {
562 		pr_debug("Memory allocation error for acpi_perf_data.\n");
563 		return -ENOMEM;
564 	}
565 	for_each_possible_cpu(i) {
566 		if (!zalloc_cpumask_var_node(
567 			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
568 			GFP_KERNEL, cpu_to_node(i))) {
569 
570 			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
571 			free_acpi_perf_data();
572 			return -ENOMEM;
573 		}
574 	}
575 
576 	/* Do initialization in ACPI core */
577 	acpi_processor_preregister_performance(acpi_perf_data);
578 	return 0;
579 }
580 
581 #ifdef CONFIG_SMP
/*
 * Some BIOSes perform SW_ANY coordination internally - in hardware or in
 * firmware - without informing the OS. Left undetected, the CPU may run
 * at a speed other than the one the OS asked for. This flag is set by
 * the DMI callback when such a system is recognized.
 */
static int bios_with_sw_any_bug;
589 
590 static int sw_any_bug_found(const struct dmi_system_id *d)
591 {
592 	bios_with_sw_any_bug = 1;
593 	return 0;
594 }
595 
596 static const struct dmi_system_id sw_any_bug_dmi_table[] = {
597 	{
598 		.callback = sw_any_bug_found,
599 		.ident = "Supermicro Server X6DLP",
600 		.matches = {
601 			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
602 			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
603 			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
604 		},
605 	},
606 	{ }
607 };
608 
609 static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
610 {
611 	/* Intel Xeon Processor 7100 Series Specification Update
612 	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
613 	 * AL30: A Machine Check Exception (MCE) Occurring during an
614 	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
615 	 * Both Processor Cores to Lock Up. */
616 	if (c->x86_vendor == X86_VENDOR_INTEL) {
617 		if ((c->x86 == 15) &&
618 		    (c->x86_model == 6) &&
619 		    (c->x86_stepping == 8)) {
620 			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
621 			return -ENODEV;
622 		    }
623 		}
624 	return 0;
625 }
626 #endif
627 
628 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
629 {
630 	unsigned int i;
631 	unsigned int valid_states = 0;
632 	unsigned int cpu = policy->cpu;
633 	struct acpi_cpufreq_data *data;
634 	unsigned int result = 0;
635 	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
636 	struct acpi_processor_performance *perf;
637 	struct cpufreq_frequency_table *freq_table;
638 #ifdef CONFIG_SMP
639 	static int blacklisted;
640 #endif
641 
642 	pr_debug("%s\n", __func__);
643 
644 #ifdef CONFIG_SMP
645 	if (blacklisted)
646 		return blacklisted;
647 	blacklisted = acpi_cpufreq_blacklist(c);
648 	if (blacklisted)
649 		return blacklisted;
650 #endif
651 
652 	data = kzalloc(sizeof(*data), GFP_KERNEL);
653 	if (!data)
654 		return -ENOMEM;
655 
656 	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
657 		result = -ENOMEM;
658 		goto err_free;
659 	}
660 
661 	perf = per_cpu_ptr(acpi_perf_data, cpu);
662 	data->acpi_perf_cpu = cpu;
663 	policy->driver_data = data;
664 
665 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
666 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
667 
668 	result = acpi_processor_register_performance(perf, cpu);
669 	if (result)
670 		goto err_free_mask;
671 
672 	policy->shared_type = perf->shared_type;
673 
674 	/*
675 	 * Will let policy->cpus know about dependency only when software
676 	 * coordination is required.
677 	 */
678 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
679 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
680 		cpumask_copy(policy->cpus, perf->shared_cpu_map);
681 	}
682 	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);
683 
684 #ifdef CONFIG_SMP
685 	dmi_check_system(sw_any_bug_dmi_table);
686 	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
687 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
688 		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
689 	}
690 
691 	if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
692 		cpumask_clear(policy->cpus);
693 		cpumask_set_cpu(cpu, policy->cpus);
694 		cpumask_copy(data->freqdomain_cpus,
695 			     topology_sibling_cpumask(cpu));
696 		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
697 		pr_info_once("overriding BIOS provided _PSD data\n");
698 	}
699 #endif
700 
701 	/* capability check */
702 	if (perf->state_count <= 1) {
703 		pr_debug("No P-States\n");
704 		result = -ENODEV;
705 		goto err_unreg;
706 	}
707 
708 	if (perf->control_register.space_id != perf->status_register.space_id) {
709 		result = -ENODEV;
710 		goto err_unreg;
711 	}
712 
713 	switch (perf->control_register.space_id) {
714 	case ACPI_ADR_SPACE_SYSTEM_IO:
715 		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
716 		    boot_cpu_data.x86 == 0xf) {
717 			pr_debug("AMD K8 systems must use native drivers.\n");
718 			result = -ENODEV;
719 			goto err_unreg;
720 		}
721 		pr_debug("SYSTEM IO addr space\n");
722 		data->cpu_feature = SYSTEM_IO_CAPABLE;
723 		data->cpu_freq_read = cpu_freq_read_io;
724 		data->cpu_freq_write = cpu_freq_write_io;
725 		break;
726 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
727 		pr_debug("HARDWARE addr space\n");
728 		if (check_est_cpu(cpu)) {
729 			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
730 			data->cpu_freq_read = cpu_freq_read_intel;
731 			data->cpu_freq_write = cpu_freq_write_intel;
732 			break;
733 		}
734 		if (check_amd_hwpstate_cpu(cpu)) {
735 			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
736 			data->cpu_freq_read = cpu_freq_read_amd;
737 			data->cpu_freq_write = cpu_freq_write_amd;
738 			break;
739 		}
740 		result = -ENODEV;
741 		goto err_unreg;
742 	default:
743 		pr_debug("Unknown addr space %d\n",
744 			(u32) (perf->control_register.space_id));
745 		result = -ENODEV;
746 		goto err_unreg;
747 	}
748 
749 	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
750 			     GFP_KERNEL);
751 	if (!freq_table) {
752 		result = -ENOMEM;
753 		goto err_unreg;
754 	}
755 
756 	/* detect transition latency */
757 	policy->cpuinfo.transition_latency = 0;
758 	for (i = 0; i < perf->state_count; i++) {
759 		if ((perf->states[i].transition_latency * 1000) >
760 		    policy->cpuinfo.transition_latency)
761 			policy->cpuinfo.transition_latency =
762 			    perf->states[i].transition_latency * 1000;
763 	}
764 
765 	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
766 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
767 	    policy->cpuinfo.transition_latency > 20 * 1000) {
768 		policy->cpuinfo.transition_latency = 20 * 1000;
769 		pr_info_once("P-state transition latency capped at 20 uS\n");
770 	}
771 
772 	/* table init */
773 	for (i = 0; i < perf->state_count; i++) {
774 		if (i > 0 && perf->states[i].core_frequency >=
775 		    freq_table[valid_states-1].frequency / 1000)
776 			continue;
777 
778 		freq_table[valid_states].driver_data = i;
779 		freq_table[valid_states].frequency =
780 		    perf->states[i].core_frequency * 1000;
781 		valid_states++;
782 	}
783 	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
784 	policy->freq_table = freq_table;
785 	perf->state = 0;
786 
787 	switch (perf->control_register.space_id) {
788 	case ACPI_ADR_SPACE_SYSTEM_IO:
789 		/*
790 		 * The core will not set policy->cur, because
791 		 * cpufreq_driver->get is NULL, so we need to set it here.
792 		 * However, we have to guess it, because the current speed is
793 		 * unknown and not detectable via IO ports.
794 		 */
795 		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
796 		break;
797 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
798 		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
799 		break;
800 	default:
801 		break;
802 	}
803 
804 	/* notify BIOS that we exist */
805 	acpi_processor_notify_smm(THIS_MODULE);
806 
807 	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
808 	for (i = 0; i < perf->state_count; i++)
809 		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
810 			(i == perf->state ? '*' : ' '), i,
811 			(u32) perf->states[i].core_frequency,
812 			(u32) perf->states[i].power,
813 			(u32) perf->states[i].transition_latency);
814 
815 	/*
816 	 * the first call to ->target() should result in us actually
817 	 * writing something to the appropriate registers.
818 	 */
819 	data->resume = 1;
820 
821 	policy->fast_switch_possible = !acpi_pstate_strict &&
822 		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);
823 
824 	return result;
825 
826 err_unreg:
827 	acpi_processor_unregister_performance(cpu);
828 err_free_mask:
829 	free_cpumask_var(data->freqdomain_cpus);
830 err_free:
831 	kfree(data);
832 	policy->driver_data = NULL;
833 
834 	return result;
835 }
836 
837 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
838 {
839 	struct acpi_cpufreq_data *data = policy->driver_data;
840 
841 	pr_debug("%s\n", __func__);
842 
843 	policy->fast_switch_possible = false;
844 	policy->driver_data = NULL;
845 	acpi_processor_unregister_performance(data->acpi_perf_cpu);
846 	free_cpumask_var(data->freqdomain_cpus);
847 	kfree(policy->freq_table);
848 	kfree(data);
849 
850 	return 0;
851 }
852 
853 static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
854 {
855 	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
856 							      policy->cpu);
857 
858 	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
859 		pr_warn(FW_WARN "P-state 0 is not max freq\n");
860 }
861 
862 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
863 {
864 	struct acpi_cpufreq_data *data = policy->driver_data;
865 
866 	pr_debug("%s\n", __func__);
867 
868 	data->resume = 1;
869 
870 	return 0;
871 }
872 
873 static struct freq_attr *acpi_cpufreq_attr[] = {
874 	&cpufreq_freq_attr_scaling_available_freqs,
875 	&freqdomain_cpus,
876 #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
877 	&cpb,
878 #endif
879 	NULL,
880 };
881 
882 static struct cpufreq_driver acpi_cpufreq_driver = {
883 	.verify		= cpufreq_generic_frequency_table_verify,
884 	.target_index	= acpi_cpufreq_target,
885 	.fast_switch	= acpi_cpufreq_fast_switch,
886 	.bios_limit	= acpi_processor_get_bios_limit,
887 	.init		= acpi_cpufreq_cpu_init,
888 	.exit		= acpi_cpufreq_cpu_exit,
889 	.ready		= acpi_cpufreq_cpu_ready,
890 	.resume		= acpi_cpufreq_resume,
891 	.name		= "acpi-cpufreq",
892 	.attr		= acpi_cpufreq_attr,
893 };
894 
895 static enum cpuhp_state acpi_cpufreq_online;
896 
897 static void __init acpi_cpufreq_boost_init(void)
898 {
899 	int ret;
900 
901 	if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) {
902 		pr_debug("Boost capabilities not present in the processor\n");
903 		return;
904 	}
905 
906 	acpi_cpufreq_driver.set_boost = set_boost;
907 	acpi_cpufreq_driver.boost_enabled = boost_state(0);
908 
909 	/*
910 	 * This calls the online callback on all online cpu and forces all
911 	 * MSRs to the same value.
912 	 */
913 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online",
914 				cpufreq_boost_online, cpufreq_boost_down_prep);
915 	if (ret < 0) {
916 		pr_err("acpi_cpufreq: failed to register hotplug callbacks\n");
917 		return;
918 	}
919 	acpi_cpufreq_online = ret;
920 }
921 
922 static void acpi_cpufreq_boost_exit(void)
923 {
924 	if (acpi_cpufreq_online > 0)
925 		cpuhp_remove_state_nocalls(acpi_cpufreq_online);
926 }
927 
928 static int __init acpi_cpufreq_init(void)
929 {
930 	int ret;
931 
932 	if (acpi_disabled)
933 		return -ENODEV;
934 
935 	/* don't keep reloading if cpufreq_driver exists */
936 	if (cpufreq_get_current_driver())
937 		return -EEXIST;
938 
939 	pr_debug("%s\n", __func__);
940 
941 	ret = acpi_cpufreq_early_init();
942 	if (ret)
943 		return ret;
944 
945 #ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
946 	/* this is a sysfs file with a strange name and an even stranger
947 	 * semantic - per CPU instantiation, but system global effect.
948 	 * Lets enable it only on AMD CPUs for compatibility reasons and
949 	 * only if configured. This is considered legacy code, which
950 	 * will probably be removed at some point in the future.
951 	 */
952 	if (!check_amd_hwpstate_cpu(0)) {
953 		struct freq_attr **attr;
954 
955 		pr_debug("CPB unsupported, do not expose it\n");
956 
957 		for (attr = acpi_cpufreq_attr; *attr; attr++)
958 			if (*attr == &cpb) {
959 				*attr = NULL;
960 				break;
961 			}
962 	}
963 #endif
964 	acpi_cpufreq_boost_init();
965 
966 	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
967 	if (ret) {
968 		free_acpi_perf_data();
969 		acpi_cpufreq_boost_exit();
970 	}
971 	return ret;
972 }
973 
974 static void __exit acpi_cpufreq_exit(void)
975 {
976 	pr_debug("%s\n", __func__);
977 
978 	acpi_cpufreq_boost_exit();
979 
980 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
981 
982 	free_acpi_perf_data();
983 }
984 
985 module_param(acpi_pstate_strict, uint, 0644);
986 MODULE_PARM_DESC(acpi_pstate_strict,
987 	"value 0 or non-zero. non-zero -> strict ACPI checks are "
988 	"performed during frequency changes.");
989 
990 late_initcall(acpi_cpufreq_init);
991 module_exit(acpi_cpufreq_exit);
992 
993 static const struct x86_cpu_id acpi_cpufreq_ids[] = {
994 	X86_FEATURE_MATCH(X86_FEATURE_ACPI),
995 	X86_FEATURE_MATCH(X86_FEATURE_HW_PSTATE),
996 	{}
997 };
998 MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);
999 
1000 static const struct acpi_device_id processor_device_ids[] = {
1001 	{ACPI_PROCESSOR_OBJECT_HID, },
1002 	{ACPI_PROCESSOR_DEVICE_HID, },
1003 	{},
1004 };
1005 MODULE_DEVICE_TABLE(acpi, processor_device_ids);
1006 
1007 MODULE_ALIAS("acpi");
1008