1f6cc69f1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
23e8c4d31SAmit Kucheria /*
33e8c4d31SAmit Kucheria  * x86_pkg_temp_thermal driver
43e8c4d31SAmit Kucheria  * Copyright (c) 2013, Intel Corporation.
53e8c4d31SAmit Kucheria  */
63e8c4d31SAmit Kucheria #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
73e8c4d31SAmit Kucheria 
83e8c4d31SAmit Kucheria #include <linux/module.h>
93e8c4d31SAmit Kucheria #include <linux/init.h>
103e8c4d31SAmit Kucheria #include <linux/err.h>
113e8c4d31SAmit Kucheria #include <linux/param.h>
123e8c4d31SAmit Kucheria #include <linux/device.h>
133e8c4d31SAmit Kucheria #include <linux/platform_device.h>
143e8c4d31SAmit Kucheria #include <linux/cpu.h>
153e8c4d31SAmit Kucheria #include <linux/smp.h>
163e8c4d31SAmit Kucheria #include <linux/slab.h>
173e8c4d31SAmit Kucheria #include <linux/pm.h>
183e8c4d31SAmit Kucheria #include <linux/thermal.h>
193e8c4d31SAmit Kucheria #include <linux/debugfs.h>
203e8c4d31SAmit Kucheria #include <asm/cpu_device_id.h>
213e8c4d31SAmit Kucheria #include <asm/mce.h>
223e8c4d31SAmit Kucheria 
233e8c4d31SAmit Kucheria /*
243e8c4d31SAmit Kucheria * Rate control delay: Idea is to introduce denounce effect
253e8c4d31SAmit Kucheria * This should be long enough to avoid reduce events, when
263e8c4d31SAmit Kucheria * threshold is set to a temperature, which is constantly
273e8c4d31SAmit Kucheria * violated, but at the short enough to take any action.
283e8c4d31SAmit Kucheria * The action can be remove threshold or change it to next
293e8c4d31SAmit Kucheria * interesting setting. Based on experiments, in around
303e8c4d31SAmit Kucheria * every 5 seconds under load will give us a significant
313e8c4d31SAmit Kucheria * temperature change.
323e8c4d31SAmit Kucheria */
333e8c4d31SAmit Kucheria #define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
343e8c4d31SAmit Kucheria static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
353e8c4d31SAmit Kucheria module_param(notify_delay_ms, int, 0644);
363e8c4d31SAmit Kucheria MODULE_PARM_DESC(notify_delay_ms,
373e8c4d31SAmit Kucheria 	"User space notification delay in milli seconds.");
383e8c4d31SAmit Kucheria 
/*
 * Number of trip points in a thermal zone. Currently it cannot be more
 * than 2: the MSR only allows setting, and getting notifications for,
 * 2 thresholds. This define enforces that limit in case cpuid returns a
 * wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
453e8c4d31SAmit Kucheria 
46b2ce1c88SLen Brown struct zone_device {
473e8c4d31SAmit Kucheria 	int				cpu;
483e8c4d31SAmit Kucheria 	bool				work_scheduled;
493e8c4d31SAmit Kucheria 	u32				tj_max;
503e8c4d31SAmit Kucheria 	u32				msr_pkg_therm_low;
513e8c4d31SAmit Kucheria 	u32				msr_pkg_therm_high;
523e8c4d31SAmit Kucheria 	struct delayed_work		work;
533e8c4d31SAmit Kucheria 	struct thermal_zone_device	*tzone;
543e8c4d31SAmit Kucheria 	struct cpumask			cpumask;
553e8c4d31SAmit Kucheria };
563e8c4d31SAmit Kucheria 
573e8c4d31SAmit Kucheria static struct thermal_zone_params pkg_temp_tz_params = {
583e8c4d31SAmit Kucheria 	.no_hwmon	= true,
593e8c4d31SAmit Kucheria };
603e8c4d31SAmit Kucheria 
61b2ce1c88SLen Brown /* Keep track of how many zone pointers we allocated in init() */
62b2ce1c88SLen Brown static int max_id __read_mostly;
63b2ce1c88SLen Brown /* Array of zone pointers */
64b2ce1c88SLen Brown static struct zone_device **zones;
653e8c4d31SAmit Kucheria /* Serializes interrupt notification, work and hotplug */
66fc32150eSClark Williams static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
673e8c4d31SAmit Kucheria /* Protects zone operation in the work function against hotplug removal */
683e8c4d31SAmit Kucheria static DEFINE_MUTEX(thermal_zone_mutex);
693e8c4d31SAmit Kucheria 
703e8c4d31SAmit Kucheria /* The dynamically assigned cpu hotplug state for module_exit() */
713e8c4d31SAmit Kucheria static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
723e8c4d31SAmit Kucheria 
733e8c4d31SAmit Kucheria /* Debug counters to show using debugfs */
743e8c4d31SAmit Kucheria static struct dentry *debugfs;
753e8c4d31SAmit Kucheria static unsigned int pkg_interrupt_cnt;
763e8c4d31SAmit Kucheria static unsigned int pkg_work_cnt;
773e8c4d31SAmit Kucheria 
7872c9f26bSGreg Kroah-Hartman static void pkg_temp_debugfs_init(void)
793e8c4d31SAmit Kucheria {
803e8c4d31SAmit Kucheria 	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
813e8c4d31SAmit Kucheria 
8272c9f26bSGreg Kroah-Hartman 	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
833e8c4d31SAmit Kucheria 			   &pkg_interrupt_cnt);
8472c9f26bSGreg Kroah-Hartman 	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
853e8c4d31SAmit Kucheria 			   &pkg_work_cnt);
863e8c4d31SAmit Kucheria }
873e8c4d31SAmit Kucheria 
883e8c4d31SAmit Kucheria /*
893e8c4d31SAmit Kucheria  * Protection:
903e8c4d31SAmit Kucheria  *
913e8c4d31SAmit Kucheria  * - cpu hotplug: Read serialized by cpu hotplug lock
923e8c4d31SAmit Kucheria  *		  Write must hold pkg_temp_lock
933e8c4d31SAmit Kucheria  *
943e8c4d31SAmit Kucheria  * - Other callsites: Must hold pkg_temp_lock
953e8c4d31SAmit Kucheria  */
96b2ce1c88SLen Brown static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
973e8c4d31SAmit Kucheria {
98b2ce1c88SLen Brown 	int id = topology_logical_die_id(cpu);
993e8c4d31SAmit Kucheria 
100b2ce1c88SLen Brown 	if (id >= 0 && id < max_id)
101b2ce1c88SLen Brown 		return zones[id];
1023e8c4d31SAmit Kucheria 	return NULL;
1033e8c4d31SAmit Kucheria }
1043e8c4d31SAmit Kucheria 
1053e8c4d31SAmit Kucheria /*
1063e8c4d31SAmit Kucheria * tj-max is is interesting because threshold is set relative to this
1073e8c4d31SAmit Kucheria * temperature.
1083e8c4d31SAmit Kucheria */
1093e8c4d31SAmit Kucheria static int get_tj_max(int cpu, u32 *tj_max)
1103e8c4d31SAmit Kucheria {
1113e8c4d31SAmit Kucheria 	u32 eax, edx, val;
1123e8c4d31SAmit Kucheria 	int err;
1133e8c4d31SAmit Kucheria 
1143e8c4d31SAmit Kucheria 	err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
1153e8c4d31SAmit Kucheria 	if (err)
1163e8c4d31SAmit Kucheria 		return err;
1173e8c4d31SAmit Kucheria 
1183e8c4d31SAmit Kucheria 	val = (eax >> 16) & 0xff;
1193e8c4d31SAmit Kucheria 	*tj_max = val * 1000;
1203e8c4d31SAmit Kucheria 
1213e8c4d31SAmit Kucheria 	return val ? 0 : -EINVAL;
1223e8c4d31SAmit Kucheria }
1233e8c4d31SAmit Kucheria 
1243e8c4d31SAmit Kucheria static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
1253e8c4d31SAmit Kucheria {
126b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1273e8c4d31SAmit Kucheria 	u32 eax, edx;
1283e8c4d31SAmit Kucheria 
129b2ce1c88SLen Brown 	rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
130b2ce1c88SLen Brown 			&eax, &edx);
1313e8c4d31SAmit Kucheria 	if (eax & 0x80000000) {
132b2ce1c88SLen Brown 		*temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
1333e8c4d31SAmit Kucheria 		pr_debug("sys_get_curr_temp %d\n", *temp);
1343e8c4d31SAmit Kucheria 		return 0;
1353e8c4d31SAmit Kucheria 	}
1363e8c4d31SAmit Kucheria 	return -EINVAL;
1373e8c4d31SAmit Kucheria }
1383e8c4d31SAmit Kucheria 
1393e8c4d31SAmit Kucheria static int sys_get_trip_temp(struct thermal_zone_device *tzd,
1403e8c4d31SAmit Kucheria 			     int trip, int *temp)
1413e8c4d31SAmit Kucheria {
142b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1433e8c4d31SAmit Kucheria 	unsigned long thres_reg_value;
1443e8c4d31SAmit Kucheria 	u32 mask, shift, eax, edx;
1453e8c4d31SAmit Kucheria 	int ret;
1463e8c4d31SAmit Kucheria 
1473e8c4d31SAmit Kucheria 	if (trip >= MAX_NUMBER_OF_TRIPS)
1483e8c4d31SAmit Kucheria 		return -EINVAL;
1493e8c4d31SAmit Kucheria 
1503e8c4d31SAmit Kucheria 	if (trip) {
1513e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD1;
1523e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD1;
1533e8c4d31SAmit Kucheria 	} else {
1543e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD0;
1553e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD0;
1563e8c4d31SAmit Kucheria 	}
1573e8c4d31SAmit Kucheria 
158b2ce1c88SLen Brown 	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1593e8c4d31SAmit Kucheria 			   &eax, &edx);
1603e8c4d31SAmit Kucheria 	if (ret < 0)
1613e8c4d31SAmit Kucheria 		return ret;
1623e8c4d31SAmit Kucheria 
1633e8c4d31SAmit Kucheria 	thres_reg_value = (eax & mask) >> shift;
1643e8c4d31SAmit Kucheria 	if (thres_reg_value)
165b2ce1c88SLen Brown 		*temp = zonedev->tj_max - thres_reg_value * 1000;
1663e8c4d31SAmit Kucheria 	else
1673e8c4d31SAmit Kucheria 		*temp = 0;
1683e8c4d31SAmit Kucheria 	pr_debug("sys_get_trip_temp %d\n", *temp);
1693e8c4d31SAmit Kucheria 
1703e8c4d31SAmit Kucheria 	return 0;
1713e8c4d31SAmit Kucheria }
1723e8c4d31SAmit Kucheria 
1733e8c4d31SAmit Kucheria static int
1743e8c4d31SAmit Kucheria sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
1753e8c4d31SAmit Kucheria {
176b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1773e8c4d31SAmit Kucheria 	u32 l, h, mask, shift, intr;
1783e8c4d31SAmit Kucheria 	int ret;
1793e8c4d31SAmit Kucheria 
180b2ce1c88SLen Brown 	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
1813e8c4d31SAmit Kucheria 		return -EINVAL;
1823e8c4d31SAmit Kucheria 
183b2ce1c88SLen Brown 	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1843e8c4d31SAmit Kucheria 			   &l, &h);
1853e8c4d31SAmit Kucheria 	if (ret < 0)
1863e8c4d31SAmit Kucheria 		return ret;
1873e8c4d31SAmit Kucheria 
1883e8c4d31SAmit Kucheria 	if (trip) {
1893e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD1;
1903e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD1;
1913e8c4d31SAmit Kucheria 		intr = THERM_INT_THRESHOLD1_ENABLE;
1923e8c4d31SAmit Kucheria 	} else {
1933e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD0;
1943e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD0;
1953e8c4d31SAmit Kucheria 		intr = THERM_INT_THRESHOLD0_ENABLE;
1963e8c4d31SAmit Kucheria 	}
1973e8c4d31SAmit Kucheria 	l &= ~mask;
1983e8c4d31SAmit Kucheria 	/*
1993e8c4d31SAmit Kucheria 	* When users space sets a trip temperature == 0, which is indication
2003e8c4d31SAmit Kucheria 	* that, it is no longer interested in receiving notifications.
2013e8c4d31SAmit Kucheria 	*/
2023e8c4d31SAmit Kucheria 	if (!temp) {
2033e8c4d31SAmit Kucheria 		l &= ~intr;
2043e8c4d31SAmit Kucheria 	} else {
205b2ce1c88SLen Brown 		l |= (zonedev->tj_max - temp)/1000 << shift;
2063e8c4d31SAmit Kucheria 		l |= intr;
2073e8c4d31SAmit Kucheria 	}
2083e8c4d31SAmit Kucheria 
209b2ce1c88SLen Brown 	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
210b2ce1c88SLen Brown 			l, h);
2113e8c4d31SAmit Kucheria }
2123e8c4d31SAmit Kucheria 
2133e8c4d31SAmit Kucheria static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
2143e8c4d31SAmit Kucheria 			     enum thermal_trip_type *type)
2153e8c4d31SAmit Kucheria {
2163e8c4d31SAmit Kucheria 	*type = THERMAL_TRIP_PASSIVE;
2173e8c4d31SAmit Kucheria 	return 0;
2183e8c4d31SAmit Kucheria }
2193e8c4d31SAmit Kucheria 
2203e8c4d31SAmit Kucheria /* Thermal zone callback registry */
2213e8c4d31SAmit Kucheria static struct thermal_zone_device_ops tzone_ops = {
2223e8c4d31SAmit Kucheria 	.get_temp = sys_get_curr_temp,
2233e8c4d31SAmit Kucheria 	.get_trip_temp = sys_get_trip_temp,
2243e8c4d31SAmit Kucheria 	.get_trip_type = sys_get_trip_type,
2253e8c4d31SAmit Kucheria 	.set_trip_temp = sys_set_trip_temp,
2263e8c4d31SAmit Kucheria };
2273e8c4d31SAmit Kucheria 
/*
 * Installed as platform_thermal_package_rate_control by module init;
 * unconditionally reports true.
 */
static bool pkg_thermal_rate_control(void)
{
	return true;
}
2323e8c4d31SAmit Kucheria 
2333e8c4d31SAmit Kucheria /* Enable threshold interrupt on local package/cpu */
2343e8c4d31SAmit Kucheria static inline void enable_pkg_thres_interrupt(void)
2353e8c4d31SAmit Kucheria {
2363e8c4d31SAmit Kucheria 	u8 thres_0, thres_1;
2373e8c4d31SAmit Kucheria 	u32 l, h;
2383e8c4d31SAmit Kucheria 
2393e8c4d31SAmit Kucheria 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2403e8c4d31SAmit Kucheria 	/* only enable/disable if it had valid threshold value */
2413e8c4d31SAmit Kucheria 	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
2423e8c4d31SAmit Kucheria 	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
2433e8c4d31SAmit Kucheria 	if (thres_0)
2443e8c4d31SAmit Kucheria 		l |= THERM_INT_THRESHOLD0_ENABLE;
2453e8c4d31SAmit Kucheria 	if (thres_1)
2463e8c4d31SAmit Kucheria 		l |= THERM_INT_THRESHOLD1_ENABLE;
2473e8c4d31SAmit Kucheria 	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2483e8c4d31SAmit Kucheria }
2493e8c4d31SAmit Kucheria 
2503e8c4d31SAmit Kucheria /* Disable threshold interrupt on local package/cpu */
2513e8c4d31SAmit Kucheria static inline void disable_pkg_thres_interrupt(void)
2523e8c4d31SAmit Kucheria {
2533e8c4d31SAmit Kucheria 	u32 l, h;
2543e8c4d31SAmit Kucheria 
2553e8c4d31SAmit Kucheria 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2563e8c4d31SAmit Kucheria 
2573e8c4d31SAmit Kucheria 	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
2583e8c4d31SAmit Kucheria 	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2593e8c4d31SAmit Kucheria }
2603e8c4d31SAmit Kucheria 
2613e8c4d31SAmit Kucheria static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
2623e8c4d31SAmit Kucheria {
2633e8c4d31SAmit Kucheria 	struct thermal_zone_device *tzone = NULL;
2643e8c4d31SAmit Kucheria 	int cpu = smp_processor_id();
265b2ce1c88SLen Brown 	struct zone_device *zonedev;
2663e8c4d31SAmit Kucheria 	u64 msr_val, wr_val;
2673e8c4d31SAmit Kucheria 
2683e8c4d31SAmit Kucheria 	mutex_lock(&thermal_zone_mutex);
269fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
2703e8c4d31SAmit Kucheria 	++pkg_work_cnt;
2713e8c4d31SAmit Kucheria 
272b2ce1c88SLen Brown 	zonedev = pkg_temp_thermal_get_dev(cpu);
273b2ce1c88SLen Brown 	if (!zonedev) {
274fc32150eSClark Williams 		raw_spin_unlock_irq(&pkg_temp_lock);
2753e8c4d31SAmit Kucheria 		mutex_unlock(&thermal_zone_mutex);
2763e8c4d31SAmit Kucheria 		return;
2773e8c4d31SAmit Kucheria 	}
278b2ce1c88SLen Brown 	zonedev->work_scheduled = false;
2793e8c4d31SAmit Kucheria 
2803e8c4d31SAmit Kucheria 	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
2813e8c4d31SAmit Kucheria 	wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
2823e8c4d31SAmit Kucheria 	if (wr_val != msr_val) {
2833e8c4d31SAmit Kucheria 		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
284b2ce1c88SLen Brown 		tzone = zonedev->tzone;
2853e8c4d31SAmit Kucheria 	}
2863e8c4d31SAmit Kucheria 
2873e8c4d31SAmit Kucheria 	enable_pkg_thres_interrupt();
288fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
2893e8c4d31SAmit Kucheria 
2903e8c4d31SAmit Kucheria 	/*
2913e8c4d31SAmit Kucheria 	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
2923e8c4d31SAmit Kucheria 	 * concurrent removal in the cpu offline callback.
2933e8c4d31SAmit Kucheria 	 */
2943e8c4d31SAmit Kucheria 	if (tzone)
2953e8c4d31SAmit Kucheria 		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
2963e8c4d31SAmit Kucheria 
2973e8c4d31SAmit Kucheria 	mutex_unlock(&thermal_zone_mutex);
2983e8c4d31SAmit Kucheria }
2993e8c4d31SAmit Kucheria 
3003e8c4d31SAmit Kucheria static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
3013e8c4d31SAmit Kucheria {
3023e8c4d31SAmit Kucheria 	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
3033e8c4d31SAmit Kucheria 
3043e8c4d31SAmit Kucheria 	schedule_delayed_work_on(cpu, work, ms);
3053e8c4d31SAmit Kucheria }
3063e8c4d31SAmit Kucheria 
3073e8c4d31SAmit Kucheria static int pkg_thermal_notify(u64 msr_val)
3083e8c4d31SAmit Kucheria {
3093e8c4d31SAmit Kucheria 	int cpu = smp_processor_id();
310b2ce1c88SLen Brown 	struct zone_device *zonedev;
3113e8c4d31SAmit Kucheria 	unsigned long flags;
3123e8c4d31SAmit Kucheria 
313fc32150eSClark Williams 	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
3143e8c4d31SAmit Kucheria 	++pkg_interrupt_cnt;
3153e8c4d31SAmit Kucheria 
3163e8c4d31SAmit Kucheria 	disable_pkg_thres_interrupt();
3173e8c4d31SAmit Kucheria 
3183e8c4d31SAmit Kucheria 	/* Work is per package, so scheduling it once is enough. */
319b2ce1c88SLen Brown 	zonedev = pkg_temp_thermal_get_dev(cpu);
320b2ce1c88SLen Brown 	if (zonedev && !zonedev->work_scheduled) {
321b2ce1c88SLen Brown 		zonedev->work_scheduled = true;
322b2ce1c88SLen Brown 		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
3233e8c4d31SAmit Kucheria 	}
3243e8c4d31SAmit Kucheria 
325fc32150eSClark Williams 	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
3263e8c4d31SAmit Kucheria 	return 0;
3273e8c4d31SAmit Kucheria }
3283e8c4d31SAmit Kucheria 
3293e8c4d31SAmit Kucheria static int pkg_temp_thermal_device_add(unsigned int cpu)
3303e8c4d31SAmit Kucheria {
331b2ce1c88SLen Brown 	int id = topology_logical_die_id(cpu);
3323e8c4d31SAmit Kucheria 	u32 tj_max, eax, ebx, ecx, edx;
333b2ce1c88SLen Brown 	struct zone_device *zonedev;
3343e8c4d31SAmit Kucheria 	int thres_count, err;
3353e8c4d31SAmit Kucheria 
336b2ce1c88SLen Brown 	if (id >= max_id)
3373e8c4d31SAmit Kucheria 		return -ENOMEM;
3383e8c4d31SAmit Kucheria 
3393e8c4d31SAmit Kucheria 	cpuid(6, &eax, &ebx, &ecx, &edx);
3403e8c4d31SAmit Kucheria 	thres_count = ebx & 0x07;
3413e8c4d31SAmit Kucheria 	if (!thres_count)
3423e8c4d31SAmit Kucheria 		return -ENODEV;
3433e8c4d31SAmit Kucheria 
3443e8c4d31SAmit Kucheria 	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
3453e8c4d31SAmit Kucheria 
3463e8c4d31SAmit Kucheria 	err = get_tj_max(cpu, &tj_max);
3473e8c4d31SAmit Kucheria 	if (err)
3483e8c4d31SAmit Kucheria 		return err;
3493e8c4d31SAmit Kucheria 
350b2ce1c88SLen Brown 	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
351b2ce1c88SLen Brown 	if (!zonedev)
3523e8c4d31SAmit Kucheria 		return -ENOMEM;
3533e8c4d31SAmit Kucheria 
354b2ce1c88SLen Brown 	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
355b2ce1c88SLen Brown 	zonedev->cpu = cpu;
356b2ce1c88SLen Brown 	zonedev->tj_max = tj_max;
357b2ce1c88SLen Brown 	zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
3583e8c4d31SAmit Kucheria 			thres_count,
3593e8c4d31SAmit Kucheria 			(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
360b2ce1c88SLen Brown 			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
361b2ce1c88SLen Brown 	if (IS_ERR(zonedev->tzone)) {
362b2ce1c88SLen Brown 		err = PTR_ERR(zonedev->tzone);
363b2ce1c88SLen Brown 		kfree(zonedev);
3643e8c4d31SAmit Kucheria 		return err;
3653e8c4d31SAmit Kucheria 	}
366bbcf90c0SAndrzej Pietrasiewicz 	err = thermal_zone_device_enable(zonedev->tzone);
367bbcf90c0SAndrzej Pietrasiewicz 	if (err) {
368bbcf90c0SAndrzej Pietrasiewicz 		thermal_zone_device_unregister(zonedev->tzone);
369bbcf90c0SAndrzej Pietrasiewicz 		kfree(zonedev);
370bbcf90c0SAndrzej Pietrasiewicz 		return err;
371bbcf90c0SAndrzej Pietrasiewicz 	}
3723e8c4d31SAmit Kucheria 	/* Store MSR value for package thermal interrupt, to restore at exit */
373b2ce1c88SLen Brown 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
374b2ce1c88SLen Brown 	      zonedev->msr_pkg_therm_high);
3753e8c4d31SAmit Kucheria 
376b2ce1c88SLen Brown 	cpumask_set_cpu(cpu, &zonedev->cpumask);
377fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
378b2ce1c88SLen Brown 	zones[id] = zonedev;
379fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
3803e8c4d31SAmit Kucheria 	return 0;
3813e8c4d31SAmit Kucheria }
3823e8c4d31SAmit Kucheria 
3833e8c4d31SAmit Kucheria static int pkg_thermal_cpu_offline(unsigned int cpu)
3843e8c4d31SAmit Kucheria {
385b2ce1c88SLen Brown 	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
3863e8c4d31SAmit Kucheria 	bool lastcpu, was_target;
3873e8c4d31SAmit Kucheria 	int target;
3883e8c4d31SAmit Kucheria 
389b2ce1c88SLen Brown 	if (!zonedev)
3903e8c4d31SAmit Kucheria 		return 0;
3913e8c4d31SAmit Kucheria 
392b2ce1c88SLen Brown 	target = cpumask_any_but(&zonedev->cpumask, cpu);
393b2ce1c88SLen Brown 	cpumask_clear_cpu(cpu, &zonedev->cpumask);
3943e8c4d31SAmit Kucheria 	lastcpu = target >= nr_cpu_ids;
3953e8c4d31SAmit Kucheria 	/*
3963e8c4d31SAmit Kucheria 	 * Remove the sysfs files, if this is the last cpu in the package
3973e8c4d31SAmit Kucheria 	 * before doing further cleanups.
3983e8c4d31SAmit Kucheria 	 */
3993e8c4d31SAmit Kucheria 	if (lastcpu) {
400b2ce1c88SLen Brown 		struct thermal_zone_device *tzone = zonedev->tzone;
4013e8c4d31SAmit Kucheria 
4023e8c4d31SAmit Kucheria 		/*
4033e8c4d31SAmit Kucheria 		 * We must protect against a work function calling
4043e8c4d31SAmit Kucheria 		 * thermal_zone_update, after/while unregister. We null out
4053e8c4d31SAmit Kucheria 		 * the pointer under the zone mutex, so the worker function
4063e8c4d31SAmit Kucheria 		 * won't try to call.
4073e8c4d31SAmit Kucheria 		 */
4083e8c4d31SAmit Kucheria 		mutex_lock(&thermal_zone_mutex);
409b2ce1c88SLen Brown 		zonedev->tzone = NULL;
4103e8c4d31SAmit Kucheria 		mutex_unlock(&thermal_zone_mutex);
4113e8c4d31SAmit Kucheria 
4123e8c4d31SAmit Kucheria 		thermal_zone_device_unregister(tzone);
4133e8c4d31SAmit Kucheria 	}
4143e8c4d31SAmit Kucheria 
4153e8c4d31SAmit Kucheria 	/* Protect against work and interrupts */
416fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
4173e8c4d31SAmit Kucheria 
4183e8c4d31SAmit Kucheria 	/*
4193e8c4d31SAmit Kucheria 	 * Check whether this cpu was the current target and store the new
4203e8c4d31SAmit Kucheria 	 * one. When we drop the lock, then the interrupt notify function
4213e8c4d31SAmit Kucheria 	 * will see the new target.
4223e8c4d31SAmit Kucheria 	 */
423b2ce1c88SLen Brown 	was_target = zonedev->cpu == cpu;
424b2ce1c88SLen Brown 	zonedev->cpu = target;
4253e8c4d31SAmit Kucheria 
4263e8c4d31SAmit Kucheria 	/*
4273e8c4d31SAmit Kucheria 	 * If this is the last CPU in the package remove the package
4283e8c4d31SAmit Kucheria 	 * reference from the array and restore the interrupt MSR. When we
4293e8c4d31SAmit Kucheria 	 * drop the lock neither the interrupt notify function nor the
4303e8c4d31SAmit Kucheria 	 * worker will see the package anymore.
4313e8c4d31SAmit Kucheria 	 */
4323e8c4d31SAmit Kucheria 	if (lastcpu) {
433b2ce1c88SLen Brown 		zones[topology_logical_die_id(cpu)] = NULL;
4343e8c4d31SAmit Kucheria 		/* After this point nothing touches the MSR anymore. */
4353e8c4d31SAmit Kucheria 		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
436b2ce1c88SLen Brown 		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
4373e8c4d31SAmit Kucheria 	}
4383e8c4d31SAmit Kucheria 
4393e8c4d31SAmit Kucheria 	/*
4403e8c4d31SAmit Kucheria 	 * Check whether there is work scheduled and whether the work is
4413e8c4d31SAmit Kucheria 	 * targeted at the outgoing CPU.
4423e8c4d31SAmit Kucheria 	 */
443b2ce1c88SLen Brown 	if (zonedev->work_scheduled && was_target) {
4443e8c4d31SAmit Kucheria 		/*
4453e8c4d31SAmit Kucheria 		 * To cancel the work we need to drop the lock, otherwise
4463e8c4d31SAmit Kucheria 		 * we might deadlock if the work needs to be flushed.
4473e8c4d31SAmit Kucheria 		 */
448fc32150eSClark Williams 		raw_spin_unlock_irq(&pkg_temp_lock);
449b2ce1c88SLen Brown 		cancel_delayed_work_sync(&zonedev->work);
450fc32150eSClark Williams 		raw_spin_lock_irq(&pkg_temp_lock);
4513e8c4d31SAmit Kucheria 		/*
4523e8c4d31SAmit Kucheria 		 * If this is not the last cpu in the package and the work
4533e8c4d31SAmit Kucheria 		 * did not run after we dropped the lock above, then we
4543e8c4d31SAmit Kucheria 		 * need to reschedule the work, otherwise the interrupt
4553e8c4d31SAmit Kucheria 		 * stays disabled forever.
4563e8c4d31SAmit Kucheria 		 */
457b2ce1c88SLen Brown 		if (!lastcpu && zonedev->work_scheduled)
458b2ce1c88SLen Brown 			pkg_thermal_schedule_work(target, &zonedev->work);
4593e8c4d31SAmit Kucheria 	}
4603e8c4d31SAmit Kucheria 
461fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
4623e8c4d31SAmit Kucheria 
4633e8c4d31SAmit Kucheria 	/* Final cleanup if this is the last cpu */
4643e8c4d31SAmit Kucheria 	if (lastcpu)
465b2ce1c88SLen Brown 		kfree(zonedev);
4663e8c4d31SAmit Kucheria 	return 0;
4673e8c4d31SAmit Kucheria }
4683e8c4d31SAmit Kucheria 
4693e8c4d31SAmit Kucheria static int pkg_thermal_cpu_online(unsigned int cpu)
4703e8c4d31SAmit Kucheria {
471b2ce1c88SLen Brown 	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
4723e8c4d31SAmit Kucheria 	struct cpuinfo_x86 *c = &cpu_data(cpu);
4733e8c4d31SAmit Kucheria 
4743e8c4d31SAmit Kucheria 	/* Paranoia check */
4753e8c4d31SAmit Kucheria 	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
4763e8c4d31SAmit Kucheria 		return -ENODEV;
4773e8c4d31SAmit Kucheria 
4783e8c4d31SAmit Kucheria 	/* If the package exists, nothing to do */
479b2ce1c88SLen Brown 	if (zonedev) {
480b2ce1c88SLen Brown 		cpumask_set_cpu(cpu, &zonedev->cpumask);
4813e8c4d31SAmit Kucheria 		return 0;
4823e8c4d31SAmit Kucheria 	}
4833e8c4d31SAmit Kucheria 	return pkg_temp_thermal_device_add(cpu);
4843e8c4d31SAmit Kucheria }
4853e8c4d31SAmit Kucheria 
4863e8c4d31SAmit Kucheria static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
4879c51044cSThomas Gleixner 	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
4883e8c4d31SAmit Kucheria 	{}
4893e8c4d31SAmit Kucheria };
4903e8c4d31SAmit Kucheria MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
4913e8c4d31SAmit Kucheria 
4923e8c4d31SAmit Kucheria static int __init pkg_temp_thermal_init(void)
4933e8c4d31SAmit Kucheria {
4943e8c4d31SAmit Kucheria 	int ret;
4953e8c4d31SAmit Kucheria 
4963e8c4d31SAmit Kucheria 	if (!x86_match_cpu(pkg_temp_thermal_ids))
4973e8c4d31SAmit Kucheria 		return -ENODEV;
4983e8c4d31SAmit Kucheria 
499b2ce1c88SLen Brown 	max_id = topology_max_packages() * topology_max_die_per_package();
500b2ce1c88SLen Brown 	zones = kcalloc(max_id, sizeof(struct zone_device *),
5013e8c4d31SAmit Kucheria 			   GFP_KERNEL);
502b2ce1c88SLen Brown 	if (!zones)
5033e8c4d31SAmit Kucheria 		return -ENOMEM;
5043e8c4d31SAmit Kucheria 
5053e8c4d31SAmit Kucheria 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
5063e8c4d31SAmit Kucheria 				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
5073e8c4d31SAmit Kucheria 	if (ret < 0)
5083e8c4d31SAmit Kucheria 		goto err;
5093e8c4d31SAmit Kucheria 
5103e8c4d31SAmit Kucheria 	/* Store the state for module exit */
5113e8c4d31SAmit Kucheria 	pkg_thermal_hp_state = ret;
5123e8c4d31SAmit Kucheria 
5133e8c4d31SAmit Kucheria 	platform_thermal_package_notify = pkg_thermal_notify;
5143e8c4d31SAmit Kucheria 	platform_thermal_package_rate_control = pkg_thermal_rate_control;
5153e8c4d31SAmit Kucheria 
5163e8c4d31SAmit Kucheria 	 /* Don't care if it fails */
5173e8c4d31SAmit Kucheria 	pkg_temp_debugfs_init();
5183e8c4d31SAmit Kucheria 	return 0;
5193e8c4d31SAmit Kucheria 
5203e8c4d31SAmit Kucheria err:
521b2ce1c88SLen Brown 	kfree(zones);
5223e8c4d31SAmit Kucheria 	return ret;
5233e8c4d31SAmit Kucheria }
5243e8c4d31SAmit Kucheria module_init(pkg_temp_thermal_init)
5253e8c4d31SAmit Kucheria 
5263e8c4d31SAmit Kucheria static void __exit pkg_temp_thermal_exit(void)
5273e8c4d31SAmit Kucheria {
5283e8c4d31SAmit Kucheria 	platform_thermal_package_notify = NULL;
5293e8c4d31SAmit Kucheria 	platform_thermal_package_rate_control = NULL;
5303e8c4d31SAmit Kucheria 
5313e8c4d31SAmit Kucheria 	cpuhp_remove_state(pkg_thermal_hp_state);
5323e8c4d31SAmit Kucheria 	debugfs_remove_recursive(debugfs);
533b2ce1c88SLen Brown 	kfree(zones);
5343e8c4d31SAmit Kucheria }
5353e8c4d31SAmit Kucheria module_exit(pkg_temp_thermal_exit)
5363e8c4d31SAmit Kucheria 
5373e8c4d31SAmit Kucheria MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
5383e8c4d31SAmit Kucheria MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
5393e8c4d31SAmit Kucheria MODULE_LICENSE("GPL v2");
540