1f6cc69f1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
23e8c4d31SAmit Kucheria /*
33e8c4d31SAmit Kucheria  * x86_pkg_temp_thermal driver
43e8c4d31SAmit Kucheria  * Copyright (c) 2013, Intel Corporation.
53e8c4d31SAmit Kucheria  */
63e8c4d31SAmit Kucheria #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
73e8c4d31SAmit Kucheria 
83e8c4d31SAmit Kucheria #include <linux/module.h>
93e8c4d31SAmit Kucheria #include <linux/init.h>
103e8c4d31SAmit Kucheria #include <linux/err.h>
113e8c4d31SAmit Kucheria #include <linux/param.h>
123e8c4d31SAmit Kucheria #include <linux/device.h>
133e8c4d31SAmit Kucheria #include <linux/platform_device.h>
143e8c4d31SAmit Kucheria #include <linux/cpu.h>
153e8c4d31SAmit Kucheria #include <linux/smp.h>
163e8c4d31SAmit Kucheria #include <linux/slab.h>
173e8c4d31SAmit Kucheria #include <linux/pm.h>
183e8c4d31SAmit Kucheria #include <linux/thermal.h>
193e8c4d31SAmit Kucheria #include <linux/debugfs.h>
20*9223d0dcSBorislav Petkov 
213e8c4d31SAmit Kucheria #include <asm/cpu_device_id.h>
22*9223d0dcSBorislav Petkov 
23*9223d0dcSBorislav Petkov #include "thermal_interrupt.h"
243e8c4d31SAmit Kucheria 
253e8c4d31SAmit Kucheria /*
263e8c4d31SAmit Kucheria * Rate control delay: Idea is to introduce denounce effect
273e8c4d31SAmit Kucheria * This should be long enough to avoid reduce events, when
283e8c4d31SAmit Kucheria * threshold is set to a temperature, which is constantly
293e8c4d31SAmit Kucheria * violated, but at the short enough to take any action.
303e8c4d31SAmit Kucheria * The action can be remove threshold or change it to next
313e8c4d31SAmit Kucheria * interesting setting. Based on experiments, in around
323e8c4d31SAmit Kucheria * every 5 seconds under load will give us a significant
333e8c4d31SAmit Kucheria * temperature change.
343e8c4d31SAmit Kucheria */
353e8c4d31SAmit Kucheria #define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
363e8c4d31SAmit Kucheria static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
373e8c4d31SAmit Kucheria module_param(notify_delay_ms, int, 0644);
383e8c4d31SAmit Kucheria MODULE_PARM_DESC(notify_delay_ms,
393e8c4d31SAmit Kucheria 	"User space notification delay in milli seconds.");
403e8c4d31SAmit Kucheria 
/*
 * Number of trip points in the thermal zone. Currently it cannot be
 * more than 2: the MSR only allows setting and getting notifications
 * for 2 thresholds. This define enforces the limit in case cpuid
 * returns a wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
473e8c4d31SAmit Kucheria 
48b2ce1c88SLen Brown struct zone_device {
493e8c4d31SAmit Kucheria 	int				cpu;
503e8c4d31SAmit Kucheria 	bool				work_scheduled;
513e8c4d31SAmit Kucheria 	u32				tj_max;
523e8c4d31SAmit Kucheria 	u32				msr_pkg_therm_low;
533e8c4d31SAmit Kucheria 	u32				msr_pkg_therm_high;
543e8c4d31SAmit Kucheria 	struct delayed_work		work;
553e8c4d31SAmit Kucheria 	struct thermal_zone_device	*tzone;
563e8c4d31SAmit Kucheria 	struct cpumask			cpumask;
573e8c4d31SAmit Kucheria };
583e8c4d31SAmit Kucheria 
593e8c4d31SAmit Kucheria static struct thermal_zone_params pkg_temp_tz_params = {
603e8c4d31SAmit Kucheria 	.no_hwmon	= true,
613e8c4d31SAmit Kucheria };
623e8c4d31SAmit Kucheria 
63b2ce1c88SLen Brown /* Keep track of how many zone pointers we allocated in init() */
64b2ce1c88SLen Brown static int max_id __read_mostly;
65b2ce1c88SLen Brown /* Array of zone pointers */
66b2ce1c88SLen Brown static struct zone_device **zones;
673e8c4d31SAmit Kucheria /* Serializes interrupt notification, work and hotplug */
68fc32150eSClark Williams static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
693e8c4d31SAmit Kucheria /* Protects zone operation in the work function against hotplug removal */
703e8c4d31SAmit Kucheria static DEFINE_MUTEX(thermal_zone_mutex);
713e8c4d31SAmit Kucheria 
723e8c4d31SAmit Kucheria /* The dynamically assigned cpu hotplug state for module_exit() */
733e8c4d31SAmit Kucheria static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
743e8c4d31SAmit Kucheria 
753e8c4d31SAmit Kucheria /* Debug counters to show using debugfs */
763e8c4d31SAmit Kucheria static struct dentry *debugfs;
773e8c4d31SAmit Kucheria static unsigned int pkg_interrupt_cnt;
783e8c4d31SAmit Kucheria static unsigned int pkg_work_cnt;
793e8c4d31SAmit Kucheria 
8072c9f26bSGreg Kroah-Hartman static void pkg_temp_debugfs_init(void)
813e8c4d31SAmit Kucheria {
823e8c4d31SAmit Kucheria 	debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
833e8c4d31SAmit Kucheria 
8472c9f26bSGreg Kroah-Hartman 	debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
853e8c4d31SAmit Kucheria 			   &pkg_interrupt_cnt);
8672c9f26bSGreg Kroah-Hartman 	debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
873e8c4d31SAmit Kucheria 			   &pkg_work_cnt);
883e8c4d31SAmit Kucheria }
893e8c4d31SAmit Kucheria 
903e8c4d31SAmit Kucheria /*
913e8c4d31SAmit Kucheria  * Protection:
923e8c4d31SAmit Kucheria  *
933e8c4d31SAmit Kucheria  * - cpu hotplug: Read serialized by cpu hotplug lock
943e8c4d31SAmit Kucheria  *		  Write must hold pkg_temp_lock
953e8c4d31SAmit Kucheria  *
963e8c4d31SAmit Kucheria  * - Other callsites: Must hold pkg_temp_lock
973e8c4d31SAmit Kucheria  */
98b2ce1c88SLen Brown static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
993e8c4d31SAmit Kucheria {
100b2ce1c88SLen Brown 	int id = topology_logical_die_id(cpu);
1013e8c4d31SAmit Kucheria 
102b2ce1c88SLen Brown 	if (id >= 0 && id < max_id)
103b2ce1c88SLen Brown 		return zones[id];
1043e8c4d31SAmit Kucheria 	return NULL;
1053e8c4d31SAmit Kucheria }
1063e8c4d31SAmit Kucheria 
1073e8c4d31SAmit Kucheria /*
1083e8c4d31SAmit Kucheria * tj-max is is interesting because threshold is set relative to this
1093e8c4d31SAmit Kucheria * temperature.
1103e8c4d31SAmit Kucheria */
1113e8c4d31SAmit Kucheria static int get_tj_max(int cpu, u32 *tj_max)
1123e8c4d31SAmit Kucheria {
1133e8c4d31SAmit Kucheria 	u32 eax, edx, val;
1143e8c4d31SAmit Kucheria 	int err;
1153e8c4d31SAmit Kucheria 
1163e8c4d31SAmit Kucheria 	err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
1173e8c4d31SAmit Kucheria 	if (err)
1183e8c4d31SAmit Kucheria 		return err;
1193e8c4d31SAmit Kucheria 
1203e8c4d31SAmit Kucheria 	val = (eax >> 16) & 0xff;
1213e8c4d31SAmit Kucheria 	*tj_max = val * 1000;
1223e8c4d31SAmit Kucheria 
1233e8c4d31SAmit Kucheria 	return val ? 0 : -EINVAL;
1243e8c4d31SAmit Kucheria }
1253e8c4d31SAmit Kucheria 
1263e8c4d31SAmit Kucheria static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
1273e8c4d31SAmit Kucheria {
128b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1293e8c4d31SAmit Kucheria 	u32 eax, edx;
1303e8c4d31SAmit Kucheria 
131b2ce1c88SLen Brown 	rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
132b2ce1c88SLen Brown 			&eax, &edx);
1333e8c4d31SAmit Kucheria 	if (eax & 0x80000000) {
134b2ce1c88SLen Brown 		*temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
1353e8c4d31SAmit Kucheria 		pr_debug("sys_get_curr_temp %d\n", *temp);
1363e8c4d31SAmit Kucheria 		return 0;
1373e8c4d31SAmit Kucheria 	}
1383e8c4d31SAmit Kucheria 	return -EINVAL;
1393e8c4d31SAmit Kucheria }
1403e8c4d31SAmit Kucheria 
1413e8c4d31SAmit Kucheria static int sys_get_trip_temp(struct thermal_zone_device *tzd,
1423e8c4d31SAmit Kucheria 			     int trip, int *temp)
1433e8c4d31SAmit Kucheria {
144b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1453e8c4d31SAmit Kucheria 	unsigned long thres_reg_value;
1463e8c4d31SAmit Kucheria 	u32 mask, shift, eax, edx;
1473e8c4d31SAmit Kucheria 	int ret;
1483e8c4d31SAmit Kucheria 
1493e8c4d31SAmit Kucheria 	if (trip >= MAX_NUMBER_OF_TRIPS)
1503e8c4d31SAmit Kucheria 		return -EINVAL;
1513e8c4d31SAmit Kucheria 
1523e8c4d31SAmit Kucheria 	if (trip) {
1533e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD1;
1543e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD1;
1553e8c4d31SAmit Kucheria 	} else {
1563e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD0;
1573e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD0;
1583e8c4d31SAmit Kucheria 	}
1593e8c4d31SAmit Kucheria 
160b2ce1c88SLen Brown 	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1613e8c4d31SAmit Kucheria 			   &eax, &edx);
1623e8c4d31SAmit Kucheria 	if (ret < 0)
1633e8c4d31SAmit Kucheria 		return ret;
1643e8c4d31SAmit Kucheria 
1653e8c4d31SAmit Kucheria 	thres_reg_value = (eax & mask) >> shift;
1663e8c4d31SAmit Kucheria 	if (thres_reg_value)
167b2ce1c88SLen Brown 		*temp = zonedev->tj_max - thres_reg_value * 1000;
1683e8c4d31SAmit Kucheria 	else
1693e8c4d31SAmit Kucheria 		*temp = 0;
1703e8c4d31SAmit Kucheria 	pr_debug("sys_get_trip_temp %d\n", *temp);
1713e8c4d31SAmit Kucheria 
1723e8c4d31SAmit Kucheria 	return 0;
1733e8c4d31SAmit Kucheria }
1743e8c4d31SAmit Kucheria 
1753e8c4d31SAmit Kucheria static int
1763e8c4d31SAmit Kucheria sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
1773e8c4d31SAmit Kucheria {
178b2ce1c88SLen Brown 	struct zone_device *zonedev = tzd->devdata;
1793e8c4d31SAmit Kucheria 	u32 l, h, mask, shift, intr;
1803e8c4d31SAmit Kucheria 	int ret;
1813e8c4d31SAmit Kucheria 
182b2ce1c88SLen Brown 	if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
1833e8c4d31SAmit Kucheria 		return -EINVAL;
1843e8c4d31SAmit Kucheria 
185b2ce1c88SLen Brown 	ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
1863e8c4d31SAmit Kucheria 			   &l, &h);
1873e8c4d31SAmit Kucheria 	if (ret < 0)
1883e8c4d31SAmit Kucheria 		return ret;
1893e8c4d31SAmit Kucheria 
1903e8c4d31SAmit Kucheria 	if (trip) {
1913e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD1;
1923e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD1;
1933e8c4d31SAmit Kucheria 		intr = THERM_INT_THRESHOLD1_ENABLE;
1943e8c4d31SAmit Kucheria 	} else {
1953e8c4d31SAmit Kucheria 		mask = THERM_MASK_THRESHOLD0;
1963e8c4d31SAmit Kucheria 		shift = THERM_SHIFT_THRESHOLD0;
1973e8c4d31SAmit Kucheria 		intr = THERM_INT_THRESHOLD0_ENABLE;
1983e8c4d31SAmit Kucheria 	}
1993e8c4d31SAmit Kucheria 	l &= ~mask;
2003e8c4d31SAmit Kucheria 	/*
2013e8c4d31SAmit Kucheria 	* When users space sets a trip temperature == 0, which is indication
2023e8c4d31SAmit Kucheria 	* that, it is no longer interested in receiving notifications.
2033e8c4d31SAmit Kucheria 	*/
2043e8c4d31SAmit Kucheria 	if (!temp) {
2053e8c4d31SAmit Kucheria 		l &= ~intr;
2063e8c4d31SAmit Kucheria 	} else {
207b2ce1c88SLen Brown 		l |= (zonedev->tj_max - temp)/1000 << shift;
2083e8c4d31SAmit Kucheria 		l |= intr;
2093e8c4d31SAmit Kucheria 	}
2103e8c4d31SAmit Kucheria 
211b2ce1c88SLen Brown 	return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
212b2ce1c88SLen Brown 			l, h);
2133e8c4d31SAmit Kucheria }
2143e8c4d31SAmit Kucheria 
2153e8c4d31SAmit Kucheria static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
2163e8c4d31SAmit Kucheria 			     enum thermal_trip_type *type)
2173e8c4d31SAmit Kucheria {
2183e8c4d31SAmit Kucheria 	*type = THERMAL_TRIP_PASSIVE;
2193e8c4d31SAmit Kucheria 	return 0;
2203e8c4d31SAmit Kucheria }
2213e8c4d31SAmit Kucheria 
2223e8c4d31SAmit Kucheria /* Thermal zone callback registry */
2233e8c4d31SAmit Kucheria static struct thermal_zone_device_ops tzone_ops = {
2243e8c4d31SAmit Kucheria 	.get_temp = sys_get_curr_temp,
2253e8c4d31SAmit Kucheria 	.get_trip_temp = sys_get_trip_temp,
2263e8c4d31SAmit Kucheria 	.get_trip_type = sys_get_trip_type,
2273e8c4d31SAmit Kucheria 	.set_trip_temp = sys_set_trip_temp,
2283e8c4d31SAmit Kucheria };
2293e8c4d31SAmit Kucheria 
/*
 * Installed as platform_thermal_package_rate_control by module init.
 * Unconditionally returns true.
 */
static bool pkg_thermal_rate_control(void)
{
	return true;
}
2343e8c4d31SAmit Kucheria 
2353e8c4d31SAmit Kucheria /* Enable threshold interrupt on local package/cpu */
2363e8c4d31SAmit Kucheria static inline void enable_pkg_thres_interrupt(void)
2373e8c4d31SAmit Kucheria {
2383e8c4d31SAmit Kucheria 	u8 thres_0, thres_1;
2393e8c4d31SAmit Kucheria 	u32 l, h;
2403e8c4d31SAmit Kucheria 
2413e8c4d31SAmit Kucheria 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2423e8c4d31SAmit Kucheria 	/* only enable/disable if it had valid threshold value */
2433e8c4d31SAmit Kucheria 	thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
2443e8c4d31SAmit Kucheria 	thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
2453e8c4d31SAmit Kucheria 	if (thres_0)
2463e8c4d31SAmit Kucheria 		l |= THERM_INT_THRESHOLD0_ENABLE;
2473e8c4d31SAmit Kucheria 	if (thres_1)
2483e8c4d31SAmit Kucheria 		l |= THERM_INT_THRESHOLD1_ENABLE;
2493e8c4d31SAmit Kucheria 	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2503e8c4d31SAmit Kucheria }
2513e8c4d31SAmit Kucheria 
2523e8c4d31SAmit Kucheria /* Disable threshold interrupt on local package/cpu */
2533e8c4d31SAmit Kucheria static inline void disable_pkg_thres_interrupt(void)
2543e8c4d31SAmit Kucheria {
2553e8c4d31SAmit Kucheria 	u32 l, h;
2563e8c4d31SAmit Kucheria 
2573e8c4d31SAmit Kucheria 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2583e8c4d31SAmit Kucheria 
2593e8c4d31SAmit Kucheria 	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
2603e8c4d31SAmit Kucheria 	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2613e8c4d31SAmit Kucheria }
2623e8c4d31SAmit Kucheria 
2633e8c4d31SAmit Kucheria static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
2643e8c4d31SAmit Kucheria {
2653e8c4d31SAmit Kucheria 	struct thermal_zone_device *tzone = NULL;
2663e8c4d31SAmit Kucheria 	int cpu = smp_processor_id();
267b2ce1c88SLen Brown 	struct zone_device *zonedev;
2683e8c4d31SAmit Kucheria 	u64 msr_val, wr_val;
2693e8c4d31SAmit Kucheria 
2703e8c4d31SAmit Kucheria 	mutex_lock(&thermal_zone_mutex);
271fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
2723e8c4d31SAmit Kucheria 	++pkg_work_cnt;
2733e8c4d31SAmit Kucheria 
274b2ce1c88SLen Brown 	zonedev = pkg_temp_thermal_get_dev(cpu);
275b2ce1c88SLen Brown 	if (!zonedev) {
276fc32150eSClark Williams 		raw_spin_unlock_irq(&pkg_temp_lock);
2773e8c4d31SAmit Kucheria 		mutex_unlock(&thermal_zone_mutex);
2783e8c4d31SAmit Kucheria 		return;
2793e8c4d31SAmit Kucheria 	}
280b2ce1c88SLen Brown 	zonedev->work_scheduled = false;
2813e8c4d31SAmit Kucheria 
2823e8c4d31SAmit Kucheria 	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
2833e8c4d31SAmit Kucheria 	wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
2843e8c4d31SAmit Kucheria 	if (wr_val != msr_val) {
2853e8c4d31SAmit Kucheria 		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
286b2ce1c88SLen Brown 		tzone = zonedev->tzone;
2873e8c4d31SAmit Kucheria 	}
2883e8c4d31SAmit Kucheria 
2893e8c4d31SAmit Kucheria 	enable_pkg_thres_interrupt();
290fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
2913e8c4d31SAmit Kucheria 
2923e8c4d31SAmit Kucheria 	/*
2933e8c4d31SAmit Kucheria 	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
2943e8c4d31SAmit Kucheria 	 * concurrent removal in the cpu offline callback.
2953e8c4d31SAmit Kucheria 	 */
2963e8c4d31SAmit Kucheria 	if (tzone)
2973e8c4d31SAmit Kucheria 		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
2983e8c4d31SAmit Kucheria 
2993e8c4d31SAmit Kucheria 	mutex_unlock(&thermal_zone_mutex);
3003e8c4d31SAmit Kucheria }
3013e8c4d31SAmit Kucheria 
3023e8c4d31SAmit Kucheria static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
3033e8c4d31SAmit Kucheria {
3043e8c4d31SAmit Kucheria 	unsigned long ms = msecs_to_jiffies(notify_delay_ms);
3053e8c4d31SAmit Kucheria 
3063e8c4d31SAmit Kucheria 	schedule_delayed_work_on(cpu, work, ms);
3073e8c4d31SAmit Kucheria }
3083e8c4d31SAmit Kucheria 
3093e8c4d31SAmit Kucheria static int pkg_thermal_notify(u64 msr_val)
3103e8c4d31SAmit Kucheria {
3113e8c4d31SAmit Kucheria 	int cpu = smp_processor_id();
312b2ce1c88SLen Brown 	struct zone_device *zonedev;
3133e8c4d31SAmit Kucheria 	unsigned long flags;
3143e8c4d31SAmit Kucheria 
315fc32150eSClark Williams 	raw_spin_lock_irqsave(&pkg_temp_lock, flags);
3163e8c4d31SAmit Kucheria 	++pkg_interrupt_cnt;
3173e8c4d31SAmit Kucheria 
3183e8c4d31SAmit Kucheria 	disable_pkg_thres_interrupt();
3193e8c4d31SAmit Kucheria 
3203e8c4d31SAmit Kucheria 	/* Work is per package, so scheduling it once is enough. */
321b2ce1c88SLen Brown 	zonedev = pkg_temp_thermal_get_dev(cpu);
322b2ce1c88SLen Brown 	if (zonedev && !zonedev->work_scheduled) {
323b2ce1c88SLen Brown 		zonedev->work_scheduled = true;
324b2ce1c88SLen Brown 		pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
3253e8c4d31SAmit Kucheria 	}
3263e8c4d31SAmit Kucheria 
327fc32150eSClark Williams 	raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
3283e8c4d31SAmit Kucheria 	return 0;
3293e8c4d31SAmit Kucheria }
3303e8c4d31SAmit Kucheria 
3313e8c4d31SAmit Kucheria static int pkg_temp_thermal_device_add(unsigned int cpu)
3323e8c4d31SAmit Kucheria {
333b2ce1c88SLen Brown 	int id = topology_logical_die_id(cpu);
3343e8c4d31SAmit Kucheria 	u32 tj_max, eax, ebx, ecx, edx;
335b2ce1c88SLen Brown 	struct zone_device *zonedev;
3363e8c4d31SAmit Kucheria 	int thres_count, err;
3373e8c4d31SAmit Kucheria 
338b2ce1c88SLen Brown 	if (id >= max_id)
3393e8c4d31SAmit Kucheria 		return -ENOMEM;
3403e8c4d31SAmit Kucheria 
3413e8c4d31SAmit Kucheria 	cpuid(6, &eax, &ebx, &ecx, &edx);
3423e8c4d31SAmit Kucheria 	thres_count = ebx & 0x07;
3433e8c4d31SAmit Kucheria 	if (!thres_count)
3443e8c4d31SAmit Kucheria 		return -ENODEV;
3453e8c4d31SAmit Kucheria 
3463e8c4d31SAmit Kucheria 	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
3473e8c4d31SAmit Kucheria 
3483e8c4d31SAmit Kucheria 	err = get_tj_max(cpu, &tj_max);
3493e8c4d31SAmit Kucheria 	if (err)
3503e8c4d31SAmit Kucheria 		return err;
3513e8c4d31SAmit Kucheria 
352b2ce1c88SLen Brown 	zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
353b2ce1c88SLen Brown 	if (!zonedev)
3543e8c4d31SAmit Kucheria 		return -ENOMEM;
3553e8c4d31SAmit Kucheria 
356b2ce1c88SLen Brown 	INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
357b2ce1c88SLen Brown 	zonedev->cpu = cpu;
358b2ce1c88SLen Brown 	zonedev->tj_max = tj_max;
359b2ce1c88SLen Brown 	zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
3603e8c4d31SAmit Kucheria 			thres_count,
3613e8c4d31SAmit Kucheria 			(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
362b2ce1c88SLen Brown 			zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
363b2ce1c88SLen Brown 	if (IS_ERR(zonedev->tzone)) {
364b2ce1c88SLen Brown 		err = PTR_ERR(zonedev->tzone);
365b2ce1c88SLen Brown 		kfree(zonedev);
3663e8c4d31SAmit Kucheria 		return err;
3673e8c4d31SAmit Kucheria 	}
368bbcf90c0SAndrzej Pietrasiewicz 	err = thermal_zone_device_enable(zonedev->tzone);
369bbcf90c0SAndrzej Pietrasiewicz 	if (err) {
370bbcf90c0SAndrzej Pietrasiewicz 		thermal_zone_device_unregister(zonedev->tzone);
371bbcf90c0SAndrzej Pietrasiewicz 		kfree(zonedev);
372bbcf90c0SAndrzej Pietrasiewicz 		return err;
373bbcf90c0SAndrzej Pietrasiewicz 	}
3743e8c4d31SAmit Kucheria 	/* Store MSR value for package thermal interrupt, to restore at exit */
375b2ce1c88SLen Brown 	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
376b2ce1c88SLen Brown 	      zonedev->msr_pkg_therm_high);
3773e8c4d31SAmit Kucheria 
378b2ce1c88SLen Brown 	cpumask_set_cpu(cpu, &zonedev->cpumask);
379fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
380b2ce1c88SLen Brown 	zones[id] = zonedev;
381fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
3823e8c4d31SAmit Kucheria 	return 0;
3833e8c4d31SAmit Kucheria }
3843e8c4d31SAmit Kucheria 
3853e8c4d31SAmit Kucheria static int pkg_thermal_cpu_offline(unsigned int cpu)
3863e8c4d31SAmit Kucheria {
387b2ce1c88SLen Brown 	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
3883e8c4d31SAmit Kucheria 	bool lastcpu, was_target;
3893e8c4d31SAmit Kucheria 	int target;
3903e8c4d31SAmit Kucheria 
391b2ce1c88SLen Brown 	if (!zonedev)
3923e8c4d31SAmit Kucheria 		return 0;
3933e8c4d31SAmit Kucheria 
394b2ce1c88SLen Brown 	target = cpumask_any_but(&zonedev->cpumask, cpu);
395b2ce1c88SLen Brown 	cpumask_clear_cpu(cpu, &zonedev->cpumask);
3963e8c4d31SAmit Kucheria 	lastcpu = target >= nr_cpu_ids;
3973e8c4d31SAmit Kucheria 	/*
3983e8c4d31SAmit Kucheria 	 * Remove the sysfs files, if this is the last cpu in the package
3993e8c4d31SAmit Kucheria 	 * before doing further cleanups.
4003e8c4d31SAmit Kucheria 	 */
4013e8c4d31SAmit Kucheria 	if (lastcpu) {
402b2ce1c88SLen Brown 		struct thermal_zone_device *tzone = zonedev->tzone;
4033e8c4d31SAmit Kucheria 
4043e8c4d31SAmit Kucheria 		/*
4053e8c4d31SAmit Kucheria 		 * We must protect against a work function calling
4063e8c4d31SAmit Kucheria 		 * thermal_zone_update, after/while unregister. We null out
4073e8c4d31SAmit Kucheria 		 * the pointer under the zone mutex, so the worker function
4083e8c4d31SAmit Kucheria 		 * won't try to call.
4093e8c4d31SAmit Kucheria 		 */
4103e8c4d31SAmit Kucheria 		mutex_lock(&thermal_zone_mutex);
411b2ce1c88SLen Brown 		zonedev->tzone = NULL;
4123e8c4d31SAmit Kucheria 		mutex_unlock(&thermal_zone_mutex);
4133e8c4d31SAmit Kucheria 
4143e8c4d31SAmit Kucheria 		thermal_zone_device_unregister(tzone);
4153e8c4d31SAmit Kucheria 	}
4163e8c4d31SAmit Kucheria 
4173e8c4d31SAmit Kucheria 	/* Protect against work and interrupts */
418fc32150eSClark Williams 	raw_spin_lock_irq(&pkg_temp_lock);
4193e8c4d31SAmit Kucheria 
4203e8c4d31SAmit Kucheria 	/*
4213e8c4d31SAmit Kucheria 	 * Check whether this cpu was the current target and store the new
4223e8c4d31SAmit Kucheria 	 * one. When we drop the lock, then the interrupt notify function
4233e8c4d31SAmit Kucheria 	 * will see the new target.
4243e8c4d31SAmit Kucheria 	 */
425b2ce1c88SLen Brown 	was_target = zonedev->cpu == cpu;
426b2ce1c88SLen Brown 	zonedev->cpu = target;
4273e8c4d31SAmit Kucheria 
4283e8c4d31SAmit Kucheria 	/*
4293e8c4d31SAmit Kucheria 	 * If this is the last CPU in the package remove the package
4303e8c4d31SAmit Kucheria 	 * reference from the array and restore the interrupt MSR. When we
4313e8c4d31SAmit Kucheria 	 * drop the lock neither the interrupt notify function nor the
4323e8c4d31SAmit Kucheria 	 * worker will see the package anymore.
4333e8c4d31SAmit Kucheria 	 */
4343e8c4d31SAmit Kucheria 	if (lastcpu) {
435b2ce1c88SLen Brown 		zones[topology_logical_die_id(cpu)] = NULL;
4363e8c4d31SAmit Kucheria 		/* After this point nothing touches the MSR anymore. */
4373e8c4d31SAmit Kucheria 		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
438b2ce1c88SLen Brown 		      zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
4393e8c4d31SAmit Kucheria 	}
4403e8c4d31SAmit Kucheria 
4413e8c4d31SAmit Kucheria 	/*
4423e8c4d31SAmit Kucheria 	 * Check whether there is work scheduled and whether the work is
4433e8c4d31SAmit Kucheria 	 * targeted at the outgoing CPU.
4443e8c4d31SAmit Kucheria 	 */
445b2ce1c88SLen Brown 	if (zonedev->work_scheduled && was_target) {
4463e8c4d31SAmit Kucheria 		/*
4473e8c4d31SAmit Kucheria 		 * To cancel the work we need to drop the lock, otherwise
4483e8c4d31SAmit Kucheria 		 * we might deadlock if the work needs to be flushed.
4493e8c4d31SAmit Kucheria 		 */
450fc32150eSClark Williams 		raw_spin_unlock_irq(&pkg_temp_lock);
451b2ce1c88SLen Brown 		cancel_delayed_work_sync(&zonedev->work);
452fc32150eSClark Williams 		raw_spin_lock_irq(&pkg_temp_lock);
4533e8c4d31SAmit Kucheria 		/*
4543e8c4d31SAmit Kucheria 		 * If this is not the last cpu in the package and the work
4553e8c4d31SAmit Kucheria 		 * did not run after we dropped the lock above, then we
4563e8c4d31SAmit Kucheria 		 * need to reschedule the work, otherwise the interrupt
4573e8c4d31SAmit Kucheria 		 * stays disabled forever.
4583e8c4d31SAmit Kucheria 		 */
459b2ce1c88SLen Brown 		if (!lastcpu && zonedev->work_scheduled)
460b2ce1c88SLen Brown 			pkg_thermal_schedule_work(target, &zonedev->work);
4613e8c4d31SAmit Kucheria 	}
4623e8c4d31SAmit Kucheria 
463fc32150eSClark Williams 	raw_spin_unlock_irq(&pkg_temp_lock);
4643e8c4d31SAmit Kucheria 
4653e8c4d31SAmit Kucheria 	/* Final cleanup if this is the last cpu */
4663e8c4d31SAmit Kucheria 	if (lastcpu)
467b2ce1c88SLen Brown 		kfree(zonedev);
4683e8c4d31SAmit Kucheria 	return 0;
4693e8c4d31SAmit Kucheria }
4703e8c4d31SAmit Kucheria 
4713e8c4d31SAmit Kucheria static int pkg_thermal_cpu_online(unsigned int cpu)
4723e8c4d31SAmit Kucheria {
473b2ce1c88SLen Brown 	struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
4743e8c4d31SAmit Kucheria 	struct cpuinfo_x86 *c = &cpu_data(cpu);
4753e8c4d31SAmit Kucheria 
4763e8c4d31SAmit Kucheria 	/* Paranoia check */
4773e8c4d31SAmit Kucheria 	if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
4783e8c4d31SAmit Kucheria 		return -ENODEV;
4793e8c4d31SAmit Kucheria 
4803e8c4d31SAmit Kucheria 	/* If the package exists, nothing to do */
481b2ce1c88SLen Brown 	if (zonedev) {
482b2ce1c88SLen Brown 		cpumask_set_cpu(cpu, &zonedev->cpumask);
4833e8c4d31SAmit Kucheria 		return 0;
4843e8c4d31SAmit Kucheria 	}
4853e8c4d31SAmit Kucheria 	return pkg_temp_thermal_device_add(cpu);
4863e8c4d31SAmit Kucheria }
4873e8c4d31SAmit Kucheria 
4883e8c4d31SAmit Kucheria static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
4899c51044cSThomas Gleixner 	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
4903e8c4d31SAmit Kucheria 	{}
4913e8c4d31SAmit Kucheria };
4923e8c4d31SAmit Kucheria MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
4933e8c4d31SAmit Kucheria 
4943e8c4d31SAmit Kucheria static int __init pkg_temp_thermal_init(void)
4953e8c4d31SAmit Kucheria {
4963e8c4d31SAmit Kucheria 	int ret;
4973e8c4d31SAmit Kucheria 
4983e8c4d31SAmit Kucheria 	if (!x86_match_cpu(pkg_temp_thermal_ids))
4993e8c4d31SAmit Kucheria 		return -ENODEV;
5003e8c4d31SAmit Kucheria 
501b2ce1c88SLen Brown 	max_id = topology_max_packages() * topology_max_die_per_package();
502b2ce1c88SLen Brown 	zones = kcalloc(max_id, sizeof(struct zone_device *),
5033e8c4d31SAmit Kucheria 			   GFP_KERNEL);
504b2ce1c88SLen Brown 	if (!zones)
5053e8c4d31SAmit Kucheria 		return -ENOMEM;
5063e8c4d31SAmit Kucheria 
5073e8c4d31SAmit Kucheria 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
5083e8c4d31SAmit Kucheria 				pkg_thermal_cpu_online,	pkg_thermal_cpu_offline);
5093e8c4d31SAmit Kucheria 	if (ret < 0)
5103e8c4d31SAmit Kucheria 		goto err;
5113e8c4d31SAmit Kucheria 
5123e8c4d31SAmit Kucheria 	/* Store the state for module exit */
5133e8c4d31SAmit Kucheria 	pkg_thermal_hp_state = ret;
5143e8c4d31SAmit Kucheria 
5153e8c4d31SAmit Kucheria 	platform_thermal_package_notify = pkg_thermal_notify;
5163e8c4d31SAmit Kucheria 	platform_thermal_package_rate_control = pkg_thermal_rate_control;
5173e8c4d31SAmit Kucheria 
5183e8c4d31SAmit Kucheria 	 /* Don't care if it fails */
5193e8c4d31SAmit Kucheria 	pkg_temp_debugfs_init();
5203e8c4d31SAmit Kucheria 	return 0;
5213e8c4d31SAmit Kucheria 
5223e8c4d31SAmit Kucheria err:
523b2ce1c88SLen Brown 	kfree(zones);
5243e8c4d31SAmit Kucheria 	return ret;
5253e8c4d31SAmit Kucheria }
5263e8c4d31SAmit Kucheria module_init(pkg_temp_thermal_init)
5273e8c4d31SAmit Kucheria 
5283e8c4d31SAmit Kucheria static void __exit pkg_temp_thermal_exit(void)
5293e8c4d31SAmit Kucheria {
5303e8c4d31SAmit Kucheria 	platform_thermal_package_notify = NULL;
5313e8c4d31SAmit Kucheria 	platform_thermal_package_rate_control = NULL;
5323e8c4d31SAmit Kucheria 
5333e8c4d31SAmit Kucheria 	cpuhp_remove_state(pkg_thermal_hp_state);
5343e8c4d31SAmit Kucheria 	debugfs_remove_recursive(debugfs);
535b2ce1c88SLen Brown 	kfree(zones);
5363e8c4d31SAmit Kucheria }
5373e8c4d31SAmit Kucheria module_exit(pkg_temp_thermal_exit)
5383e8c4d31SAmit Kucheria 
5393e8c4d31SAmit Kucheria MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
5403e8c4d31SAmit Kucheria MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
5413e8c4d31SAmit Kucheria MODULE_LICENSE("GPL v2");
542