xref: /openbmc/linux/arch/x86/kernel/cpu/umwait.c (revision e553d2a5)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/syscore_ops.h>
3 #include <linux/suspend.h>
4 #include <linux/cpu.h>
5 
6 #include <asm/msr.h>
7 
/* c02_disable argument for UMWAIT_CTRL_VAL() that leaves C0.2 enabled. */
#define UMWAIT_C02_ENABLE	0

/*
 * Compose an IA32_UMWAIT_CONTROL value: @max_time is masked to the time
 * field and @c02_disable is masked to the C0.2 disable bit.
 */
#define UMWAIT_CTRL_VAL(max_time, c02_disable)				\
	(((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) |		\
	 ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
13 
14 /*
15  * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
16  * umwait max time is 100000 in TSC-quanta and C0.2 is enabled
17  */
18 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
19 
20 /*
21  * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
22  * hardware or BIOS before kernel boot.
23  */
24 static u32 orig_umwait_control_cached __ro_after_init;
25 
/*
 * Held by the sysfs store handlers around their read-modify-write of
 * umwait_control_cached and the resulting IA32_UMWAIT_CONTROL update.
 */
static DEFINE_MUTEX(umwait_lock);
31 
32 static void umwait_update_control_msr(void * unused)
33 {
34 	lockdep_assert_irqs_disabled();
35 	wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
36 }
37 
38 /*
39  * The CPU hotplug callback sets the control MSR to the global control
40  * value.
41  *
42  * Disable interrupts so the read of umwait_control_cached and the WRMSR
43  * are protected against a concurrent sysfs write. Otherwise the sysfs
44  * write could update the cached value after it had been read on this CPU
45  * and issue the IPI before the old value had been written. The IPI would
46  * interrupt, write the new value and after return from IPI the previous
47  * value would be written by this CPU.
48  *
49  * With interrupts disabled the upcoming CPU either sees the new control
50  * value or the IPI is updating this CPU to the new control value after
51  * interrupts have been reenabled.
52  */
53 static int umwait_cpu_online(unsigned int cpu)
54 {
55 	local_irq_disable();
56 	umwait_update_control_msr(NULL);
57 	local_irq_enable();
58 	return 0;
59 }
60 
61 /*
62  * The CPU hotplug callback sets the control MSR to the original control
63  * value.
64  */
65 static int umwait_cpu_offline(unsigned int cpu)
66 {
67 	/*
68 	 * This code is protected by the CPU hotplug already and
69 	 * orig_umwait_control_cached is never changed after it caches
70 	 * the original control MSR value in umwait_init(). So there
71 	 * is no race condition here.
72 	 */
73 	wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
74 
75 	return 0;
76 }
77 
78 /*
79  * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
80  * is the only active CPU at this time. The MSR is set up on the APs via the
81  * CPU hotplug callback.
82  *
83  * This function is invoked on resume from suspend and hibernation. On
84  * resume from suspend the restore should be not required, but we neither
85  * trust the firmware nor does it matter if the same value is written
86  * again.
87  */
88 static void umwait_syscore_resume(void)
89 {
90 	umwait_update_control_msr(NULL);
91 }
92 
93 static struct syscore_ops umwait_syscore_ops = {
94 	.resume	= umwait_syscore_resume,
95 };
96 
97 /* sysfs interface */
98 
99 /*
100  * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
101  * Otherwise, C0.2 is enabled.
102  */
103 static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
104 {
105 	return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
106 }
107 
108 static inline u32 umwait_ctrl_max_time(u32 ctrl)
109 {
110 	return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
111 }
112 
113 static inline void umwait_update_control(u32 maxtime, bool c02_enable)
114 {
115 	u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
116 
117 	if (!c02_enable)
118 		ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
119 
120 	WRITE_ONCE(umwait_control_cached, ctrl);
121 	/* Propagate to all CPUs */
122 	on_each_cpu(umwait_update_control_msr, NULL, 1);
123 }
124 
125 static ssize_t
126 enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
127 {
128 	u32 ctrl = READ_ONCE(umwait_control_cached);
129 
130 	return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
131 }
132 
133 static ssize_t enable_c02_store(struct device *dev,
134 				struct device_attribute *attr,
135 				const char *buf, size_t count)
136 {
137 	bool c02_enable;
138 	u32 ctrl;
139 	int ret;
140 
141 	ret = kstrtobool(buf, &c02_enable);
142 	if (ret)
143 		return ret;
144 
145 	mutex_lock(&umwait_lock);
146 
147 	ctrl = READ_ONCE(umwait_control_cached);
148 	if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
149 		umwait_update_control(ctrl, c02_enable);
150 
151 	mutex_unlock(&umwait_lock);
152 
153 	return count;
154 }
155 static DEVICE_ATTR_RW(enable_c02);
156 
157 static ssize_t
158 max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
159 {
160 	u32 ctrl = READ_ONCE(umwait_control_cached);
161 
162 	return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
163 }
164 
165 static ssize_t max_time_store(struct device *kobj,
166 			      struct device_attribute *attr,
167 			      const char *buf, size_t count)
168 {
169 	u32 max_time, ctrl;
170 	int ret;
171 
172 	ret = kstrtou32(buf, 0, &max_time);
173 	if (ret)
174 		return ret;
175 
176 	/* bits[1:0] must be zero */
177 	if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
178 		return -EINVAL;
179 
180 	mutex_lock(&umwait_lock);
181 
182 	ctrl = READ_ONCE(umwait_control_cached);
183 	if (max_time != umwait_ctrl_max_time(ctrl))
184 		umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
185 
186 	mutex_unlock(&umwait_lock);
187 
188 	return count;
189 }
190 static DEVICE_ATTR_RW(max_time);
191 
192 static struct attribute *umwait_attrs[] = {
193 	&dev_attr_enable_c02.attr,
194 	&dev_attr_max_time.attr,
195 	NULL
196 };
197 
198 static struct attribute_group umwait_attr_group = {
199 	.attrs = umwait_attrs,
200 	.name = "umwait_control",
201 };
202 
203 static int __init umwait_init(void)
204 {
205 	struct device *dev;
206 	int ret;
207 
208 	if (!boot_cpu_has(X86_FEATURE_WAITPKG))
209 		return -ENODEV;
210 
211 	/*
212 	 * Cache the original control MSR value before the control MSR is
213 	 * changed. This is the only place where orig_umwait_control_cached
214 	 * is modified.
215 	 */
216 	rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
217 
218 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
219 				umwait_cpu_online, umwait_cpu_offline);
220 	if (ret < 0) {
221 		/*
222 		 * On failure, the control MSR on all CPUs has the
223 		 * original control value.
224 		 */
225 		return ret;
226 	}
227 
228 	register_syscore_ops(&umwait_syscore_ops);
229 
230 	/*
231 	 * Add umwait control interface. Ignore failure, so at least the
232 	 * default values are set up in case the machine manages to boot.
233 	 */
234 	dev = cpu_subsys.dev_root;
235 	return sysfs_create_group(&dev->kobj, &umwait_attr_group);
236 }
237 device_initcall(umwait_init);
238