// SPDX-License-Identifier: GPL-2.0
/*
 *  cpuidle-pseries - idle state cpuidle driver.
 *  Adapted from drivers/idle/intel_idle.c and
 *  drivers/acpi/processor_idle.c
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>

#include <asm/paca.h>
#include <asm/reg.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/runlatch.h>
#include <asm/idle.h>
#include <asm/plpar_wrappers.h>

struct cpuidle_driver pseries_idle_driver = {
	.name             = "pseries_idle",
	.owner            = THIS_MODULE,
};

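/*
 * max_idle_state and cpuidle_state_table describe the idle state table
 * chosen at probe time (shared vs. dedicated partition).  snooze_timeout
 * caps the snooze polling loop, in timebase ticks, once a deeper state
 * is available.
 */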
static int max_idle_state __read_mostly;
static struct cpuidle_state *cpuidle_state_table __read_mostly;
static u64 snooze_timeout __read_mostly;
static bool snooze_timeout_en __read_mostly;

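/*
 * snooze_loop - poll-based idle state.  Spin at low SMT thread priority
 * until a reschedule is needed or, when snooze_timeout_en is set, until
 * snooze_timeout timebase ticks have elapsed.
 */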
static int snooze_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	u64 snooze_exit_time;

	set_thread_flag(TIF_POLLING_NRFLAG);

	pseries_idle_prolog();
	local_irq_enable();
	snooze_exit_time = get_tb() + snooze_timeout;

	while (!need_resched()) {
		HMT_low();
		HMT_very_low();
		if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
			/*
			 * Task has not woken up but we are exiting the polling
			 * loop anyway. Require a barrier after polling is
			 * cleared to order subsequent test of need_resched().
			 */
			clear_thread_flag(TIF_POLLING_NRFLAG);
			smp_mb();
			break;
		}
	}

	HMT_medium();
	clear_thread_flag(TIF_POLLING_NRFLAG);

	local_irq_disable();

	pseries_idle_epilog();

	return index;
}

static void check_and_cede_processor(void)
{
	/*
	 * Ensure our interrupt state is properly tracked and check
	 * whether an interrupt occurred while we were soft-disabled;
	 * only cede if it is safe to do so.
	 */
	if (prep_irq_for_idle()) {
		cede_processor();
#ifdef CONFIG_TRACE_IRQFLAGS
		/* Ensure that H_CEDE returns with IRQs on */
		if (WARN_ON(!(mfmsr() & MSR_EE)))
			__hard_irq_enable();
#endif
	}
}

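/*
 * dedicated_cede_loop - CEDE state for dedicated-processor partitions.
 * The donate_dedicated_cpu flag in the lppaca tells the hypervisor that
 * this dedicated CPU's idle cycles may be donated while it is ceded.
 */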
static int dedicated_cede_loop(struct cpuidle_device *dev,
				struct cpuidle_driver *drv,
				int index)
{
	pseries_idle_prolog();
	get_lppaca()->donate_dedicated_cpu = 1;

	HMT_medium();
	check_and_cede_processor();

	local_irq_disable();
	get_lppaca()->donate_dedicated_cpu = 0;

	pseries_idle_epilog();

	return index;
}

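/*
 * shared_cede_loop - CEDE state for shared-processor partitions: cede the
 * virtual processor back to the hypervisor so its cycles can be dispatched
 * elsewhere.
 */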
static int shared_cede_loop(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index)
{
	pseries_idle_prolog();

	/*
	 * Yield the processor to the hypervisor.  We return when an
	 * external interrupt occurs (the interrupt is delivered before
	 * we get back here) or when another processor prods us.  On
	 * return, external interrupts are enabled.
	 */
	check_and_cede_processor();

	local_irq_disable();
	pseries_idle_epilog();

	return index;
}

/*
 * States for dedicated partition case.
 */
static struct cpuidle_state dedicated_states[] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = &snooze_loop },
	{ /* CEDE */
		.name = "CEDE",
		.desc = "CEDE",
		.exit_latency = 10,
		.target_residency = 100,
		.enter = &dedicated_cede_loop },
};

/*
 * States for shared partition case.
 */
static struct cpuidle_state shared_states[] = {
	{ /* Snooze */
		.name = "snooze",
		.desc = "snooze",
		.exit_latency = 0,
		.target_residency = 0,
		.enter = &snooze_loop },
	{ /* Shared Cede */
		.name = "Shared Cede",
		.desc = "Shared Cede",
		.exit_latency = 10,
		.target_residency = 100,
		.enter = &shared_cede_loop },
};

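/*
 * CPU hotplug callbacks: enable the per-CPU cpuidle device when a CPU
 * comes online and disable it when the CPU is dead, under the cpuidle
 * pause/resume lock.
 */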
static int pseries_cpuidle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_enable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

static int pseries_cpuidle_cpu_dead(unsigned int cpu)
{
	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);

	if (dev && cpuidle_get_driver()) {
		cpuidle_pause_and_lock();
		cpuidle_disable_device(dev);
		cpuidle_resume_and_unlock();
	}
	return 0;
}

/*
 * pseries_cpuidle_driver_init()
 * Populate the driver's state array from the chosen cpuidle_state_table,
 * skipping entries that have no enter() callback.
 */
static int pseries_cpuidle_driver_init(void)
{
	int idle_state;
	struct cpuidle_driver *drv = &pseries_idle_driver;

	drv->state_count = 0;

	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
		/* Is the state not enabled? */
		if (cpuidle_state_table[idle_state].enter == NULL)
			continue;

		drv->states[drv->state_count] =	/* structure copy */
			cpuidle_state_table[idle_state];

		drv->state_count += 1;
	}

	return 0;
}

/*
 * pseries_idle_probe()
 * Choose the state table for the shared versus dedicated partition case.
 */
static int pseries_idle_probe(void)
{
	if (cpuidle_disable != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
		/*
		 * Use local_paca instead of get_lppaca() because
		 * preemption is not disabled here.  That is fine: the
		 * shared-processor property is partition-wide, so the
		 * lppaca of any CPU, not necessarily the current one,
		 * can be examined.
		 */
		if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
			cpuidle_state_table = shared_states;
			max_idle_state = ARRAY_SIZE(shared_states);
		} else {
			cpuidle_state_table = dedicated_states;
			max_idle_state = ARRAY_SIZE(dedicated_states);
		}
	} else {
		return -ENODEV;
	}

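	/*
	 * Limit snooze polling to the target residency of the next
	 * (deeper) state, converted from microseconds to timebase ticks.
	 */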
	if (max_idle_state > 1) {
		snooze_timeout_en = true;
		snooze_timeout = cpuidle_state_table[1].target_residency *
				 tb_ticks_per_usec;
	}
	return 0;
}

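/*
 * pseries_processor_idle_init - probe the partition type, build the
 * driver's state list, register with the cpuidle core and hook up the
 * CPU hotplug callbacks.
 */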
static int __init pseries_processor_idle_init(void)
{
	int retval;

	retval = pseries_idle_probe();
	if (retval)
		return retval;

	pseries_cpuidle_driver_init();
	retval = cpuidle_register(&pseries_idle_driver, NULL);
	if (retval) {
		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
		return retval;
	}

	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "cpuidle/pseries:online",
					   pseries_cpuidle_cpu_online, NULL);
	WARN_ON(retval < 0);
	retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
					   "cpuidle/pseries:DEAD", NULL,
					   pseries_cpuidle_cpu_dead);
	WARN_ON(retval < 0);
	printk(KERN_DEBUG "pseries_idle_driver registered\n");
	return 0;
}

device_initcall(pseries_processor_idle_init);