xref: /openbmc/linux/drivers/cpuidle/cpuidle.c (revision 08157984)
1 /*
2  * cpuidle.c - core cpuidle infrastructure
3  *
4  * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
5  *               Shaohua Li <shaohua.li@intel.com>
6  *               Adam Belay <abelay@novell.com>
7  *
8  * This code is licenced under the GPL.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/mutex.h>
13 #include <linux/sched.h>
14 #include <linux/notifier.h>
15 #include <linux/pm_qos.h>
16 #include <linux/cpu.h>
17 #include <linux/cpuidle.h>
18 #include <linux/ktime.h>
19 #include <linux/hrtimer.h>
20 #include <linux/module.h>
21 #include <trace/events/power.h>
22 
23 #include "cpuidle.h"
24 
/* Per-CPU pointer to the cpuidle device registered for that CPU, if any. */
DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);

/* Mutex taken around device registration and pause/resume in this file. */
DEFINE_MUTEX(cpuidle_lock);
/* List that registered devices are linked onto via their device_list member. */
LIST_HEAD(cpuidle_detected_devices);

/* Number of devices currently enabled; gates (un)installing the idle handler. */
static int enabled_devices;
/* Non-zero once cpuidle is disabled (disable_cpuidle() or the "off" parameter). */
static int off __read_mostly;
/* Non-zero while cpuidle_idle_call() is allowed to enter idle states. */
static int initialized __read_mostly;
33 
34 int cpuidle_disabled(void)
35 {
36 	return off;
37 }
38 void disable_cpuidle(void)
39 {
40 	off = 1;
41 }
42 
/*
 * cpuidle_kick_cpus - called after the idle handler has been uninstalled.
 *
 * With CONFIG_ARCH_HAS_CPU_IDLE_WAIT this defers to cpu_idle_wait().
 * Without it, an SMP build is rejected at compile time and a non-SMP
 * build gets an empty stub.
 */
#ifdef CONFIG_ARCH_HAS_CPU_IDLE_WAIT
static void cpuidle_kick_cpus(void)
{
	cpu_idle_wait();
}
#else /* !CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
# ifdef CONFIG_SMP
#  error "Arch needs cpu_idle_wait() equivalent here"
# endif
static void cpuidle_kick_cpus(void)
{
}
#endif /* CONFIG_ARCH_HAS_CPU_IDLE_WAIT */
53 
54 static int __cpuidle_register_device(struct cpuidle_device *dev);
55 
56 static inline int cpuidle_enter(struct cpuidle_device *dev,
57 				struct cpuidle_driver *drv, int index)
58 {
59 	struct cpuidle_state *target_state = &drv->states[index];
60 	return target_state->enter(dev, drv, index);
61 }
62 
63 static inline int cpuidle_enter_tk(struct cpuidle_device *dev,
64 			       struct cpuidle_driver *drv, int index)
65 {
66 	return cpuidle_wrap_enter(dev, drv, index, cpuidle_enter);
67 }
68 
/* Signature shared by the routines that can be used to enter an idle state. */
typedef int (*cpuidle_enter_t)(struct cpuidle_device *dev,
			       struct cpuidle_driver *drv, int index);

/*
 * Enter routine used by cpuidle_idle_call(); cpuidle_enable_device() points
 * it at either cpuidle_enter_tk or cpuidle_enter.
 */
static cpuidle_enter_t cpuidle_enter_ops;
73 
74 /**
75  * cpuidle_play_dead - cpu off-lining
76  *
77  * Returns in case of an error or no driver
78  */
79 int cpuidle_play_dead(void)
80 {
81 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
82 	struct cpuidle_driver *drv = cpuidle_get_driver();
83 	int i, dead_state = -1;
84 	int power_usage = -1;
85 
86 	if (!drv)
87 		return -ENODEV;
88 
89 	/* Find lowest-power state that supports long-term idle */
90 	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
91 		struct cpuidle_state *s = &drv->states[i];
92 
93 		if (s->power_usage < power_usage && s->enter_dead) {
94 			power_usage = s->power_usage;
95 			dead_state = i;
96 		}
97 	}
98 
99 	if (dead_state != -1)
100 		return drv->states[dead_state].enter_dead(dev, dead_state);
101 
102 	return -ENODEV;
103 }
104 
105 /**
106  * cpuidle_idle_call - the main idle loop
107  *
108  * NOTE: no locks or semaphores should be used here
109  * return non-zero on failure
110  */
111 int cpuidle_idle_call(void)
112 {
113 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
114 	struct cpuidle_driver *drv = cpuidle_get_driver();
115 	int next_state, entered_state;
116 
117 	if (off)
118 		return -ENODEV;
119 
120 	if (!initialized)
121 		return -ENODEV;
122 
123 	/* check if the device is ready */
124 	if (!dev || !dev->enabled)
125 		return -EBUSY;
126 
127 #if 0
128 	/* shows regressions, re-enable for 2.6.29 */
129 	/*
130 	 * run any timers that can be run now, at this point
131 	 * before calculating the idle duration etc.
132 	 */
133 	hrtimer_peek_ahead_timers();
134 #endif
135 
136 	/* ask the governor for the next state */
137 	next_state = cpuidle_curr_governor->select(drv, dev);
138 	if (need_resched()) {
139 		local_irq_enable();
140 		return 0;
141 	}
142 
143 	trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
144 	trace_cpu_idle_rcuidle(next_state, dev->cpu);
145 
146 	entered_state = cpuidle_enter_ops(dev, drv, next_state);
147 
148 	trace_power_end_rcuidle(dev->cpu);
149 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
150 
151 	if (entered_state >= 0) {
152 		/* Update cpuidle counters */
153 		/* This can be moved to within driver enter routine
154 		 * but that results in multiple copies of same code.
155 		 */
156 		dev->states_usage[entered_state].time +=
157 				(unsigned long long)dev->last_residency;
158 		dev->states_usage[entered_state].usage++;
159 	} else {
160 		dev->last_residency = 0;
161 	}
162 
163 	/* give the governor an opportunity to reflect on the outcome */
164 	if (cpuidle_curr_governor->reflect)
165 		cpuidle_curr_governor->reflect(dev, entered_state);
166 
167 	return 0;
168 }
169 
170 /**
171  * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
172  */
173 void cpuidle_install_idle_handler(void)
174 {
175 	if (enabled_devices) {
176 		/* Make sure all changes finished before we switch to new idle */
177 		smp_wmb();
178 		initialized = 1;
179 	}
180 }
181 
182 /**
183  * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
184  */
185 void cpuidle_uninstall_idle_handler(void)
186 {
187 	if (enabled_devices) {
188 		initialized = 0;
189 		cpuidle_kick_cpus();
190 	}
191 }
192 
193 /**
194  * cpuidle_pause_and_lock - temporarily disables CPUIDLE
195  */
196 void cpuidle_pause_and_lock(void)
197 {
198 	mutex_lock(&cpuidle_lock);
199 	cpuidle_uninstall_idle_handler();
200 }
201 
202 EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
203 
204 /**
205  * cpuidle_resume_and_unlock - resumes CPUIDLE operation
206  */
207 void cpuidle_resume_and_unlock(void)
208 {
209 	cpuidle_install_idle_handler();
210 	mutex_unlock(&cpuidle_lock);
211 }
212 
213 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
214 
215 /**
216  * cpuidle_wrap_enter - performs timekeeping and irqen around enter function
217  * @dev: pointer to a valid cpuidle_device object
218  * @drv: pointer to a valid cpuidle_driver object
219  * @index: index of the target cpuidle state.
220  */
221 int cpuidle_wrap_enter(struct cpuidle_device *dev,
222 				struct cpuidle_driver *drv, int index,
223 				int (*enter)(struct cpuidle_device *dev,
224 					struct cpuidle_driver *drv, int index))
225 {
226 	ktime_t time_start, time_end;
227 	s64 diff;
228 
229 	time_start = ktime_get();
230 
231 	index = enter(dev, drv, index);
232 
233 	time_end = ktime_get();
234 
235 	local_irq_enable();
236 
237 	diff = ktime_to_us(ktime_sub(time_end, time_start));
238 	if (diff > INT_MAX)
239 		diff = INT_MAX;
240 
241 	dev->last_residency = (int) diff;
242 
243 	return index;
244 }
245 
#ifdef CONFIG_ARCH_HAS_CPU_RELAX
/*
 * poll_idle - idle by spinning on cpu_relax() until a reschedule is needed
 * @dev: cpuidle device whose last_residency is updated
 * @drv: cpuidle driver (unused)
 * @index: index of the polling state, returned unchanged
 *
 * Interrupts are enabled for the duration of the loop.  The time spent
 * polling is stored in dev->last_residency in microseconds, capped at
 * INT_MAX.
 */
static int poll_idle(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	ktime_t start, end;
	s64 elapsed_us;

	start = ktime_get();
	local_irq_enable();
	while (!need_resched())
		cpu_relax();
	end = ktime_get();

	elapsed_us = ktime_to_us(ktime_sub(end, start));
	dev->last_residency = (elapsed_us > INT_MAX) ? INT_MAX :
						       (int) elapsed_us;

	return index;
}

/*
 * poll_idle_init - set up state 0 of @drv as the core polling state
 * @drv: driver whose states[0] entry is overwritten
 */
static void poll_idle_init(struct cpuidle_driver *drv)
{
	struct cpuidle_state *poll = &drv->states[0];

	snprintf(poll->name, CPUIDLE_NAME_LEN, "POLL");
	snprintf(poll->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
	poll->enter = poll_idle;
	poll->flags = 0;
	poll->disable = 0;
	poll->exit_latency = 0;
	poll->target_residency = 0;
	poll->power_usage = -1;
}
#else
/* No cpu_relax() based polling state on this configuration. */
static void poll_idle_init(struct cpuidle_driver *drv)
{
}
#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
284 
285 /**
286  * cpuidle_enable_device - enables idle PM for a CPU
287  * @dev: the CPU
288  *
289  * This function must be called between cpuidle_pause_and_lock and
290  * cpuidle_resume_and_unlock when used externally.
291  */
292 int cpuidle_enable_device(struct cpuidle_device *dev)
293 {
294 	int ret, i;
295 	struct cpuidle_driver *drv = cpuidle_get_driver();
296 
297 	if (dev->enabled)
298 		return 0;
299 	if (!drv || !cpuidle_curr_governor)
300 		return -EIO;
301 	if (!dev->state_count)
302 		dev->state_count = drv->state_count;
303 
304 	if (dev->registered == 0) {
305 		ret = __cpuidle_register_device(dev);
306 		if (ret)
307 			return ret;
308 	}
309 
310 	cpuidle_enter_ops = drv->en_core_tk_irqen ?
311 		cpuidle_enter_tk : cpuidle_enter;
312 
313 	poll_idle_init(drv);
314 
315 	if ((ret = cpuidle_add_state_sysfs(dev)))
316 		return ret;
317 
318 	if (cpuidle_curr_governor->enable &&
319 	    (ret = cpuidle_curr_governor->enable(drv, dev)))
320 		goto fail_sysfs;
321 
322 	for (i = 0; i < dev->state_count; i++) {
323 		dev->states_usage[i].usage = 0;
324 		dev->states_usage[i].time = 0;
325 	}
326 	dev->last_residency = 0;
327 
328 	smp_wmb();
329 
330 	dev->enabled = 1;
331 
332 	enabled_devices++;
333 	return 0;
334 
335 fail_sysfs:
336 	cpuidle_remove_state_sysfs(dev);
337 
338 	return ret;
339 }
340 
341 EXPORT_SYMBOL_GPL(cpuidle_enable_device);
342 
343 /**
344  * cpuidle_disable_device - disables idle PM for a CPU
345  * @dev: the CPU
346  *
347  * This function must be called between cpuidle_pause_and_lock and
348  * cpuidle_resume_and_unlock when used externally.
349  */
350 void cpuidle_disable_device(struct cpuidle_device *dev)
351 {
352 	if (!dev->enabled)
353 		return;
354 	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
355 		return;
356 
357 	dev->enabled = 0;
358 
359 	if (cpuidle_curr_governor->disable)
360 		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
361 
362 	cpuidle_remove_state_sysfs(dev);
363 	enabled_devices--;
364 }
365 
366 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
367 
368 /**
369  * __cpuidle_register_device - internal register function called before register
370  * and enable routines
371  * @dev: the cpu
372  *
373  * cpuidle_lock mutex must be held before this is called
374  */
375 static int __cpuidle_register_device(struct cpuidle_device *dev)
376 {
377 	int ret;
378 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
379 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
380 
381 	if (!dev)
382 		return -EINVAL;
383 	if (!try_module_get(cpuidle_driver->owner))
384 		return -EINVAL;
385 
386 	init_completion(&dev->kobj_unregister);
387 
388 	per_cpu(cpuidle_devices, dev->cpu) = dev;
389 	list_add(&dev->device_list, &cpuidle_detected_devices);
390 	if ((ret = cpuidle_add_sysfs(cpu_dev))) {
391 		module_put(cpuidle_driver->owner);
392 		return ret;
393 	}
394 
395 	dev->registered = 1;
396 	return 0;
397 }
398 
399 /**
400  * cpuidle_register_device - registers a CPU's idle PM feature
401  * @dev: the cpu
402  */
403 int cpuidle_register_device(struct cpuidle_device *dev)
404 {
405 	int ret;
406 
407 	mutex_lock(&cpuidle_lock);
408 
409 	if ((ret = __cpuidle_register_device(dev))) {
410 		mutex_unlock(&cpuidle_lock);
411 		return ret;
412 	}
413 
414 	cpuidle_enable_device(dev);
415 	cpuidle_install_idle_handler();
416 
417 	mutex_unlock(&cpuidle_lock);
418 
419 	return 0;
420 
421 }
422 
423 EXPORT_SYMBOL_GPL(cpuidle_register_device);
424 
425 /**
426  * cpuidle_unregister_device - unregisters a CPU's idle PM feature
427  * @dev: the cpu
428  */
429 void cpuidle_unregister_device(struct cpuidle_device *dev)
430 {
431 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
432 	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
433 
434 	if (dev->registered == 0)
435 		return;
436 
437 	cpuidle_pause_and_lock();
438 
439 	cpuidle_disable_device(dev);
440 
441 	cpuidle_remove_sysfs(cpu_dev);
442 	list_del(&dev->device_list);
443 	wait_for_completion(&dev->kobj_unregister);
444 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
445 
446 	cpuidle_resume_and_unlock();
447 
448 	module_put(cpuidle_driver->owner);
449 }
450 
451 EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
452 
#ifdef CONFIG_SMP

/* Empty IPI handler: receiving the IPI is what wakes the target CPU. */
static void smp_callback(void *v)
{
}

/*
 * Notifier callback for a changed latency requirement.  All processors
 * have to leave their current C-state so that a suitable one gets chosen
 * again; a cross-cpu IPI is enough to wake them up.
 */
static int cpuidle_latency_notify(struct notifier_block *b,
		unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 1);

	return NOTIFY_OK;
}

static struct notifier_block cpuidle_latency_notifier = {
	.notifier_call = cpuidle_latency_notify,
};

/* Hook @n into the PM_QOS_CPU_DMA_LATENCY notifier chain. */
static inline void latency_notifier_init(struct notifier_block *n)
{
	pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
}

#else /* CONFIG_SMP */

/* Without SMP no notifier is registered. */
#define latency_notifier_init(x) do { } while (0)

#endif /* CONFIG_SMP */
487 
488 /**
489  * cpuidle_init - core initializer
490  */
491 static int __init cpuidle_init(void)
492 {
493 	int ret;
494 
495 	if (cpuidle_disabled())
496 		return -ENODEV;
497 
498 	ret = cpuidle_add_interface(cpu_subsys.dev_root);
499 	if (ret)
500 		return ret;
501 
502 	latency_notifier_init(&cpuidle_latency_notifier);
503 
504 	return 0;
505 }
506 
507 module_param(off, int, 0444);
508 core_initcall(cpuidle_init);
509