xref: /openbmc/linux/drivers/cpufreq/cpufreq.c (revision 7490ca1e)
/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);
/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
 * - Governor routines that can be called in the cpufreq hotplug path should
 *   not take this sem, as the top-level hotplug notifier handler takes it.
 * - The lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
static int lock_policy_rwsem_##mode					\
(int cpu)								\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
	if (unlikely(!cpu_online(cpu))) {				\
		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
		return -1;						\
	}								\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);

lock_policy_rwsem(write, cpu);

static void unlock_policy_rwsem_read(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}

static void unlock_policy_rwsem_write(int cpu)
{
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
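
/*
 * Illustrative sketch (not part of the original file): a typical
 * hotplug-safe reader takes the policy rwsem, relying on
 * lock_policy_rwsem_read() to re-check that the CPU is still online
 * (it returns a negative value otherwise), and drops the lock before
 * returning.  The helper name example_read_cur is hypothetical.
 */
static inline unsigned int example_read_cur(unsigned int cpu)
{
	struct cpufreq_policy *policy;
	unsigned int cur = 0;

	if (lock_policy_rwsem_read(cpu) < 0)
		return 0;		/* CPU went offline under us */

	policy = per_cpu(cpufreq_cpu_data, cpu);
	if (policy)
		cur = policy->cur;

	unlock_policy_rwsem_read(cpu);
	return cur;
}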


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * Each list is protected by the locking of its notifier head.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	init_cpufreq_transition_notifier_list_called = true;
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
	struct cpufreq_policy *data;
	unsigned long flags;

	if (cpu >= nr_cpu_ids)
		goto err_out;

	/* get the cpufreq driver */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	if (!cpufreq_driver)
		goto err_out_unlock;

	if (!try_module_get(cpufreq_driver->owner))
		goto err_out_unlock;


	/* get the CPU */
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data)
		goto err_out_put_module;

	if (!kobject_get(&data->kobj))
		goto err_out_put_module;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return data;

err_out_put_module:
	module_put(cpufreq_driver->owner);
err_out_unlock:
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
	return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);


void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
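
/*
 * Illustrative sketch (not part of the original file): every successful
 * cpufreq_cpu_get() pins both the policy kobject and the driver module,
 * so each call must be balanced by cpufreq_cpu_put().  The helper name
 * example_with_policy is hypothetical.
 */
static inline void example_with_policy(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		return;			/* no driver or no policy for cpu */

	pr_debug("cpu %u currently at %u kHz\n", cpu, policy->cur);

	cpufreq_cpu_put(policy);	/* drop kobject and module refs */
}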


/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}
	if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
		pr_debug("scaling loops_per_jiffy to %lu "
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
	}
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
#endif
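
/*
 * Worked example (illustrative, not from the original file):
 * cpufreq_scale(old, div, mult) returns old * mult / div.  If
 * loops_per_jiffy was 4000000 when the reference frequency of
 * 800000 kHz was saved, a transition to 1600000 kHz gives
 * cpufreq_scale(4000000, 800000, 1600000) == 8000000, i.e. the
 * delay-loop calibration scales linearly with the clock.
 */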


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;

	BUG_ON(irqs_disabled());

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
	switch (state) {

	case CPUFREQ_PRECHANGE:
		/* detect if the driver reported a value as "old frequency"
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
				pr_debug("Warning: CPU frequency is"
					" %u, cpufreq assumed %u kHz.\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
			}
		}
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
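
/*
 * Illustrative sketch (not part of the original file): a scaling driver
 * brackets the actual hardware switch with the two notifications.  The
 * function example_do_transition is hypothetical; the hardware write is
 * elided.
 */
static inline int example_do_transition(struct cpufreq_policy *policy,
					unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	freqs.cpu = policy->cpu;
	freqs.old = policy->cur;
	freqs.new = new_freq;

	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	/* ... program the hardware to new_freq here ... */
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);

	return 0;
}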



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
			return t;

	return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
				struct cpufreq_governor **governor)
{
	int err = -EINVAL;

	if (!cpufreq_driver)
		goto out;

	if (cpufreq_driver->setpolicy) {
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
			err = 0;
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_POWERSAVE;
			err = 0;
		}
	} else if (cpufreq_driver->target) {
		struct cpufreq_governor *t;

		mutex_lock(&cpufreq_governor_mutex);

		t = __find_governor(str_governor);

		if (t == NULL) {
			int ret;

			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);

			if (ret == 0)
				t = __find_governor(str_governor);
		}

		if (t != NULL) {
			*governor = t;
			err = 0;
		}

		mutex_unlock(&cpufreq_governor_mutex);
	}
out:
	return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
(struct cpufreq_policy *policy, char *buf)		\
{							\
	return sprintf(buf, "%u\n", policy->object);	\
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
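
/*
 * Illustrative expansion (not part of the original file): for instance,
 * show_one(scaling_cur_freq, cur) above expands to
 *
 *	static ssize_t show_scaling_cur_freq
 *	(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->cur);
 *	}
 *
 * i.e. one sysfs show routine per attribute, each printing a single
 * unsigned int field of the policy.
 */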

static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	unsigned int ret = -EINVAL;					\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	return ret ? ret : count;					\
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
{
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
				policy->governor->name);
	return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int ret = -EINVAL;
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

	ret = sscanf(buf, "%15s", str_governor);
	if (ret != 1)
		return -EINVAL;

	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
		return -EINVAL;

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret)
		return ret;
	else
		return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

	if (!cpufreq_driver->target) {
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
			goto out;
		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
	ssize_t i = 0;
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
			break;
	}
	i += sprintf(&buf[i], "\n");
	return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
	if (cpumask_empty(policy->related_cpus))
		return show_cpus(policy->cpus, buf);
	return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	unsigned int ret;

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&cpuinfo_transition_latency.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&related_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	&scaling_setspeed.attr,
	NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		goto fail;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		goto fail;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);
fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	pr_debug("last reference is dropped\n");
	complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};

/*
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
				  struct cpufreq_policy *policy,
				  struct device *dev)
{
	int ret = 0;
#ifdef CONFIG_SMP
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;

	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;

		if (cpu == j)
			continue;

		/* Check for existing affected CPUs.
		 * They may not be aware of it due to CPU Hotplug.
		 * cpufreq_cpu_put is called when the device is removed
		 * in __cpufreq_remove_dev()
		 */
		managed_policy = cpufreq_cpu_get(j);
		if (unlikely(managed_policy)) {

			/* Set proper policy_cpu */
			unlock_policy_rwsem_write(cpu);
			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;

			if (lock_policy_rwsem_write(cpu) < 0) {
				/* Should not go through policy unlock path */
				if (cpufreq_driver->exit)
					cpufreq_driver->exit(policy);
				cpufreq_cpu_put(managed_policy);
				return -EBUSY;
			}

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			cpumask_copy(managed_policy->cpus, policy->cpus);
			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

			pr_debug("CPU already managed, adding link\n");
			ret = sysfs_create_link(&dev->kobj,
						&managed_policy->kobj,
						"cpufreq");
			if (ret)
				cpufreq_cpu_put(managed_policy);
			/*
			 * Success. We only needed to be added to the mask.
			 * Call driver->exit() because only the cpu parent of
			 * the kobj needed to call init().
			 */
			if (cpufreq_driver->exit)
				cpufreq_driver->exit(policy);

			if (!ret)
				return 1;
			else
				return ret;
		}
	}
#endif
	return ret;
}


/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
				   struct cpufreq_policy *policy)
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;
		struct device *cpu_dev;

		if (j == cpu)
			continue;
		if (!cpu_online(j))
			continue;

		pr_debug("CPU %u already managed, adding link\n", j);
		managed_policy = cpufreq_cpu_get(cpu);
		cpu_dev = get_cpu_device(j);
		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			cpufreq_cpu_put(managed_policy);
			return ret;
		}
	}
	return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
				     struct cpufreq_policy *policy,
				     struct device *dev)
{
	struct cpufreq_policy new_policy;
	struct freq_attr **drv_attr;
	unsigned long flags;
	int ret = 0;
	unsigned int j;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
				   &dev->kobj, "cpufreq");
	if (ret)
		return ret;

	/* set up files for this cpu device */
	drv_attr = cpufreq_driver->attr;
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
			goto err_out_kobj_put;
		drv_attr++;
	}
	if (cpufreq_driver->get) {
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->target) {
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->bios_limit) {
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
			goto err_out_kobj_put;
	}

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus) {
		if (!cpu_online(j))
			continue;
		per_cpu(cpufreq_cpu_data, j) = policy;
		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = cpufreq_add_dev_symlink(cpu, policy);
	if (ret)
		goto err_out_kobj_put;

	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
	/* assure that the starting sequence is run in __cpufreq_set_policy */
	policy->governor = NULL;

	/* set default policy */
	ret = __cpufreq_set_policy(policy, &new_policy);
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
		pr_debug("setting policy failed\n");
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
	}
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}


/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with CPU hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int ret = 0, found = 0;
	struct cpufreq_policy *policy;
	unsigned long flags;
	unsigned int j;
#ifdef CONFIG_HOTPLUG_CPU
	int sibling;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}
#endif

	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	ret = -ENOMEM;
	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
	ret = (lock_policy_rwsem_write(cpu) < 0);
	WARN_ON(ret);

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* Set governor before ->init, so that driver could check it */
#ifdef CONFIG_HOTPLUG_CPU
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cp->governor &&
		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
			policy->governor = cp->governor;
			found = 1;
			break;
		}
	}
#endif
	if (!found)
		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	/* call driver. From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_unlock_policy;
	}
	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

	ret = cpufreq_add_dev_policy(cpu, policy, dev);
	if (ret) {
		if (ret > 0)
			/* This is a managed cpu, symlink created,
			   exit with 0 */
			ret = 0;
		goto err_unlock_policy;
	}

	ret = cpufreq_add_dev_interface(cpu, policy, dev);
	if (ret)
		goto err_out_unregister;

	unlock_policy_rwsem_write(cpu);

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;


err_out_unregister:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_unlock_policy:
	unlock_policy_rwsem_write(cpu);
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}


/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
#ifdef CONFIG_SMP
	struct device *cpu_dev;
	unsigned int j;
#endif

	pr_debug("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	per_cpu(cpufreq_cpu_data, cpu) = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		pr_debug("removing link\n");
		cpumask_clear_cpu(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		kobj = &dev->kobj;
		cpufreq_cpu_put(data);
		unlock_policy_rwsem_write(cpu);
		sysfs_remove_link(kobj, "cpufreq");
		return 0;
	}
#endif

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
			CPUFREQ_NAME_LEN);
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
	 * the sysfs links afterwards.
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			per_cpu(cpufreq_cpu_data, j) = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		for_each_cpu(j, data->cpus) {
			if (j == cpu)
				continue;
			pr_debug("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			strncpy(per_cpu(cpufreq_cpu_governor, j),
				data->governor->name, CPUFREQ_NAME_LEN);
#endif
			cpu_dev = get_cpu_device(j);
			kobj = &cpu_dev->kobj;
			unlock_policy_rwsem_write(cpu);
			sysfs_remove_link(kobj, "cpufreq");
			lock_policy_rwsem_write(cpu);
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	kobj = &data->kobj;
	cmp = &data->kobj_unregister;
	unlock_policy_rwsem_write(cpu);
	kobject_put(kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	pr_debug("waiting for dropping of refcount\n");
	wait_for_completion(cmp);
	pr_debug("wait complete\n");

	lock_policy_rwsem_write(cpu);
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);
	unlock_policy_rwsem_write(cpu);

#ifdef CONFIG_HOTPLUG_CPU
	/* when the CPU which is the parent of the kobj is hotplugged
	 * offline, check for siblings, and create cpufreq sysfs interface
	 * and symlinks
	 */
	if (unlikely(cpumask_weight(data->cpus) > 1)) {
		/* first sibling now owns the new sysfs dir */
		cpumask_clear_cpu(cpu, data->cpus);
		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);

		/* finally remove our own symlink */
		lock_policy_rwsem_write(cpu);
		__cpufreq_remove_dev(dev, sif);
	}
#endif

	free_cpumask_var(data->related_cpus);
	free_cpumask_var(data->cpus);
	kfree(data);

	return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int retval;

	if (cpu_is_offline(cpu))
		return 0;

	if (unlikely(lock_policy_rwsem_write(cpu)))
		BUG();

	retval = __cpufreq_remove_dev(dev, sif);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
	pr_debug("handle_update for cpu %u called\n", cpu);
	cpufreq_update_policy(cpu);
}

/**
 *	cpufreq_out_of_sync - If the actual and saved CPU frequencies differ, we're in deep trouble.
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
 *	We adjust to the current frequency first, and need to clean up later.
 *	So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

	freqs.cpu = cpu;
	freqs.old = old_freq;
	freqs.new = new_freq;
	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->cur;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);
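
/*
 * Illustrative usage (not part of the original file): callers can poll
 * the cached frequency cheaply, treating 0 as "no cpufreq policy for
 * this CPU".  The helper name example_log_quick_freq is hypothetical.
 */
static inline void example_log_quick_freq(unsigned int cpu)
{
	unsigned int khz = cpufreq_quick_get(cpu);

	if (khz)
		pr_info("cpu%u last known freq: %u kHz\n", cpu, khz);
	else
		pr_info("cpu%u not managed by cpufreq\n", cpu);
}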

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->max;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
	unsigned int ret_freq = 0;

	if (!cpufreq_driver->get)
		return ret_freq;

	ret_freq = cpufreq_driver->get(cpu);

	if (ret_freq && policy->cur &&
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
		/* verify no discrepancy between actual and
					saved value exists */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
			schedule_work(&policy->update);
		}
	}

	return ret_freq;
}
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current CPU frequency, asking the hardware via the driver.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("suspending cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return 0;

	if (cpufreq_driver->suspend) {
		ret = cpufreq_driver->suspend(cpu_policy);
		if (ret)
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
					"step on CPU %u\n", cpu_policy->cpu);
	}

	cpufreq_cpu_put(cpu_policy);
	return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) schedule a call to cpufreq_update_policy() ASAP as interrupts
 *	    are restored. It will verify that the current freq is in sync
 *	    with what we believe it to be. This is a bit later than when it
 *	    should be, but nonetheless it's better than calling
 *	    cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("resuming cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(cpu_policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", cpu_policy->cpu);
			goto fail;
		}
	}

	schedule_work(&cpu_policy->update);

fail:
	cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};


/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Add a driver to one of two lists: either a list of drivers that
 *	are notified about clock rate changes (once before and once after
 *	the transition), or a list of drivers that are notified about
 *	changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	WARN_ON(!init_cpufreq_transition_notifier_list_called);

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_register(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
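
/*
 * Illustrative sketch (not part of the original file): a transition
 * notifier sees each frequency change twice, once with
 * CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE.  The names
 * example_trans_cb and example_trans_nb are hypothetical.
 */
static int example_trans_cb(struct notifier_block *nb, unsigned long val,
			    void *data)
{
	struct cpufreq_freqs *freqs = data;

	if (val == CPUFREQ_POSTCHANGE)
		pr_debug("cpu%u: %u kHz -> %u kHz\n",
			 freqs->cpu, freqs->old, freqs->new);

	return NOTIFY_OK;
}

static struct notifier_block example_trans_nb __maybe_unused = {
	.notifier_call = example_trans_cb,
};

/* registered with:
 *	cpufreq_register_notifier(&example_trans_nb,
 *				  CPUFREQ_TRANSITION_NOTIFIER);
 */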


/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_unregister(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;

	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
		target_freq, relation);
	if (cpu_online(policy->cpu) && cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);

	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
	int ret = -EINVAL;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		goto fail;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	unlock_policy_rwsem_write(policy->cpu);

fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
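
/*
 * Illustrative sketch (not part of the original file): a governor
 * typically asks for the lowest frequency at or above some computed
 * value with CPUFREQ_RELATION_L, or the highest at or below it with
 * CPUFREQ_RELATION_H.  The name example_governor_kick and the linear
 * load-to-frequency mapping are hypothetical.
 */
static inline void example_governor_kick(struct cpufreq_policy *policy,
					 unsigned int load_pct)
{
	unsigned int wanted = policy->min +
		(policy->max - policy->min) * load_pct / 100;

	/* takes the policy rwsem and refcounts internally */
	cpufreq_driver_target(policy, wanted, CPUFREQ_RELATION_L);
}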

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
	int ret = 0;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (cpu_online(cpu) && cpufreq_driver->getavg)
		ret = cpufreq_driver->getavg(policy, cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * Dispatch a governor event (CPUFREQ_GOV_START, CPUFREQ_GOV_STOP or
 * CPUFREQ_GOV_LIMITS) to the policy's governor.
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
{
	int ret;

	/* A fallback governor only needs to be defined when the default
	   governor is known to have latency restrictions, e.g.
	   conservative or ondemand.  Kconfig already ensures that this
	   is the case. */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "%s governor failed, too long"
			       " transition latency of HW, fallback"
			       " to %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
	}

	if (!try_module_get(policy->governor->owner))
		return -EINVAL;

	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
						policy->cpu, event);
	ret = policy->governor->governor(policy, event);

	/* we keep one module reference alive for
			each CPU governed by this CPU */
	if ((event != CPUFREQ_GOV_START) || ret)
		module_put(policy->governor->owner);
	if ((event == CPUFREQ_GOV_STOP) && !ret)
		module_put(policy->governor->owner);

	return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err;

	if (!governor)
		return -EINVAL;

	mutex_lock(&cpufreq_governor_mutex);

	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
	}

	mutex_unlock(&cpufreq_governor_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
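
/*
 * Illustrative sketch (not part of the original file): a minimal
 * governor fills in struct cpufreq_governor and registers it, usually
 * from its module init.  example_gov_handler and example_governor are
 * hypothetical; a real governor reacts to the GOV_START/STOP/LIMITS
 * events dispatched by __cpufreq_governor() above.
 */
static int example_gov_handler(struct cpufreq_policy *policy,
			       unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_START:		/* begin managing this policy */
	case CPUFREQ_GOV_STOP:		/* stop managing this policy */
	case CPUFREQ_GOV_LIMITS:	/* policy->min/max changed */
		break;
	}
	return 0;
}

static struct cpufreq_governor example_governor __maybe_unused = {
	.name		= "example",
	.governor	= example_gov_handler,
	.owner		= THIS_MODULE,
};

/* registered with: cpufreq_register_governor(&example_governor); */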


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

	if (!governor)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
 * @cpu: CPU whose policy is requested
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy)
{
	int ret = 0;

	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
		policy->min, policy->max);

	memcpy(&policy->cpuinfo, &data->cpuinfo,
				sizeof(struct cpufreq_cpuinfo));

	if (policy->min > data->max || policy->max < data->min) {
		ret = -EINVAL;
		goto error_out;
	}

	/* verify the cpu speed can be set within this limit */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_ADJUST, policy);

	/* adjust if necessary - hardware incompatibility */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_INCOMPATIBLE, policy);

	/* verify the cpu speed can be set within this limit,
	   which might be different to the first one */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* notification of the new policy */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_NOTIFY, policy);

	data->min = policy->min;
	data->max = policy->max;

	pr_debug("new min and max freqs are %u - %u kHz\n",
					data->min, data->max);

	if (cpufreq_driver->setpolicy) {
		data->policy = policy->policy;
		pr_debug("setting range\n");
		ret = cpufreq_driver->setpolicy(policy);
	} else {
		if (policy->governor != data->governor) {
			/* save old, working values */
			struct cpufreq_governor *old_gov = data->governor;

			pr_debug("governor switch\n");

			/* end old governor */
			if (data->governor)
				__cpufreq_governor(data, CPUFREQ_GOV_STOP);

			/* start new governor */
			data->governor = policy->governor;
			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
				/* new governor failed, so re-start old one */
				pr_debug("starting governor %s failed\n",
							data->governor->name);
				if (old_gov) {
					data->governor = old_gov;
					__cpufreq_governor(data,
							   CPUFREQ_GOV_START);
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
		pr_debug("governor: change or update limits\n");
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

error_out:
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
 *	Useful for policy notifiers which have different requirements
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
	struct cpufreq_policy policy;
	int ret;

	if (!data) {
		ret = -ENODEV;
		goto no_policy;
	}

	if (unlikely(lock_policy_rwsem_write(cpu))) {
		ret = -EINVAL;
		goto fail;
	}

	pr_debug("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
	policy.min = data->user_policy.min;
	policy.max = data->user_policy.max;
	policy.policy = data->user_policy.policy;
	policy.governor = data->user_policy.governor;

	/* BIOS might change freq behind our back
	  -> ask driver for current freq and notify governors about a change */
	if (cpufreq_driver->get) {
		policy.cur = cpufreq_driver->get(cpu);
		if (!data->cur) {
			pr_debug("Driver did not initialize current freq");
			data->cur = policy.cur;
		} else {
			if (data->cur != policy.cur)
				cpufreq_out_of_sync(cpu, data->cur,
								policy.cur);
		}
	}

	ret = __cpufreq_set_policy(data, &policy);

	unlock_policy_rwsem_write(cpu);

fail:
	cpufreq_cpu_put(data);
no_policy:
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;

	dev = get_cpu_device(cpu);
	if (dev) {
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			if (unlikely(lock_policy_rwsem_write(cpu)))
				BUG();

			__cpufreq_remove_dev(dev, NULL);
			break;
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call	= cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
	unsigned long flags;
	int ret;

	if (!driver_data || !driver_data->verify || !driver_data->init ||
	    ((!driver_data->setpolicy) && (!driver_data->target)))
		return -EINVAL;

	pr_debug("trying to register driver %s\n", driver_data->name);

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	if (cpufreq_driver) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		return -EBUSY;
	}
	cpufreq_driver = driver_data;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = subsys_interface_register(&cpufreq_interface);
	if (ret)
		goto err_null_driver;

	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
				ret = 0;
				break;
			}

		/* if all ->init() calls failed, unregister */
		if (ret) {
			pr_debug("no CPU initialized for driver %s\n",
							driver_data->name);
			goto err_if_unreg;
		}
	}

	register_hotcpu_notifier(&cpufreq_cpu_notifier);
	pr_debug("driver %s up and running\n", driver_data->name);

	return 0;
err_if_unreg:
	subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
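
/*
 * Illustrative sketch (not part of the original file): the minimum a
 * scaling driver must provide is ->init and ->verify plus either
 * ->setpolicy or ->target, as checked at the top of
 * cpufreq_register_driver().  All example_* names and the frequency
 * limits below are hypothetical; example_do_transition is the sketch
 * defined earlier in this file.
 */
static int example_cpu_init(struct cpufreq_policy *policy)
{
	policy->cpuinfo.min_freq = 200000;		/* 200 MHz, made up */
	policy->cpuinfo.max_freq = 800000;		/* 800 MHz, made up */
	policy->cpuinfo.transition_latency = 100000;	/* 100 us, made up */
	policy->cur = 800000;
	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;
	return 0;
}

static int example_verify(struct cpufreq_policy *policy)
{
	/* clamp the requested policy to the hardware range */
	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
				     policy->cpuinfo.max_freq);
	return 0;
}

static int example_set_target(struct cpufreq_policy *policy,
			      unsigned int target_freq, unsigned int relation)
{
	return example_do_transition(policy, target_freq);
}

static struct cpufreq_driver example_driver __maybe_unused = {
	.name	= "example",
	.owner	= THIS_MODULE,
	.init	= example_cpu_init,
	.verify	= example_verify,
	.target	= example_set_target,
};

/* registered from module init with:
 *	cpufreq_register_driver(&example_driver);
 */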


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long flags;

	if (!cpufreq_driver || (driver != cpufreq_driver))
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	subsys_interface_unregister(&cpufreq_interface);
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(cpufreq_policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}

	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);