xref: /openbmc/linux/drivers/cpufreq/cpufreq.c (revision 95e9fd10)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 #include <linux/syscore_ops.h>
32 
33 #include <trace/events/power.h>
34 
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
61  *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in the cpufreq hotplug path should
63  *   not take this sem, as the top-level hotplug notifier handler takes it.
64  * - Lock should not be held across
65  *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
66  */
67 static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
68 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
69 
70 #define lock_policy_rwsem(mode, cpu)					\
71 static int lock_policy_rwsem_##mode					\
72 (int cpu)								\
73 {									\
74 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
75 	BUG_ON(policy_cpu == -1);					\
76 	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
77 	if (unlikely(!cpu_online(cpu))) {				\
78 		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
79 		return -1;						\
80 	}								\
81 									\
82 	return 0;							\
83 }
84 
85 lock_policy_rwsem(read, cpu);
86 
87 lock_policy_rwsem(write, cpu);
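
/*
 * For reference, the two instantiations above expand into helpers of the
 * following shape -- a sketch of the generated read-side variant only, not an
 * additional definition:
 *
 *	static int lock_policy_rwsem_read(int cpu)
 *	{
 *		int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 *		BUG_ON(policy_cpu == -1);
 *		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *		if (unlikely(!cpu_online(cpu))) {
 *			up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *			return -1;
 *		}
 *		return 0;
 *	}
 */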
88 
89 static void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 
96 static void unlock_policy_rwsem_write(int cpu)
97 {
98 	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
99 	BUG_ON(policy_cpu == -1);
100 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
101 }
102 
103 
104 /* internal prototypes */
105 static int __cpufreq_governor(struct cpufreq_policy *policy,
106 		unsigned int event);
107 static unsigned int __cpufreq_get(unsigned int cpu);
108 static void handle_update(struct work_struct *work);
109 
110 /**
111  * Two notifier lists: the "policy" list is involved in the
112  * validation process for a new CPU frequency policy; the
113  * "transition" list for kernel code that needs to handle
114  * changes to devices when the CPU clock speed changes.
115  * Each notifier head provides its own locking.
116  */
117 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
118 static struct srcu_notifier_head cpufreq_transition_notifier_list;
119 
120 static bool init_cpufreq_transition_notifier_list_called;
121 static int __init init_cpufreq_transition_notifier_list(void)
122 {
123 	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
124 	init_cpufreq_transition_notifier_list_called = true;
125 	return 0;
126 }
127 pure_initcall(init_cpufreq_transition_notifier_list);
128 
129 static int off __read_mostly;
130 int cpufreq_disabled(void)
131 {
132 	return off;
133 }
134 void disable_cpufreq(void)
135 {
136 	off = 1;
137 }
138 static LIST_HEAD(cpufreq_governor_list);
139 static DEFINE_MUTEX(cpufreq_governor_mutex);
140 
141 static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
142 {
143 	struct cpufreq_policy *data;
144 	unsigned long flags;
145 
146 	if (cpu >= nr_cpu_ids)
147 		goto err_out;
148 
149 	/* get the cpufreq driver */
150 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
151 
152 	if (!cpufreq_driver)
153 		goto err_out_unlock;
154 
155 	if (!try_module_get(cpufreq_driver->owner))
156 		goto err_out_unlock;
157 
158 
159 	/* get the CPU */
160 	data = per_cpu(cpufreq_cpu_data, cpu);
161 
162 	if (!data)
163 		goto err_out_put_module;
164 
165 	if (!sysfs && !kobject_get(&data->kobj))
166 		goto err_out_put_module;
167 
168 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
169 	return data;
170 
171 err_out_put_module:
172 	module_put(cpufreq_driver->owner);
173 err_out_unlock:
174 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
175 err_out:
176 	return NULL;
177 }
178 
179 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
180 {
181 	return __cpufreq_cpu_get(cpu, false);
182 }
183 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
184 
185 static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
186 {
187 	return __cpufreq_cpu_get(cpu, true);
188 }
189 
190 static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
191 {
192 	if (!sysfs)
193 		kobject_put(&data->kobj);
194 	module_put(cpufreq_driver->owner);
195 }
196 
197 void cpufreq_cpu_put(struct cpufreq_policy *data)
198 {
199 	__cpufreq_cpu_put(data, false);
200 }
201 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
202 
203 static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
204 {
205 	__cpufreq_cpu_put(data, true);
206 }
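
/*
 * Typical usage of the get/put pair above, sketched for a hypothetical caller
 * (my_report_limits() is illustrative and not part of this file): every
 * successful cpufreq_cpu_get() must be balanced by a cpufreq_cpu_put() once
 * the policy is no longer needed, or the driver module can never be unloaded.
 *
 *	static void my_report_limits(unsigned int cpu)
 *	{
 *		struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *
 *		if (!policy)
 *			return;
 *		pr_info("cpu%u: %u - %u kHz\n", cpu, policy->min, policy->max);
 *		cpufreq_cpu_put(policy);
 *	}
 */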
207 
208 /*********************************************************************
209  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
210  *********************************************************************/
211 
212 /**
213  * adjust_jiffies - adjust the system "loops_per_jiffy"
214  *
215  * This function alters the system "loops_per_jiffy" for the clock
216  * speed change. Note that loops_per_jiffy cannot be updated on SMP
217  * systems as each CPU might be scaled differently. So, use the arch
218  * per-CPU loops_per_jiffy value wherever possible.
219  */
220 #ifndef CONFIG_SMP
221 static unsigned long l_p_j_ref;
222 static unsigned int  l_p_j_ref_freq;
223 
224 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
225 {
226 	if (ci->flags & CPUFREQ_CONST_LOOPS)
227 		return;
228 
229 	if (!l_p_j_ref_freq) {
230 		l_p_j_ref = loops_per_jiffy;
231 		l_p_j_ref_freq = ci->old;
232 		pr_debug("saving %lu as reference value for loops_per_jiffy; "
233 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
234 	}
235 	if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
236 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
237 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
238 								ci->new);
239 		pr_debug("scaling loops_per_jiffy to %lu "
240 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
241 	}
242 }
243 #else
244 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
245 {
246 	return;
247 }
248 #endif
249 
250 
251 /**
252  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
253  * on frequency transition.
254  *
255  * This function calls the transition notifiers and the "adjust_jiffies"
256  * function. It is called twice on all CPU frequency changes that have
257  * external effects.
258  */
259 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
260 {
261 	struct cpufreq_policy *policy;
262 
263 	BUG_ON(irqs_disabled());
264 
265 	freqs->flags = cpufreq_driver->flags;
266 	pr_debug("notification %u of frequency transition to %u kHz\n",
267 		state, freqs->new);
268 
269 	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
270 	switch (state) {
271 
272 	case CPUFREQ_PRECHANGE:
273 		/* detect if the driver reported a value as "old frequency"
274 		 * which is not equal to what the cpufreq core thinks is
275 		 * "old frequency".
276 		 */
277 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
278 			if ((policy) && (policy->cpu == freqs->cpu) &&
279 			    (policy->cur) && (policy->cur != freqs->old)) {
280 				pr_debug("Warning: CPU frequency is"
281 					" %u, cpufreq assumed %u kHz.\n",
282 					freqs->old, policy->cur);
283 				freqs->old = policy->cur;
284 			}
285 		}
286 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
287 				CPUFREQ_PRECHANGE, freqs);
288 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
289 		break;
290 
291 	case CPUFREQ_POSTCHANGE:
292 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
293 		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
294 			(unsigned long)freqs->cpu);
295 		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
296 		trace_cpu_frequency(freqs->new, freqs->cpu);
297 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
298 				CPUFREQ_POSTCHANGE, freqs);
299 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
300 			policy->cur = freqs->new;
301 		break;
302 	}
303 }
304 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
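
/*
 * Sketch of how a scaling driver's ->target() callback is expected to bracket
 * the actual frequency switch with the notifier calls above; the PRECHANGE
 * call runs before the hardware is touched and the POSTCHANGE call after it
 * (my_hw_set_frequency() is a placeholder, not a real function):
 *
 *	struct cpufreq_freqs freqs = {
 *		.cpu = policy->cpu,
 *		.old = policy->cur,
 *		.new = target_freq,
 *	};
 *
 *	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *	my_hw_set_frequency(target_freq);
 *	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */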
305 
306 
307 
308 /*********************************************************************
309  *                          SYSFS INTERFACE                          *
310  *********************************************************************/
311 
312 static struct cpufreq_governor *__find_governor(const char *str_governor)
313 {
314 	struct cpufreq_governor *t;
315 
316 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
317 		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
318 			return t;
319 
320 	return NULL;
321 }
322 
323 /**
324  * cpufreq_parse_governor - parse a governor string
325  */
326 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
327 				struct cpufreq_governor **governor)
328 {
329 	int err = -EINVAL;
330 
331 	if (!cpufreq_driver)
332 		goto out;
333 
334 	if (cpufreq_driver->setpolicy) {
335 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
336 			*policy = CPUFREQ_POLICY_PERFORMANCE;
337 			err = 0;
338 		} else if (!strnicmp(str_governor, "powersave",
339 						CPUFREQ_NAME_LEN)) {
340 			*policy = CPUFREQ_POLICY_POWERSAVE;
341 			err = 0;
342 		}
343 	} else if (cpufreq_driver->target) {
344 		struct cpufreq_governor *t;
345 
346 		mutex_lock(&cpufreq_governor_mutex);
347 
348 		t = __find_governor(str_governor);
349 
350 		if (t == NULL) {
351 			int ret;
352 
353 			mutex_unlock(&cpufreq_governor_mutex);
354 			ret = request_module("cpufreq_%s", str_governor);
355 			mutex_lock(&cpufreq_governor_mutex);
356 
357 			if (ret == 0)
358 				t = __find_governor(str_governor);
359 		}
360 
361 		if (t != NULL) {
362 			*governor = t;
363 			err = 0;
364 		}
365 
366 		mutex_unlock(&cpufreq_governor_mutex);
367 	}
368 out:
369 	return err;
370 }
371 
372 
373 /**
374  * cpufreq_per_cpu_attr_read() / show_##file_name() -
375  * print out cpufreq information
376  *
377  * Write out information from cpufreq_driver->policy[cpu]; object must be
378  * "unsigned int".
379  */
380 
381 #define show_one(file_name, object)			\
382 static ssize_t show_##file_name				\
383 (struct cpufreq_policy *policy, char *buf)		\
384 {							\
385 	return sprintf(buf, "%u\n", policy->object);	\
386 }
387 
388 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
389 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
390 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
391 show_one(scaling_min_freq, min);
392 show_one(scaling_max_freq, max);
393 show_one(scaling_cur_freq, cur);
394 
395 static int __cpufreq_set_policy(struct cpufreq_policy *data,
396 				struct cpufreq_policy *policy);
397 
398 /**
399  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
400  */
401 #define store_one(file_name, object)			\
402 static ssize_t store_##file_name					\
403 (struct cpufreq_policy *policy, const char *buf, size_t count)		\
404 {									\
405 	unsigned int ret = -EINVAL;					\
406 	struct cpufreq_policy new_policy;				\
407 									\
408 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
409 	if (ret)							\
410 		return -EINVAL;						\
411 									\
412 	ret = sscanf(buf, "%u", &new_policy.object);			\
413 	if (ret != 1)							\
414 		return -EINVAL;						\
415 									\
416 	ret = __cpufreq_set_policy(policy, &new_policy);		\
417 	policy->user_policy.object = policy->object;			\
418 									\
419 	return ret ? ret : count;					\
420 }
421 
422 store_one(scaling_min_freq, min);
423 store_one(scaling_max_freq, max);
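
/*
 * For reference, store_one(scaling_max_freq, max) above expands into roughly
 * the following sysfs store handler -- a sketch of the generated code, not an
 * extra definition:
 *
 *	static ssize_t store_scaling_max_freq(struct cpufreq_policy *policy,
 *					      const char *buf, size_t count)
 *	{
 *		unsigned int ret = -EINVAL;
 *		struct cpufreq_policy new_policy;
 *
 *		ret = cpufreq_get_policy(&new_policy, policy->cpu);
 *		if (ret)
 *			return -EINVAL;
 *
 *		ret = sscanf(buf, "%u", &new_policy.max);
 *		if (ret != 1)
 *			return -EINVAL;
 *
 *		ret = __cpufreq_set_policy(policy, &new_policy);
 *		policy->user_policy.max = policy->max;
 *
 *		return ret ? ret : count;
 *	}
 */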
424 
425 /**
426  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
427  */
428 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
429 					char *buf)
430 {
431 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
432 	if (!cur_freq)
433 		return sprintf(buf, "<unknown>");
434 	return sprintf(buf, "%u\n", cur_freq);
435 }
436 
437 
438 /**
439  * show_scaling_governor - show the current policy for the specified CPU
440  */
441 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
442 {
443 	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
444 		return sprintf(buf, "powersave\n");
445 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
446 		return sprintf(buf, "performance\n");
447 	else if (policy->governor)
448 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n",
449 				policy->governor->name);
450 	return -EINVAL;
451 }
452 
453 
454 /**
455  * store_scaling_governor - store policy for the specified CPU
456  */
457 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
458 					const char *buf, size_t count)
459 {
460 	unsigned int ret = -EINVAL;
461 	char	str_governor[16];
462 	struct cpufreq_policy new_policy;
463 
464 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
465 	if (ret)
466 		return ret;
467 
468 	ret = sscanf(buf, "%15s", str_governor);
469 	if (ret != 1)
470 		return -EINVAL;
471 
472 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
473 						&new_policy.governor))
474 		return -EINVAL;
475 
476 	/* Do not use cpufreq_set_policy here or the user_policy.max
477 	   will be wrongly overridden */
478 	ret = __cpufreq_set_policy(policy, &new_policy);
479 
480 	policy->user_policy.policy = policy->policy;
481 	policy->user_policy.governor = policy->governor;
482 
483 	if (ret)
484 		return ret;
485 	else
486 		return count;
487 }
488 
489 /**
490  * show_scaling_driver - show the cpufreq driver currently loaded
491  */
492 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
493 {
494 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
495 }
496 
497 /**
498  * show_scaling_available_governors - show the available CPUfreq governors
499  */
500 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
501 						char *buf)
502 {
503 	ssize_t i = 0;
504 	struct cpufreq_governor *t;
505 
506 	if (!cpufreq_driver->target) {
507 		i += sprintf(buf, "performance powersave");
508 		goto out;
509 	}
510 
511 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
512 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
513 		    - (CPUFREQ_NAME_LEN + 2)))
514 			goto out;
515 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
516 	}
517 out:
518 	i += sprintf(&buf[i], "\n");
519 	return i;
520 }
521 
522 static ssize_t show_cpus(const struct cpumask *mask, char *buf)
523 {
524 	ssize_t i = 0;
525 	unsigned int cpu;
526 
527 	for_each_cpu(cpu, mask) {
528 		if (i)
529 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
530 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
531 		if (i >= (PAGE_SIZE - 5))
532 			break;
533 	}
534 	i += sprintf(&buf[i], "\n");
535 	return i;
536 }
537 
538 /**
539  * show_related_cpus - show the CPUs affected by each transition even if
540  * hw coordination is in use
541  */
542 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
543 {
544 	if (cpumask_empty(policy->related_cpus))
545 		return show_cpus(policy->cpus, buf);
546 	return show_cpus(policy->related_cpus, buf);
547 }
548 
549 /**
550  * show_affected_cpus - show the CPUs affected by each transition
551  */
552 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
553 {
554 	return show_cpus(policy->cpus, buf);
555 }
556 
557 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
558 					const char *buf, size_t count)
559 {
560 	unsigned int freq = 0;
561 	unsigned int ret;
562 
563 	if (!policy->governor || !policy->governor->store_setspeed)
564 		return -EINVAL;
565 
566 	ret = sscanf(buf, "%u", &freq);
567 	if (ret != 1)
568 		return -EINVAL;
569 
570 	policy->governor->store_setspeed(policy, freq);
571 
572 	return count;
573 }
574 
575 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
576 {
577 	if (!policy->governor || !policy->governor->show_setspeed)
578 		return sprintf(buf, "<unsupported>\n");
579 
580 	return policy->governor->show_setspeed(policy, buf);
581 }
582 
583 /**
584  * show_bios_limit - show the current cpufreq HW/BIOS limitation
585  */
586 static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
587 {
588 	unsigned int limit;
589 	int ret;
590 	if (cpufreq_driver->bios_limit) {
591 		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
592 		if (!ret)
593 			return sprintf(buf, "%u\n", limit);
594 	}
595 	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
596 }
597 
598 cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
599 cpufreq_freq_attr_ro(cpuinfo_min_freq);
600 cpufreq_freq_attr_ro(cpuinfo_max_freq);
601 cpufreq_freq_attr_ro(cpuinfo_transition_latency);
602 cpufreq_freq_attr_ro(scaling_available_governors);
603 cpufreq_freq_attr_ro(scaling_driver);
604 cpufreq_freq_attr_ro(scaling_cur_freq);
605 cpufreq_freq_attr_ro(bios_limit);
606 cpufreq_freq_attr_ro(related_cpus);
607 cpufreq_freq_attr_ro(affected_cpus);
608 cpufreq_freq_attr_rw(scaling_min_freq);
609 cpufreq_freq_attr_rw(scaling_max_freq);
610 cpufreq_freq_attr_rw(scaling_governor);
611 cpufreq_freq_attr_rw(scaling_setspeed);
612 
613 static struct attribute *default_attrs[] = {
614 	&cpuinfo_min_freq.attr,
615 	&cpuinfo_max_freq.attr,
616 	&cpuinfo_transition_latency.attr,
617 	&scaling_min_freq.attr,
618 	&scaling_max_freq.attr,
619 	&affected_cpus.attr,
620 	&related_cpus.attr,
621 	&scaling_governor.attr,
622 	&scaling_driver.attr,
623 	&scaling_available_governors.attr,
624 	&scaling_setspeed.attr,
625 	NULL
626 };
627 
628 struct kobject *cpufreq_global_kobject;
629 EXPORT_SYMBOL(cpufreq_global_kobject);
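
/*
 * cpufreq_global_kobject backs /sys/devices/system/cpu/cpufreq (see
 * cpufreq_core_init() below).  A sketch of how a governor or driver might
 * hang global attributes off it (my_global_attr_group and my_global_attrs
 * are illustrative names only):
 *
 *	static struct attribute_group my_global_attr_group = {
 *		.attrs	= my_global_attrs,
 *		.name	= "my_feature",
 *	};
 *
 *	ret = sysfs_create_group(cpufreq_global_kobject, &my_global_attr_group);
 */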
630 
631 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
632 #define to_attr(a) container_of(a, struct freq_attr, attr)
633 
634 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
635 {
636 	struct cpufreq_policy *policy = to_policy(kobj);
637 	struct freq_attr *fattr = to_attr(attr);
638 	ssize_t ret = -EINVAL;
639 	policy = cpufreq_cpu_get_sysfs(policy->cpu);
640 	if (!policy)
641 		goto no_policy;
642 
643 	if (lock_policy_rwsem_read(policy->cpu) < 0)
644 		goto fail;
645 
646 	if (fattr->show)
647 		ret = fattr->show(policy, buf);
648 	else
649 		ret = -EIO;
650 
651 	unlock_policy_rwsem_read(policy->cpu);
652 fail:
653 	cpufreq_cpu_put_sysfs(policy);
654 no_policy:
655 	return ret;
656 }
657 
658 static ssize_t store(struct kobject *kobj, struct attribute *attr,
659 		     const char *buf, size_t count)
660 {
661 	struct cpufreq_policy *policy = to_policy(kobj);
662 	struct freq_attr *fattr = to_attr(attr);
663 	ssize_t ret = -EINVAL;
664 	policy = cpufreq_cpu_get_sysfs(policy->cpu);
665 	if (!policy)
666 		goto no_policy;
667 
668 	if (lock_policy_rwsem_write(policy->cpu) < 0)
669 		goto fail;
670 
671 	if (fattr->store)
672 		ret = fattr->store(policy, buf, count);
673 	else
674 		ret = -EIO;
675 
676 	unlock_policy_rwsem_write(policy->cpu);
677 fail:
678 	cpufreq_cpu_put_sysfs(policy);
679 no_policy:
680 	return ret;
681 }
682 
683 static void cpufreq_sysfs_release(struct kobject *kobj)
684 {
685 	struct cpufreq_policy *policy = to_policy(kobj);
686 	pr_debug("last reference is dropped\n");
687 	complete(&policy->kobj_unregister);
688 }
689 
690 static const struct sysfs_ops sysfs_ops = {
691 	.show	= show,
692 	.store	= store,
693 };
694 
695 static struct kobj_type ktype_cpufreq = {
696 	.sysfs_ops	= &sysfs_ops,
697 	.default_attrs	= default_attrs,
698 	.release	= cpufreq_sysfs_release,
699 };
700 
701 /*
702  * Returns:
703  *   Negative: Failure
704  *   0:        Success
705  *   Positive: When we have a managed CPU and the sysfs got symlinked
706  */
707 static int cpufreq_add_dev_policy(unsigned int cpu,
708 				  struct cpufreq_policy *policy,
709 				  struct device *dev)
710 {
711 	int ret = 0;
712 #ifdef CONFIG_SMP
713 	unsigned long flags;
714 	unsigned int j;
715 #ifdef CONFIG_HOTPLUG_CPU
716 	struct cpufreq_governor *gov;
717 
718 	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
719 	if (gov) {
720 		policy->governor = gov;
721 		pr_debug("Restoring governor %s for cpu %d\n",
722 		       policy->governor->name, cpu);
723 	}
724 #endif
725 
726 	for_each_cpu(j, policy->cpus) {
727 		struct cpufreq_policy *managed_policy;
728 
729 		if (cpu == j)
730 			continue;
731 
732 		/* Check for existing affected CPUs.
733 		 * They may not be aware of it due to CPU Hotplug.
734 		 * cpufreq_cpu_put is called when the device is removed
735 		 * in __cpufreq_remove_dev()
736 		 */
737 		managed_policy = cpufreq_cpu_get(j);
738 		if (unlikely(managed_policy)) {
739 
740 			/* Set proper policy_cpu */
741 			unlock_policy_rwsem_write(cpu);
742 			per_cpu(cpufreq_policy_cpu, cpu) = managed_policy->cpu;
743 
744 			if (lock_policy_rwsem_write(cpu) < 0) {
745 				/* Should not go through policy unlock path */
746 				if (cpufreq_driver->exit)
747 					cpufreq_driver->exit(policy);
748 				cpufreq_cpu_put(managed_policy);
749 				return -EBUSY;
750 			}
751 
752 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
753 			cpumask_copy(managed_policy->cpus, policy->cpus);
754 			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
755 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
756 
757 			pr_debug("CPU already managed, adding link\n");
758 			ret = sysfs_create_link(&dev->kobj,
759 						&managed_policy->kobj,
760 						"cpufreq");
761 			if (ret)
762 				cpufreq_cpu_put(managed_policy);
763 			/*
764 			 * Success. We only needed to be added to the mask.
765 			 * Call driver->exit() because only the cpu parent of
766 			 * the kobj needed to call init().
767 			 */
768 			if (cpufreq_driver->exit)
769 				cpufreq_driver->exit(policy);
770 
771 			if (!ret)
772 				return 1;
773 			else
774 				return ret;
775 		}
776 	}
777 #endif
778 	return ret;
779 }
780 
781 
782 /* symlink affected CPUs */
783 static int cpufreq_add_dev_symlink(unsigned int cpu,
784 				   struct cpufreq_policy *policy)
785 {
786 	unsigned int j;
787 	int ret = 0;
788 
789 	for_each_cpu(j, policy->cpus) {
790 		struct cpufreq_policy *managed_policy;
791 		struct device *cpu_dev;
792 
793 		if (j == cpu)
794 			continue;
795 		if (!cpu_online(j))
796 			continue;
797 
798 		pr_debug("CPU %u already managed, adding link\n", j);
799 		managed_policy = cpufreq_cpu_get(cpu);
800 		cpu_dev = get_cpu_device(j);
801 		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
802 					"cpufreq");
803 		if (ret) {
804 			cpufreq_cpu_put(managed_policy);
805 			return ret;
806 		}
807 	}
808 	return ret;
809 }
810 
811 static int cpufreq_add_dev_interface(unsigned int cpu,
812 				     struct cpufreq_policy *policy,
813 				     struct device *dev)
814 {
815 	struct cpufreq_policy new_policy;
816 	struct freq_attr **drv_attr;
817 	unsigned long flags;
818 	int ret = 0;
819 	unsigned int j;
820 
821 	/* prepare interface data */
822 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
823 				   &dev->kobj, "cpufreq");
824 	if (ret)
825 		return ret;
826 
827 	/* set up files for this cpu device */
828 	drv_attr = cpufreq_driver->attr;
829 	while ((drv_attr) && (*drv_attr)) {
830 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
831 		if (ret)
832 			goto err_out_kobj_put;
833 		drv_attr++;
834 	}
835 	if (cpufreq_driver->get) {
836 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
837 		if (ret)
838 			goto err_out_kobj_put;
839 	}
840 	if (cpufreq_driver->target) {
841 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
842 		if (ret)
843 			goto err_out_kobj_put;
844 	}
845 	if (cpufreq_driver->bios_limit) {
846 		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
847 		if (ret)
848 			goto err_out_kobj_put;
849 	}
850 
851 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
852 	for_each_cpu(j, policy->cpus) {
853 		if (!cpu_online(j))
854 			continue;
855 		per_cpu(cpufreq_cpu_data, j) = policy;
856 		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
857 	}
858 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
859 
860 	ret = cpufreq_add_dev_symlink(cpu, policy);
861 	if (ret)
862 		goto err_out_kobj_put;
863 
864 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
865 	/* assure that the starting sequence is run in __cpufreq_set_policy */
866 	policy->governor = NULL;
867 
868 	/* set default policy */
869 	ret = __cpufreq_set_policy(policy, &new_policy);
870 	policy->user_policy.policy = policy->policy;
871 	policy->user_policy.governor = policy->governor;
872 
873 	if (ret) {
874 		pr_debug("setting policy failed\n");
875 		if (cpufreq_driver->exit)
876 			cpufreq_driver->exit(policy);
877 	}
878 	return ret;
879 
880 err_out_kobj_put:
881 	kobject_put(&policy->kobj);
882 	wait_for_completion(&policy->kobj_unregister);
883 	return ret;
884 }
885 
886 
887 /**
888  * cpufreq_add_dev - add a CPU device
889  *
890  * Adds the cpufreq interface for a CPU device.
891  *
892  * The Oracle says: try running cpufreq registration/unregistration concurrently
893  * with cpu hotplugging and all hell will break loose. Tried to clean this
894  * mess up, but more thorough testing is needed. - Mathieu
895  */
896 static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
897 {
898 	unsigned int cpu = dev->id;
899 	int ret = 0, found = 0;
900 	struct cpufreq_policy *policy;
901 	unsigned long flags;
902 	unsigned int j;
903 #ifdef CONFIG_HOTPLUG_CPU
904 	int sibling;
905 #endif
906 
907 	if (cpu_is_offline(cpu))
908 		return 0;
909 
910 	pr_debug("adding CPU %u\n", cpu);
911 
912 #ifdef CONFIG_SMP
913 	/* check whether a different CPU already registered this
914 	 * CPU because it is in the same boat. */
915 	policy = cpufreq_cpu_get(cpu);
916 	if (unlikely(policy)) {
917 		cpufreq_cpu_put(policy);
918 		return 0;
919 	}
920 #endif
921 
922 	if (!try_module_get(cpufreq_driver->owner)) {
923 		ret = -EINVAL;
924 		goto module_out;
925 	}
926 
927 	ret = -ENOMEM;
928 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
929 	if (!policy)
930 		goto nomem_out;
931 
932 	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
933 		goto err_free_policy;
934 
935 	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
936 		goto err_free_cpumask;
937 
938 	policy->cpu = cpu;
939 	cpumask_copy(policy->cpus, cpumask_of(cpu));
940 
941 	/* Initially set CPU itself as the policy_cpu */
942 	per_cpu(cpufreq_policy_cpu, cpu) = cpu;
943 	ret = (lock_policy_rwsem_write(cpu) < 0);
944 	WARN_ON(ret);
945 
946 	init_completion(&policy->kobj_unregister);
947 	INIT_WORK(&policy->update, handle_update);
948 
949 	/* Set governor before ->init, so that driver could check it */
950 #ifdef CONFIG_HOTPLUG_CPU
951 	for_each_online_cpu(sibling) {
952 		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
953 		if (cp && cp->governor &&
954 		    (cpumask_test_cpu(cpu, cp->related_cpus))) {
955 			policy->governor = cp->governor;
956 			found = 1;
957 			break;
958 		}
959 	}
960 #endif
961 	if (!found)
962 		policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
963 	/* call driver. From then on the cpufreq driver must be able
964 	 * to accept all calls to ->verify and ->setpolicy for this CPU
965 	 */
966 	ret = cpufreq_driver->init(policy);
967 	if (ret) {
968 		pr_debug("initialization failed\n");
969 		goto err_unlock_policy;
970 	}
971 	policy->user_policy.min = policy->min;
972 	policy->user_policy.max = policy->max;
973 
974 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
975 				     CPUFREQ_START, policy);
976 
977 	ret = cpufreq_add_dev_policy(cpu, policy, dev);
978 	if (ret) {
979 		if (ret > 0)
980 			/* This is a managed cpu, symlink created,
981 			   exit with 0 */
982 			ret = 0;
983 		goto err_unlock_policy;
984 	}
985 
986 	ret = cpufreq_add_dev_interface(cpu, policy, dev);
987 	if (ret)
988 		goto err_out_unregister;
989 
990 	unlock_policy_rwsem_write(cpu);
991 
992 	kobject_uevent(&policy->kobj, KOBJ_ADD);
993 	module_put(cpufreq_driver->owner);
994 	pr_debug("initialization complete\n");
995 
996 	return 0;
997 
998 
999 err_out_unregister:
1000 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1001 	for_each_cpu(j, policy->cpus)
1002 		per_cpu(cpufreq_cpu_data, j) = NULL;
1003 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1004 
1005 	kobject_put(&policy->kobj);
1006 	wait_for_completion(&policy->kobj_unregister);
1007 
1008 err_unlock_policy:
1009 	unlock_policy_rwsem_write(cpu);
1010 	free_cpumask_var(policy->related_cpus);
1011 err_free_cpumask:
1012 	free_cpumask_var(policy->cpus);
1013 err_free_policy:
1014 	kfree(policy);
1015 nomem_out:
1016 	module_put(cpufreq_driver->owner);
1017 module_out:
1018 	return ret;
1019 }
1020 
1021 
1022 /**
1023  * __cpufreq_remove_dev - remove a CPU device
1024  *
1025  * Removes the cpufreq interface for a CPU device.
1026  * Caller should already have policy_rwsem in write mode for this CPU.
1027  * This routine frees the rwsem before returning.
1028  */
1029 static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1030 {
1031 	unsigned int cpu = dev->id;
1032 	unsigned long flags;
1033 	struct cpufreq_policy *data;
1034 	struct kobject *kobj;
1035 	struct completion *cmp;
1036 #ifdef CONFIG_SMP
1037 	struct device *cpu_dev;
1038 	unsigned int j;
1039 #endif
1040 
1041 	pr_debug("unregistering CPU %u\n", cpu);
1042 
1043 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1044 	data = per_cpu(cpufreq_cpu_data, cpu);
1045 
1046 	if (!data) {
1047 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1048 		unlock_policy_rwsem_write(cpu);
1049 		return -EINVAL;
1050 	}
1051 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1052 
1053 
1054 #ifdef CONFIG_SMP
1055 	/* if this isn't the CPU which is the parent of the kobj, we
1056 	 * only need to unlink, put and exit
1057 	 */
1058 	if (unlikely(cpu != data->cpu)) {
1059 		pr_debug("removing link\n");
1060 		cpumask_clear_cpu(cpu, data->cpus);
1061 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1062 		kobj = &dev->kobj;
1063 		cpufreq_cpu_put(data);
1064 		unlock_policy_rwsem_write(cpu);
1065 		sysfs_remove_link(kobj, "cpufreq");
1066 		return 0;
1067 	}
1068 #endif
1069 
1070 #ifdef CONFIG_SMP
1071 
1072 #ifdef CONFIG_HOTPLUG_CPU
1073 	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
1074 			CPUFREQ_NAME_LEN);
1075 #endif
1076 
1077 	/* if we have other CPUs still registered, we need to unlink them,
1078 	 * or else wait_for_completion below will lock up. Clean the
1079 	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1080 	 * the sysfs links afterwards.
1081 	 */
1082 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1083 		for_each_cpu(j, data->cpus) {
1084 			if (j == cpu)
1085 				continue;
1086 			per_cpu(cpufreq_cpu_data, j) = NULL;
1087 		}
1088 	}
1089 
1090 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1091 
1092 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1093 		for_each_cpu(j, data->cpus) {
1094 			if (j == cpu)
1095 				continue;
1096 			pr_debug("removing link for cpu %u\n", j);
1097 #ifdef CONFIG_HOTPLUG_CPU
1098 			strncpy(per_cpu(cpufreq_cpu_governor, j),
1099 				data->governor->name, CPUFREQ_NAME_LEN);
1100 #endif
1101 			cpu_dev = get_cpu_device(j);
1102 			kobj = &cpu_dev->kobj;
1103 			unlock_policy_rwsem_write(cpu);
1104 			sysfs_remove_link(kobj, "cpufreq");
1105 			lock_policy_rwsem_write(cpu);
1106 			cpufreq_cpu_put(data);
1107 		}
1108 	}
1109 #else
1110 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1111 #endif
1112 
1113 	if (cpufreq_driver->target)
1114 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1115 
1116 	kobj = &data->kobj;
1117 	cmp = &data->kobj_unregister;
1118 	unlock_policy_rwsem_write(cpu);
1119 	kobject_put(kobj);
1120 
1121 	/* we need to make sure that the underlying kobj is actually
1122 	 * not referenced anymore by anybody before we proceed with
1123 	 * unloading.
1124 	 */
1125 	pr_debug("waiting for dropping of refcount\n");
1126 	wait_for_completion(cmp);
1127 	pr_debug("wait complete\n");
1128 
1129 	lock_policy_rwsem_write(cpu);
1130 	if (cpufreq_driver->exit)
1131 		cpufreq_driver->exit(data);
1132 	unlock_policy_rwsem_write(cpu);
1133 
1134 #ifdef CONFIG_HOTPLUG_CPU
1135 	/* when the CPU which is the parent of the kobj is hotplugged
1136 	 * offline, check for siblings, and create cpufreq sysfs interface
1137 	 * and symlinks
1138 	 */
1139 	if (unlikely(cpumask_weight(data->cpus) > 1)) {
1140 		/* first sibling now owns the new sysfs dir */
1141 		cpumask_clear_cpu(cpu, data->cpus);
1142 		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
1143 
1144 		/* finally remove our own symlink */
1145 		lock_policy_rwsem_write(cpu);
1146 		__cpufreq_remove_dev(dev, sif);
1147 	}
1148 #endif
1149 
1150 	free_cpumask_var(data->related_cpus);
1151 	free_cpumask_var(data->cpus);
1152 	kfree(data);
1153 
1154 	return 0;
1155 }
1156 
1157 
1158 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1159 {
1160 	unsigned int cpu = dev->id;
1161 	int retval;
1162 
1163 	if (cpu_is_offline(cpu))
1164 		return 0;
1165 
1166 	if (unlikely(lock_policy_rwsem_write(cpu)))
1167 		BUG();
1168 
1169 	retval = __cpufreq_remove_dev(dev, sif);
1170 	return retval;
1171 }
1172 
1173 
1174 static void handle_update(struct work_struct *work)
1175 {
1176 	struct cpufreq_policy *policy =
1177 		container_of(work, struct cpufreq_policy, update);
1178 	unsigned int cpu = policy->cpu;
1179 	pr_debug("handle_update for cpu %u called\n", cpu);
1180 	cpufreq_update_policy(cpu);
1181 }
1182 
1183 /**
1184  *	cpufreq_out_of_sync - If the actual and saved CPU frequencies differ, we're in deep trouble.
1185  *	@cpu: cpu number
1186  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1187  *	@new_freq: CPU frequency the CPU actually runs at
1188  *
1189  *	We adjust to the current frequency first, and need to clean up later.
1190  *	So either call cpufreq_update_policy() or schedule handle_update().
1191  */
1192 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1193 				unsigned int new_freq)
1194 {
1195 	struct cpufreq_freqs freqs;
1196 
1197 	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1198 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1199 
1200 	freqs.cpu = cpu;
1201 	freqs.old = old_freq;
1202 	freqs.new = new_freq;
1203 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1204 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1205 }
1206 
1207 
1208 /**
1209  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1210  * @cpu: CPU number
1211  *
1212  * This is the last known freq, without actually getting it from the driver.
1213  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1214  */
1215 unsigned int cpufreq_quick_get(unsigned int cpu)
1216 {
1217 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1218 	unsigned int ret_freq = 0;
1219 
1220 	if (policy) {
1221 		ret_freq = policy->cur;
1222 		cpufreq_cpu_put(policy);
1223 	}
1224 
1225 	return ret_freq;
1226 }
1227 EXPORT_SYMBOL(cpufreq_quick_get);
1228 
1229 /**
1230  * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1231  * @cpu: CPU number
1232  *
1233  * Just return the max possible frequency for a given CPU.
1234  */
1235 unsigned int cpufreq_quick_get_max(unsigned int cpu)
1236 {
1237 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1238 	unsigned int ret_freq = 0;
1239 
1240 	if (policy) {
1241 		ret_freq = policy->max;
1242 		cpufreq_cpu_put(policy);
1243 	}
1244 
1245 	return ret_freq;
1246 }
1247 EXPORT_SYMBOL(cpufreq_quick_get_max);
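
/*
 * Example for the two helpers above (hypothetical caller, not part of this
 * file): a consumer such as a thermal driver can sample the last known and
 * the maximum frequency without calling into the hardware.  Both helpers
 * return 0 when no policy exists for the CPU.
 *
 *	unsigned int cur = cpufreq_quick_get(cpu);
 *	unsigned int max = cpufreq_quick_get_max(cpu);
 *
 *	if (cur && max)
 *		pr_debug("cpu%u at %u of %u kHz\n", cpu, cur, max);
 */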
1248 
1249 
1250 static unsigned int __cpufreq_get(unsigned int cpu)
1251 {
1252 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1253 	unsigned int ret_freq = 0;
1254 
1255 	if (!cpufreq_driver->get)
1256 		return ret_freq;
1257 
1258 	ret_freq = cpufreq_driver->get(cpu);
1259 
1260 	if (ret_freq && policy->cur &&
1261 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1262 		/* verify no discrepancy between actual and
1263 					saved value exists */
1264 		if (unlikely(ret_freq != policy->cur)) {
1265 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1266 			schedule_work(&policy->update);
1267 		}
1268 	}
1269 
1270 	return ret_freq;
1271 }
1272 
1273 /**
1274  * cpufreq_get - get the current CPU frequency (in kHz)
1275  * @cpu: CPU number
1276  *
1277  * Get the CPU current (static) CPU frequency
1278  */
1279 unsigned int cpufreq_get(unsigned int cpu)
1280 {
1281 	unsigned int ret_freq = 0;
1282 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1283 
1284 	if (!policy)
1285 		goto out;
1286 
1287 	if (unlikely(lock_policy_rwsem_read(cpu)))
1288 		goto out_policy;
1289 
1290 	ret_freq = __cpufreq_get(cpu);
1291 
1292 	unlock_policy_rwsem_read(cpu);
1293 
1294 out_policy:
1295 	cpufreq_cpu_put(policy);
1296 out:
1297 	return ret_freq;
1298 }
1299 EXPORT_SYMBOL(cpufreq_get);
1300 
1301 static struct subsys_interface cpufreq_interface = {
1302 	.name		= "cpufreq",
1303 	.subsys		= &cpu_subsys,
1304 	.add_dev	= cpufreq_add_dev,
1305 	.remove_dev	= cpufreq_remove_dev,
1306 };
1307 
1308 
1309 /**
1310  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1311  *
1312  * This function is only executed for the boot processor.  The other CPUs
1313  * have been put offline by means of CPU hotplug.
1314  */
1315 static int cpufreq_bp_suspend(void)
1316 {
1317 	int ret = 0;
1318 
1319 	int cpu = smp_processor_id();
1320 	struct cpufreq_policy *cpu_policy;
1321 
1322 	pr_debug("suspending cpu %u\n", cpu);
1323 
1324 	/* If there's no policy for the boot CPU, we have nothing to do. */
1325 	cpu_policy = cpufreq_cpu_get(cpu);
1326 	if (!cpu_policy)
1327 		return 0;
1328 
1329 	if (cpufreq_driver->suspend) {
1330 		ret = cpufreq_driver->suspend(cpu_policy);
1331 		if (ret)
1332 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1333 					"step on CPU %u\n", cpu_policy->cpu);
1334 	}
1335 
1336 	cpufreq_cpu_put(cpu_policy);
1337 	return ret;
1338 }
1339 
1340 /**
1341  * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1342  *
1343  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1344  *	2.) schedule a call to cpufreq_update_policy() ASAP once interrupts are
1345  *	    restored. It will verify that the current freq is in sync with
1346  *	    what we believe it to be. This is a bit later than when it
1347  *	    should be, but nonetheless it's better than calling
1348  *	    cpufreq_driver->get() here, which might re-enable interrupts...
1349  *
1350  * This function is only executed for the boot CPU.  The other CPUs have not
1351  * been turned on yet.
1352  */
1353 static void cpufreq_bp_resume(void)
1354 {
1355 	int ret = 0;
1356 
1357 	int cpu = smp_processor_id();
1358 	struct cpufreq_policy *cpu_policy;
1359 
1360 	pr_debug("resuming cpu %u\n", cpu);
1361 
1362 	/* If there's no policy for the boot CPU, we have nothing to do. */
1363 	cpu_policy = cpufreq_cpu_get(cpu);
1364 	if (!cpu_policy)
1365 		return;
1366 
1367 	if (cpufreq_driver->resume) {
1368 		ret = cpufreq_driver->resume(cpu_policy);
1369 		if (ret) {
1370 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1371 					"step on CPU %u\n", cpu_policy->cpu);
1372 			goto fail;
1373 		}
1374 	}
1375 
1376 	schedule_work(&cpu_policy->update);
1377 
1378 fail:
1379 	cpufreq_cpu_put(cpu_policy);
1380 }
1381 
1382 static struct syscore_ops cpufreq_syscore_ops = {
1383 	.suspend	= cpufreq_bp_suspend,
1384 	.resume		= cpufreq_bp_resume,
1385 };
1386 
1387 
1388 /*********************************************************************
1389  *                     NOTIFIER LISTS INTERFACE                      *
1390  *********************************************************************/
1391 
1392 /**
1393  *	cpufreq_register_notifier - register a driver with cpufreq
1394  *	@nb: notifier function to register
1395  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1396  *
1397  *	Add a driver to one of two lists: either a list of drivers that
1398  *      are notified about clock rate changes (once before and once after
1399  *      the transition), or a list of drivers that are notified about
1400  *      changes in cpufreq policy.
1401  *
1402  *	This function may sleep, and has the same return conditions as
1403  *	blocking_notifier_chain_register.
1404  */
1405 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1406 {
1407 	int ret;
1408 
1409 	WARN_ON(!init_cpufreq_transition_notifier_list_called);
1410 
1411 	switch (list) {
1412 	case CPUFREQ_TRANSITION_NOTIFIER:
1413 		ret = srcu_notifier_chain_register(
1414 				&cpufreq_transition_notifier_list, nb);
1415 		break;
1416 	case CPUFREQ_POLICY_NOTIFIER:
1417 		ret = blocking_notifier_chain_register(
1418 				&cpufreq_policy_notifier_list, nb);
1419 		break;
1420 	default:
1421 		ret = -EINVAL;
1422 	}
1423 
1424 	return ret;
1425 }
1426 EXPORT_SYMBOL(cpufreq_register_notifier);
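
/*
 * Minimal sketch of a transition notifier user (my_freq_notifier and my_nb
 * are illustrative names, not part of this file).  The callback receives a
 * struct cpufreq_freqs via the data pointer, once with CPUFREQ_PRECHANGE and
 * once with CPUFREQ_POSTCHANGE per frequency change:
 *
 *	static int my_freq_notifier(struct notifier_block *nb,
 *				    unsigned long event, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (event == CPUFREQ_POSTCHANGE)
 *			pr_debug("cpu%u: %u -> %u kHz\n", freqs->cpu,
 *				 freqs->old, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_freq_notifier,
 *	};
 *
 *	cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */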
1427 
1428 
1429 /**
1430  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1431  *	@nb: notifier block to be unregistered
1432  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1433  *
1434  *	Remove a driver from the CPU frequency notifier list.
1435  *
1436  *	This function may sleep, and has the same return conditions as
1437  *	blocking_notifier_chain_unregister.
1438  */
1439 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1440 {
1441 	int ret;
1442 
1443 	switch (list) {
1444 	case CPUFREQ_TRANSITION_NOTIFIER:
1445 		ret = srcu_notifier_chain_unregister(
1446 				&cpufreq_transition_notifier_list, nb);
1447 		break;
1448 	case CPUFREQ_POLICY_NOTIFIER:
1449 		ret = blocking_notifier_chain_unregister(
1450 				&cpufreq_policy_notifier_list, nb);
1451 		break;
1452 	default:
1453 		ret = -EINVAL;
1454 	}
1455 
1456 	return ret;
1457 }
1458 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1459 
1460 
1461 /*********************************************************************
1462  *                              GOVERNORS                            *
1463  *********************************************************************/
1464 
1465 
1466 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1467 			    unsigned int target_freq,
1468 			    unsigned int relation)
1469 {
1470 	int retval = -EINVAL;
1471 
1472 	if (cpufreq_disabled())
1473 		return -ENODEV;
1474 
1475 	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1476 		target_freq, relation);
1477 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1478 		retval = cpufreq_driver->target(policy, target_freq, relation);
1479 
1480 	return retval;
1481 }
1482 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1483 
1484 int cpufreq_driver_target(struct cpufreq_policy *policy,
1485 			  unsigned int target_freq,
1486 			  unsigned int relation)
1487 {
1488 	int ret = -EINVAL;
1489 
1490 	policy = cpufreq_cpu_get(policy->cpu);
1491 	if (!policy)
1492 		goto no_policy;
1493 
1494 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1495 		goto fail;
1496 
1497 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1498 
1499 	unlock_policy_rwsem_write(policy->cpu);
1500 
1501 fail:
1502 	cpufreq_cpu_put(policy);
1503 no_policy:
1504 	return ret;
1505 }
1506 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1507 
1508 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1509 {
1510 	int ret = 0;
1511 
1512 	policy = cpufreq_cpu_get(policy->cpu);
1513 	if (!policy)
1514 		return -EINVAL;
1515 
1516 	if (cpu_online(cpu) && cpufreq_driver->getavg)
1517 		ret = cpufreq_driver->getavg(policy, cpu);
1518 
1519 	cpufreq_cpu_put(policy);
1520 	return ret;
1521 }
1522 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1523 
1524 /*
1525  * Forward the given event (e.g. CPUFREQ_GOV_LIMITS) to the policy's governor.
1526  */
1527 
1528 static int __cpufreq_governor(struct cpufreq_policy *policy,
1529 					unsigned int event)
1530 {
1531 	int ret;
1532 
1533 	/* Must only be defined when the default governor is known to have
1534 	   latency restrictions, e.g. conservative or ondemand.
1535 	   That this is the case is already ensured in Kconfig.
1536 	*/
1537 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1538 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1539 #else
1540 	struct cpufreq_governor *gov = NULL;
1541 #endif
1542 
1543 	if (policy->governor->max_transition_latency &&
1544 	    policy->cpuinfo.transition_latency >
1545 	    policy->governor->max_transition_latency) {
1546 		if (!gov)
1547 			return -EINVAL;
1548 		else {
1549 			printk(KERN_WARNING "%s governor failed, too long"
1550 			       " transition latency of HW, fallback"
1551 			       " to %s governor\n",
1552 			       policy->governor->name,
1553 			       gov->name);
1554 			policy->governor = gov;
1555 		}
1556 	}
1557 
1558 	if (!try_module_get(policy->governor->owner))
1559 		return -EINVAL;
1560 
1561 	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1562 						policy->cpu, event);
1563 	ret = policy->governor->governor(policy, event);
1564 
1565 	/* we keep one module reference alive for
1566 			each CPU governed by this governor */
1567 	if ((event != CPUFREQ_GOV_START) || ret)
1568 		module_put(policy->governor->owner);
1569 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1570 		module_put(policy->governor->owner);
1571 
1572 	return ret;
1573 }
1574 
1575 
1576 int cpufreq_register_governor(struct cpufreq_governor *governor)
1577 {
1578 	int err;
1579 
1580 	if (!governor)
1581 		return -EINVAL;
1582 
1583 	if (cpufreq_disabled())
1584 		return -ENODEV;
1585 
1586 	mutex_lock(&cpufreq_governor_mutex);
1587 
1588 	err = -EBUSY;
1589 	if (__find_governor(governor->name) == NULL) {
1590 		err = 0;
1591 		list_add(&governor->governor_list, &cpufreq_governor_list);
1592 	}
1593 
1594 	mutex_unlock(&cpufreq_governor_mutex);
1595 	return err;
1596 }
1597 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
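
/*
 * Minimal sketch of a governor registering itself with the core (the
 * "example" names are illustrative, not part of this file).  The ->governor()
 * callback is invoked with CPUFREQ_GOV_START/STOP/LIMITS events by
 * __cpufreq_governor() above:
 *
 *	static int cpufreq_governor_example(struct cpufreq_policy *policy,
 *					    unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *			return __cpufreq_driver_target(policy, policy->max,
 *						       CPUFREQ_RELATION_H);
 *		default:
 *			return 0;
 *		}
 *	}
 *
 *	static struct cpufreq_governor cpufreq_gov_example = {
 *		.name		= "example",
 *		.governor	= cpufreq_governor_example,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	ret = cpufreq_register_governor(&cpufreq_gov_example);
 */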
1598 
1599 
1600 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1601 {
1602 #ifdef CONFIG_HOTPLUG_CPU
1603 	int cpu;
1604 #endif
1605 
1606 	if (!governor)
1607 		return;
1608 
1609 	if (cpufreq_disabled())
1610 		return;
1611 
1612 #ifdef CONFIG_HOTPLUG_CPU
1613 	for_each_present_cpu(cpu) {
1614 		if (cpu_online(cpu))
1615 			continue;
1616 		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1617 			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1618 	}
1619 #endif
1620 
1621 	mutex_lock(&cpufreq_governor_mutex);
1622 	list_del(&governor->governor_list);
1623 	mutex_unlock(&cpufreq_governor_mutex);
1624 	return;
1625 }
1626 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1627 
1628 
1629 
1630 /*********************************************************************
1631  *                          POLICY INTERFACE                         *
1632  *********************************************************************/
1633 
1634 /**
1635  * cpufreq_get_policy - get the current cpufreq_policy
1636  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1637  *	is written
1638  *
1639  * Reads the current cpufreq policy.
1640  */
1641 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1642 {
1643 	struct cpufreq_policy *cpu_policy;
1644 	if (!policy)
1645 		return -EINVAL;
1646 
1647 	cpu_policy = cpufreq_cpu_get(cpu);
1648 	if (!cpu_policy)
1649 		return -EINVAL;
1650 
1651 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1652 
1653 	cpufreq_cpu_put(cpu_policy);
1654 	return 0;
1655 }
1656 EXPORT_SYMBOL(cpufreq_get_policy);
1657 
1658 
1659 /*
1660  * data   : current policy.
1661  * policy : policy to be set.
1662  */
1663 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1664 				struct cpufreq_policy *policy)
1665 {
1666 	int ret = 0;
1667 
1668 	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1669 		policy->min, policy->max);
1670 
1671 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1672 				sizeof(struct cpufreq_cpuinfo));
1673 
1674 	if (policy->min > data->max || policy->max < data->min) {
1675 		ret = -EINVAL;
1676 		goto error_out;
1677 	}
1678 
1679 	/* verify the cpu speed can be set within this limit */
1680 	ret = cpufreq_driver->verify(policy);
1681 	if (ret)
1682 		goto error_out;
1683 
1684 	/* adjust if necessary - all reasons */
1685 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1686 			CPUFREQ_ADJUST, policy);
1687 
1688 	/* adjust if necessary - hardware incompatibility*/
1689 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1690 			CPUFREQ_INCOMPATIBLE, policy);
1691 
1692 	/* verify the cpu speed can be set within this limit,
1693 	   which might be different to the first one */
1694 	ret = cpufreq_driver->verify(policy);
1695 	if (ret)
1696 		goto error_out;
1697 
1698 	/* notification of the new policy */
1699 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1700 			CPUFREQ_NOTIFY, policy);
1701 
1702 	data->min = policy->min;
1703 	data->max = policy->max;
1704 
1705 	pr_debug("new min and max freqs are %u - %u kHz\n",
1706 					data->min, data->max);
1707 
1708 	if (cpufreq_driver->setpolicy) {
1709 		data->policy = policy->policy;
1710 		pr_debug("setting range\n");
1711 		ret = cpufreq_driver->setpolicy(policy);
1712 	} else {
1713 		if (policy->governor != data->governor) {
1714 			/* save old, working values */
1715 			struct cpufreq_governor *old_gov = data->governor;
1716 
1717 			pr_debug("governor switch\n");
1718 
1719 			/* end old governor */
1720 			if (data->governor)
1721 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1722 
1723 			/* start new governor */
1724 			data->governor = policy->governor;
1725 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1726 				/* new governor failed, so re-start old one */
1727 				pr_debug("starting governor %s failed\n",
1728 							data->governor->name);
1729 				if (old_gov) {
1730 					data->governor = old_gov;
1731 					__cpufreq_governor(data,
1732 							   CPUFREQ_GOV_START);
1733 				}
1734 				ret = -EINVAL;
1735 				goto error_out;
1736 			}
1737 			/* might be a policy change, too, so fall through */
1738 		}
1739 		pr_debug("governor: change or update limits\n");
1740 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1741 	}
1742 
1743 error_out:
1744 	return ret;
1745 }
1746 
1747 /**
1748  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1749  *	@cpu: CPU which shall be re-evaluated
1750  *
1751  *	Useful for policy notifiers which have different necessities
1752  *	at different times.
1753  */
1754 int cpufreq_update_policy(unsigned int cpu)
1755 {
1756 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1757 	struct cpufreq_policy policy;
1758 	int ret;
1759 
1760 	if (!data) {
1761 		ret = -ENODEV;
1762 		goto no_policy;
1763 	}
1764 
1765 	if (unlikely(lock_policy_rwsem_write(cpu))) {
1766 		ret = -EINVAL;
1767 		goto fail;
1768 	}
1769 
1770 	pr_debug("updating policy for CPU %u\n", cpu);
1771 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1772 	policy.min = data->user_policy.min;
1773 	policy.max = data->user_policy.max;
1774 	policy.policy = data->user_policy.policy;
1775 	policy.governor = data->user_policy.governor;
1776 
1777 	/* BIOS might change freq behind our back
1778 	  -> ask driver for current freq and notify governors about a change */
1779 	if (cpufreq_driver->get) {
1780 		policy.cur = cpufreq_driver->get(cpu);
1781 		if (!data->cur) {
1782 			pr_debug("Driver did not initialize current freq\n");
1783 			data->cur = policy.cur;
1784 		} else {
1785 			if (data->cur != policy.cur)
1786 				cpufreq_out_of_sync(cpu, data->cur,
1787 								policy.cur);
1788 		}
1789 	}
1790 
1791 	ret = __cpufreq_set_policy(data, &policy);
1792 
1793 	unlock_policy_rwsem_write(cpu);
1794 
1795 fail:
1796 	cpufreq_cpu_put(data);
1797 no_policy:
1798 	return ret;
1799 }
1800 EXPORT_SYMBOL(cpufreq_update_policy);
1801 
1802 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1803 					unsigned long action, void *hcpu)
1804 {
1805 	unsigned int cpu = (unsigned long)hcpu;
1806 	struct device *dev;
1807 
1808 	dev = get_cpu_device(cpu);
1809 	if (dev) {
1810 		switch (action) {
1811 		case CPU_ONLINE:
1812 		case CPU_ONLINE_FROZEN:
1813 			cpufreq_add_dev(dev, NULL);
1814 			break;
1815 		case CPU_DOWN_PREPARE:
1816 		case CPU_DOWN_PREPARE_FROZEN:
1817 			if (unlikely(lock_policy_rwsem_write(cpu)))
1818 				BUG();
1819 
1820 			__cpufreq_remove_dev(dev, NULL);
1821 			break;
1822 		case CPU_DOWN_FAILED:
1823 		case CPU_DOWN_FAILED_FROZEN:
1824 			cpufreq_add_dev(dev, NULL);
1825 			break;
1826 		}
1827 	}
1828 	return NOTIFY_OK;
1829 }
1830 
1831 static struct notifier_block __refdata cpufreq_cpu_notifier = {
1832     .notifier_call = cpufreq_cpu_callback,
1833 };
1834 
1835 /*********************************************************************
1836  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1837  *********************************************************************/
1838 
1839 /**
1840  * cpufreq_register_driver - register a CPU Frequency driver
1841  * @driver_data: A struct cpufreq_driver containing the values
1842  * submitted by the CPU Frequency driver.
1843  *
1844  *   Registers a CPU Frequency driver to this core code. This code
1845  * returns zero on success, -EBUSY when another driver got here first
1846  * (and isn't unregistered in the meantime).
1847  *
1848  */
1849 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1850 {
1851 	unsigned long flags;
1852 	int ret;
1853 
1854 	if (cpufreq_disabled())
1855 		return -ENODEV;
1856 
1857 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1858 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1859 		return -EINVAL;
1860 
1861 	pr_debug("trying to register driver %s\n", driver_data->name);
1862 
1863 	if (driver_data->setpolicy)
1864 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1865 
1866 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1867 	if (cpufreq_driver) {
1868 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1869 		return -EBUSY;
1870 	}
1871 	cpufreq_driver = driver_data;
1872 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1873 
1874 	ret = subsys_interface_register(&cpufreq_interface);
1875 	if (ret)
1876 		goto err_null_driver;
1877 
1878 	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1879 		int i;
1880 		ret = -ENODEV;
1881 
1882 		/* check for at least one working CPU */
1883 		for (i = 0; i < nr_cpu_ids; i++)
1884 			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1885 				ret = 0;
1886 				break;
1887 			}
1888 
1889 		/* if all ->init() calls failed, unregister */
1890 		if (ret) {
1891 			pr_debug("no CPU initialized for driver %s\n",
1892 							driver_data->name);
1893 			goto err_if_unreg;
1894 		}
1895 	}
1896 
1897 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
1898 	pr_debug("driver %s up and running\n", driver_data->name);
1899 
1900 	return 0;
1901 err_if_unreg:
1902 	subsys_interface_unregister(&cpufreq_interface);
1903 err_null_driver:
1904 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1905 	cpufreq_driver = NULL;
1906 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1907 	return ret;
1908 }
1909 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
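
/*
 * Minimal sketch of a scaling driver registering with the core (all "example"
 * names are illustrative; a real driver fills in the hardware specifics and
 * frequencies).  ->init, ->verify and one of ->target/->setpolicy are
 * mandatory, as checked at the top of cpufreq_register_driver():
 *
 *	static int example_cpufreq_init(struct cpufreq_policy *policy)
 *	{
 *		policy->cpuinfo.min_freq = 200000;
 *		policy->cpuinfo.max_freq = 1000000;
 *		policy->cpuinfo.transition_latency = 100 * 1000;
 *		policy->min = policy->cpuinfo.min_freq;
 *		policy->max = policy->cpuinfo.max_freq;
 *		policy->cur = example_hw_get_speed(policy->cpu);
 *		return 0;
 *	}
 *
 *	static int example_cpufreq_verify(struct cpufreq_policy *policy)
 *	{
 *		cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
 *					     policy->cpuinfo.max_freq);
 *		return 0;
 *	}
 *
 *	static struct cpufreq_driver example_cpufreq_driver = {
 *		.name	= "example",
 *		.owner	= THIS_MODULE,
 *		.init	= example_cpufreq_init,
 *		.verify	= example_cpufreq_verify,
 *		.target	= example_cpufreq_target,
 *		.get	= example_cpufreq_get,
 *	};
 *
 *	ret = cpufreq_register_driver(&example_cpufreq_driver);
 */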
1910 
1911 
1912 /**
1913  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1914  *
1915  *    Unregister the current CPUFreq driver. Only call this if you have
1916  * the right to do so, i.e. if you have succeeded in initialising before!
1917  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1918  * currently not initialised.
1919  */
1920 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1921 {
1922 	unsigned long flags;
1923 
1924 	if (!cpufreq_driver || (driver != cpufreq_driver))
1925 		return -EINVAL;
1926 
1927 	pr_debug("unregistering driver %s\n", driver->name);
1928 
1929 	subsys_interface_unregister(&cpufreq_interface);
1930 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1931 
1932 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1933 	cpufreq_driver = NULL;
1934 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1935 
1936 	return 0;
1937 }
1938 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1939 
1940 static int __init cpufreq_core_init(void)
1941 {
1942 	int cpu;
1943 
1944 	if (cpufreq_disabled())
1945 		return -ENODEV;
1946 
1947 	for_each_possible_cpu(cpu) {
1948 		per_cpu(cpufreq_policy_cpu, cpu) = -1;
1949 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1950 	}
1951 
1952 	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
1953 	BUG_ON(!cpufreq_global_kobject);
1954 	register_syscore_ops(&cpufreq_syscore_ops);
1955 
1956 	return 0;
1957 }
1958 core_initcall(cpufreq_core_init);
1959