/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *	Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *	Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/syscore_ops.h>

#include <trace/events/power.h>

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
#endif
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as top level hotplug notifier handler takes this.
 * - Lock should not be held across
 *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
 */
static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
static int lock_policy_rwsem_##mode(int cpu)				\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);
lock_policy_rwsem(write, cpu);

#define unlock_policy_rwsem(mode, cpu)					\
static void unlock_policy_rwsem_##mode(int cpu)				\
{									\
	int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);		\
	BUG_ON(policy_cpu == -1);					\
	up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
}

unlock_policy_rwsem(read, cpu);
unlock_policy_rwsem(write, cpu);
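
/*
 * Illustrative expansion (a sketch derived from the macros above, not
 * extra code in this file): the read variant instantiates to roughly
 *
 *	static int lock_policy_rwsem_read(int cpu)
 *	{
 *		int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
 *		BUG_ON(policy_cpu == -1);
 *		down_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
 *		return 0;
 *	}
 *
 * Callers therefore pair lock_policy_rwsem_read(cpu) with
 * unlock_policy_rwsem_read(cpu), and likewise for the write variants.
 */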

/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
		unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	init_cpufreq_transition_notifier_list_called = true;
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static int off __read_mostly;
static int cpufreq_disabled(void)
{
	return off;
}
void disable_cpufreq(void)
{
	off = 1;
}
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX(cpufreq_governor_mutex);

static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
{
	struct cpufreq_policy *data;
	unsigned long flags;

	if (cpu >= nr_cpu_ids)
		goto err_out;

	/* get the cpufreq driver */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	if (!cpufreq_driver)
		goto err_out_unlock;

	if (!try_module_get(cpufreq_driver->owner))
		goto err_out_unlock;


	/* get the CPU */
	data = per_cpu(cpufreq_cpu_data, cpu);

	if (!data)
		goto err_out_put_module;

	if (!sysfs && !kobject_get(&data->kobj))
		goto err_out_put_module;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return data;

err_out_put_module:
	module_put(cpufreq_driver->owner);
err_out_unlock:
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
err_out:
	return NULL;
}

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
	if (cpufreq_disabled())
		return NULL;

	return __cpufreq_cpu_get(cpu, false);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);

static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
{
	return __cpufreq_cpu_get(cpu, true);
}

static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
{
	if (!sysfs)
		kobject_put(&data->kobj);
	module_put(cpufreq_driver->owner);
}

void cpufreq_cpu_put(struct cpufreq_policy *data)
{
	if (cpufreq_disabled())
		return;

	__cpufreq_cpu_put(data, false);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);

static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
{
	__cpufreq_cpu_put(data, true);
}
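
/*
 * Usage sketch (hypothetical caller): every successful cpufreq_cpu_get()
 * takes a reference on the policy kobject and on the driver module, so it
 * must be balanced with a cpufreq_cpu_put():
 *
 *	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *
 *	if (policy) {
 *		use_frequency(policy->cur);	// hypothetical consumer
 *		cpufreq_cpu_put(policy);
 *	}
 */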

/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
#ifndef CONFIG_SMP
static unsigned long l_p_j_ref;
static unsigned int  l_p_j_ref_freq;

static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	if (ci->flags & CPUFREQ_CONST_LOOPS)
		return;

	if (!l_p_j_ref_freq) {
		l_p_j_ref = loops_per_jiffy;
		l_p_j_ref_freq = ci->old;
		pr_debug("saving %lu as reference value for loops_per_jiffy; "
			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
	}
	if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
								ci->new);
		pr_debug("scaling loops_per_jiffy to %lu "
			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
	}
}
#else
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
	return;
}
#endif
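
/*
 * Worked example for the scaling above (a sketch): cpufreq_scale()
 * rescales the reference value proportionally to the frequency change,
 * so if loops_per_jiffy was saved as 4000000 at a reference of
 * 800000 kHz, a transition to 400000 kHz yields:
 *
 *	loops_per_jiffy = cpufreq_scale(4000000, 800000, 400000);
 *	// == 4000000 * 400000 / 800000 == 2000000
 */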


/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
{
	struct cpufreq_policy *policy;
	unsigned long flags;

	BUG_ON(irqs_disabled());

	if (cpufreq_disabled())
		return;

	freqs->flags = cpufreq_driver->flags;
	pr_debug("notification %u of frequency transition to %u kHz\n",
		state, freqs->new);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	switch (state) {

	case CPUFREQ_PRECHANGE:
		/* detect if the driver reported a value as "old frequency"
		 * which is not equal to what the cpufreq core thinks is
		 * "old frequency".
		 */
		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
			if ((policy) && (policy->cpu == freqs->cpu) &&
			    (policy->cur) && (policy->cur != freqs->old)) {
				pr_debug("Warning: CPU frequency is"
					" %u kHz, cpufreq assumed %u kHz.\n",
					freqs->old, policy->cur);
				freqs->old = policy->cur;
			}
		}
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_PRECHANGE, freqs);
		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
		break;

	case CPUFREQ_POSTCHANGE:
		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
		pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
			(unsigned long)freqs->cpu);
		trace_cpu_frequency(freqs->new, freqs->cpu);
		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
				CPUFREQ_POSTCHANGE, freqs);
		if (likely(policy) && likely(policy->cpu == freqs->cpu))
			policy->cur = freqs->new;
		break;
	}
}
EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
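
/*
 * Usage sketch (hypothetical driver code): drivers that perform the
 * frequency switch themselves typically bracket the hardware change with
 * the two notifications:
 *
 *	struct cpufreq_freqs freqs = {
 *		.cpu = policy->cpu,
 *		.old = policy->cur,
 *		.new = target,			// hypothetical target freq
 *	};
 *
 *	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 *	program_hardware(target);		// hypothetical HW poke
 *	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 */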



/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/

static struct cpufreq_governor *__find_governor(const char *str_governor)
{
	struct cpufreq_governor *t;

	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
			return t;

	return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
				struct cpufreq_governor **governor)
{
	int err = -EINVAL;

	if (!cpufreq_driver)
		goto out;

	if (cpufreq_driver->setpolicy) {
		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_PERFORMANCE;
			err = 0;
		} else if (!strnicmp(str_governor, "powersave",
						CPUFREQ_NAME_LEN)) {
			*policy = CPUFREQ_POLICY_POWERSAVE;
			err = 0;
		}
	} else if (cpufreq_driver->target) {
		struct cpufreq_governor *t;

		mutex_lock(&cpufreq_governor_mutex);

		t = __find_governor(str_governor);

		if (t == NULL) {
			int ret;

			mutex_unlock(&cpufreq_governor_mutex);
			ret = request_module("cpufreq_%s", str_governor);
			mutex_lock(&cpufreq_governor_mutex);

			if (ret == 0)
				t = __find_governor(str_governor);
		}

		if (t != NULL) {
			*governor = t;
			err = 0;
		}

		mutex_unlock(&cpufreq_governor_mutex);
	}
out:
	return err;
}


/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)			\
static ssize_t show_##file_name				\
(struct cpufreq_policy *policy, char *buf)		\
{							\
	return sprintf(buf, "%u\n", policy->object);	\
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
show_one(scaling_cur_freq, cur);
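
/*
 * Illustrative expansion (sketch): show_one(scaling_cur_freq, cur) above
 * generates one read-only sysfs accessor, roughly:
 *
 *	static ssize_t show_scaling_cur_freq
 *	(struct cpufreq_policy *policy, char *buf)
 *	{
 *		return sprintf(buf, "%u\n", policy->cur);
 *	}
 */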

static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)			\
static ssize_t store_##file_name					\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	unsigned int ret;						\
	struct cpufreq_policy new_policy;				\
									\
	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
	if (ret)							\
		return -EINVAL;						\
									\
	ret = sscanf(buf, "%u", &new_policy.object);			\
	if (ret != 1)							\
		return -EINVAL;						\
									\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
									\
	return ret ? ret : count;					\
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
					char *buf)
{
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
}


/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
		return sprintf(buf, "powersave\n");
	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
		return sprintf(buf, "performance\n");
	else if (policy->governor)
		return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
				policy->governor->name);
	return -EINVAL;
}


/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int ret;
	char	str_governor[16];
	struct cpufreq_policy new_policy;

	ret = cpufreq_get_policy(&new_policy, policy->cpu);
	if (ret)
		return ret;

	ret = sscanf(buf, "%15s", str_governor);
	if (ret != 1)
		return -EINVAL;

	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
						&new_policy.governor))
		return -EINVAL;

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret)
		return ret;
	else
		return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
	return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
						char *buf)
{
	ssize_t i = 0;
	struct cpufreq_governor *t;

	if (!cpufreq_driver->target) {
		i += sprintf(buf, "performance powersave");
		goto out;
	}

	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
		    - (CPUFREQ_NAME_LEN + 2)))
			goto out;
		i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
	}
out:
	i += sprintf(&buf[i], "\n");
	return i;
}

static ssize_t show_cpus(const struct cpumask *mask, char *buf)
{
	ssize_t i = 0;
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (i)
			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
		if (i >= (PAGE_SIZE - 5))
			break;
	}
	i += sprintf(&buf[i], "\n");
	return i;
}

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
	return show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
					const char *buf, size_t count)
{
	unsigned int freq = 0;
	unsigned int ret;

	if (!policy->governor || !policy->governor->store_setspeed)
		return -EINVAL;

	ret = sscanf(buf, "%u", &freq);
	if (ret != 1)
		return -EINVAL;

	policy->governor->store_setspeed(policy, freq);

	return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
	if (!policy->governor || !policy->governor->show_setspeed)
		return sprintf(buf, "<unsupported>\n");

	return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
	unsigned int limit;
	int ret;
	if (cpufreq_driver->bios_limit) {
		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
		if (!ret)
			return sprintf(buf, "%u\n", limit);
	}
	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&cpuinfo_transition_latency.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&related_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	&scaling_setspeed.attr,
	NULL
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get_sysfs(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		goto fail;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);
fail:
	cpufreq_cpu_put_sysfs(policy);
no_policy:
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	struct freq_attr *fattr = to_attr(attr);
	ssize_t ret = -EINVAL;
	policy = cpufreq_cpu_get_sysfs(policy->cpu);
	if (!policy)
		goto no_policy;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		goto fail;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);
fail:
	cpufreq_cpu_put_sysfs(policy);
no_policy:
	return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
	struct cpufreq_policy *policy = to_policy(kobj);
	pr_debug("last reference is dropped\n");
	complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};

/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(unsigned int cpu,
				   struct cpufreq_policy *policy)
{
	unsigned int j;
	int ret = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpufreq_policy *managed_policy;
		struct device *cpu_dev;

		if (j == cpu)
			continue;

		pr_debug("CPU %u already managed, adding link\n", j);
		managed_policy = cpufreq_cpu_get(cpu);
		cpu_dev = get_cpu_device(j);
		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			cpufreq_cpu_put(managed_policy);
			return ret;
		}
	}
	return ret;
}

static int cpufreq_add_dev_interface(unsigned int cpu,
				     struct cpufreq_policy *policy,
				     struct device *dev)
{
	struct cpufreq_policy new_policy;
	struct freq_attr **drv_attr;
	unsigned long flags;
	int ret = 0;
	unsigned int j;

	/* prepare interface data */
	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
				   &dev->kobj, "cpufreq");
	if (ret)
		return ret;

	/* set up files for this cpu device */
	drv_attr = cpufreq_driver->attr;
	while ((drv_attr) && (*drv_attr)) {
		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
		if (ret)
			goto err_out_kobj_put;
		drv_attr++;
	}
	if (cpufreq_driver->get) {
		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->target) {
		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
		if (ret)
			goto err_out_kobj_put;
	}
	if (cpufreq_driver->bios_limit) {
		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
		if (ret)
			goto err_out_kobj_put;
	}

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus) {
		per_cpu(cpufreq_cpu_data, j) = policy;
		per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = cpufreq_add_dev_symlink(cpu, policy);
	if (ret)
		goto err_out_kobj_put;

	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
	/* assure that the starting sequence is run in __cpufreq_set_policy */
	policy->governor = NULL;

	/* set default policy */
	ret = __cpufreq_set_policy(policy, &new_policy);
	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;

	if (ret) {
		pr_debug("setting policy failed\n");
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);
	}
	return ret;

err_out_kobj_put:
	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
				  struct device *dev)
{
	struct cpufreq_policy *policy;
	int ret = 0;
	unsigned long flags;

	policy = cpufreq_cpu_get(sibling);
	WARN_ON(!policy);

	__cpufreq_governor(policy, CPUFREQ_GOV_STOP);

	lock_policy_rwsem_write(sibling);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	cpumask_set_cpu(cpu, policy->cpus);
	per_cpu(cpufreq_policy_cpu, cpu) = policy->cpu;
	per_cpu(cpufreq_cpu_data, cpu) = policy;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	unlock_policy_rwsem_write(sibling);

	__cpufreq_governor(policy, CPUFREQ_GOV_START);
	__cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

	ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
	if (ret) {
		cpufreq_cpu_put(policy);
		return ret;
	}

	return 0;
}
#endif

/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with cpu hotplugging and all hell will break loose. Tried
 * to clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int j, cpu = dev->id;
	int ret = -ENOMEM;
	struct cpufreq_policy *policy;
	unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *gov;
	int sibling;
#endif

	if (cpu_is_offline(cpu))
		return 0;

	pr_debug("adding CPU %u\n", cpu);

#ifdef CONFIG_SMP
	/* check whether a different CPU already registered this
	 * CPU because it is in the same boat. */
	policy = cpufreq_cpu_get(cpu);
	if (unlikely(policy)) {
		cpufreq_cpu_put(policy);
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/* Check if this cpu was hot-unplugged earlier and has siblings */
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_online_cpu(sibling) {
		struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
		if (cp && cpumask_test_cpu(cpu, cp->related_cpus)) {
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
			return cpufreq_add_policy_cpu(cpu, sibling, dev);
		}
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif
#endif

	if (!try_module_get(cpufreq_driver->owner)) {
		ret = -EINVAL;
		goto module_out;
	}

	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
	if (!policy)
		goto nomem_out;

	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
		goto err_free_policy;

	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
		goto err_free_cpumask;

	policy->cpu = cpu;
	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/* Initially set CPU itself as the policy_cpu */
	per_cpu(cpufreq_policy_cpu, cpu) = cpu;

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

	/* call driver. From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto err_set_policy_cpu;
	}

	/* related cpus should at least have policy->cpus */
	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);

	/*
	 * affected cpus must always be the ones that are online. We aren't
	 * managing offline cpus here.
	 */
	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

	policy->user_policy.min = policy->min;
	policy->user_policy.max = policy->max;

	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
				     CPUFREQ_START, policy);

#ifdef CONFIG_HOTPLUG_CPU
	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (gov) {
		policy->governor = gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	ret = cpufreq_add_dev_interface(cpu, policy, dev);
	if (ret)
		goto err_out_unregister;

	kobject_uevent(&policy->kobj, KOBJ_ADD);
	module_put(cpufreq_driver->owner);
	pr_debug("initialization complete\n");

	return 0;

err_out_unregister:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_cpu_data, j) = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	kobject_put(&policy->kobj);
	wait_for_completion(&policy->kobj_unregister);

err_set_policy_cpu:
	per_cpu(cpufreq_policy_cpu, cpu) = -1;
	free_cpumask_var(policy->related_cpus);
err_free_cpumask:
	free_cpumask_var(policy->cpus);
err_free_policy:
	kfree(policy);
nomem_out:
	module_put(cpufreq_driver->owner);
module_out:
	return ret;
}

static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
{
	int j;

	policy->last_cpu = policy->cpu;
	policy->cpu = cpu;

	for_each_cpu(j, policy->cpus)
		per_cpu(cpufreq_policy_cpu, j) = cpu;

#ifdef CONFIG_CPU_FREQ_TABLE
	cpufreq_frequency_table_update_policy_cpu(policy);
#endif
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_UPDATE_POLICY_CPU, policy);
}

/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id, ret, cpus;
	unsigned long flags;
	struct cpufreq_policy *data;
	struct kobject *kobj;
	struct completion *cmp;
	struct device *cpu_dev;

	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);

	data = per_cpu(cpufreq_cpu_data, cpu);
	per_cpu(cpufreq_cpu_data, cpu) = NULL;

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	if (!data) {
		pr_debug("%s: No cpu_data found\n", __func__);
		return -EINVAL;
	}

	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

#ifdef CONFIG_HOTPLUG_CPU
	if (!cpufreq_driver->setpolicy)
		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
			data->governor->name, CPUFREQ_NAME_LEN);
#endif

	WARN_ON(lock_policy_rwsem_write(cpu));
	cpus = cpumask_weight(data->cpus);
	cpumask_clear_cpu(cpu, data->cpus);
	unlock_policy_rwsem_write(cpu);

	if (cpu != data->cpu) {
		sysfs_remove_link(&dev->kobj, "cpufreq");
	} else if (cpus > 1) {
		/* first sibling now owns the new sysfs dir */
		cpu_dev = get_cpu_device(cpumask_first(data->cpus));
		sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
		ret = kobject_move(&data->kobj, &cpu_dev->kobj);
		if (ret) {
			pr_err("%s: Failed to move kobj: %d\n", __func__, ret);

			WARN_ON(lock_policy_rwsem_write(cpu));
			cpumask_set_cpu(cpu, data->cpus);

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			per_cpu(cpufreq_cpu_data, cpu) = data;
			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

			unlock_policy_rwsem_write(cpu);

			ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
					"cpufreq");
			return -EINVAL;
		}

		WARN_ON(lock_policy_rwsem_write(cpu));
		update_policy_cpu(data, cpu_dev->id);
		unlock_policy_rwsem_write(cpu);
		pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
				__func__, cpu_dev->id, cpu);
	}

	pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
	cpufreq_cpu_put(data);

	/* If cpu is last user of policy, free policy */
	if (cpus == 1) {
		lock_policy_rwsem_read(cpu);
		kobj = &data->kobj;
		cmp = &data->kobj_unregister;
		unlock_policy_rwsem_read(cpu);
		kobject_put(kobj);

		/* we need to make sure that the underlying kobj is actually
		 * not referenced anymore by anybody before we proceed with
		 * unloading.
		 */
		pr_debug("waiting for dropping of refcount\n");
		wait_for_completion(cmp);
		pr_debug("wait complete\n");

		if (cpufreq_driver->exit)
			cpufreq_driver->exit(data);

		free_cpumask_var(data->related_cpus);
		free_cpumask_var(data->cpus);
		kfree(data);
	} else if (cpufreq_driver->target) {
		__cpufreq_governor(data, CPUFREQ_GOV_START);
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

	per_cpu(cpufreq_policy_cpu, cpu) = -1;
	return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
	unsigned int cpu = dev->id;
	int retval;

	if (cpu_is_offline(cpu))
		return 0;

	retval = __cpufreq_remove_dev(dev, sif);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
		container_of(work, struct cpufreq_policy, update);
	unsigned int cpu = policy->cpu;
	pr_debug("handle_update for cpu %u called\n", cpu);
	cpufreq_update_policy(cpu);
}

/**
 *	cpufreq_out_of_sync - If actual and saved CPU frequency differ, we're in deep trouble.
 *	@cpu: cpu number
 *	@old_freq: CPU frequency the kernel thinks the CPU runs at
 *	@new_freq: CPU frequency the CPU actually runs at
 *
 *	We adjust to the current frequency first, and need to clean up later.
 *	So either call cpufreq_update_policy() or schedule handle_update().
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
				unsigned int new_freq)
{
	struct cpufreq_freqs freqs;

	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

	freqs.cpu = cpu;
	freqs.old = old_freq;
	freqs.new = new_freq;
	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
	struct cpufreq_policy *policy;
	unsigned int ret_freq = 0;

	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
		return cpufreq_driver->get(cpu);

	policy = cpufreq_cpu_get(cpu);
	if (policy) {
		ret_freq = policy->cur;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int ret_freq = 0;

	if (policy) {
		ret_freq = policy->max;
		cpufreq_cpu_put(policy);
	}

	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
	unsigned int ret_freq = 0;

	if (!cpufreq_driver->get)
		return ret_freq;

	ret_freq = cpufreq_driver->get(cpu);

	if (ret_freq && policy->cur &&
		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
		/* verify no discrepancy between actual and
					saved value exists */
		if (unlikely(ret_freq != policy->cur)) {
			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
			schedule_work(&policy->update);
		}
	}

	return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the current CPU frequency.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("suspending cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return 0;

	if (cpufreq_driver->suspend) {
		ret = cpufreq_driver->suspend(cpu_policy);
		if (ret)
			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
					"step on CPU %u\n", cpu_policy->cpu);
	}

	cpufreq_cpu_put(cpu_policy);
	return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 *	2.) schedule a call to cpufreq_update_policy() ASAP as interrupts are
 *	    restored. It will verify that the current freq is in sync with
 *	    what we believe it to be. This is a bit later than when it
 *	    should be, but nonetheless it's better than calling
 *	    cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
	int ret = 0;

	int cpu = smp_processor_id();
	struct cpufreq_policy *cpu_policy;

	pr_debug("resuming cpu %u\n", cpu);

	/* If there's no policy for the boot CPU, we have nothing to do. */
	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return;

	if (cpufreq_driver->resume) {
		ret = cpufreq_driver->resume(cpu_policy);
		if (ret) {
			printk(KERN_ERR "cpufreq: resume failed in ->resume "
					"step on CPU %u\n", cpu_policy->cpu);
			goto fail;
		}
	}

	schedule_work(&cpu_policy->update);

fail:
	cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};

/**
 *	cpufreq_get_current_driver - return current driver's name
 *
 *	Return the name string of the currently loaded cpufreq driver
 *	or NULL, if none.
 */
const char *cpufreq_get_current_driver(void)
{
	if (cpufreq_driver)
		return cpufreq_driver->name;

	return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);

/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 *	cpufreq_register_notifier - register a driver with cpufreq
 *	@nb: notifier function to register
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Add a driver to one of two lists: either a list of drivers that
 *	are notified about clock rate changes (once before and once after
 *	the transition), or a list of drivers that are notified about
 *	changes in cpufreq policy.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	if (cpufreq_disabled())
		return -EINVAL;

	WARN_ON(!init_cpufreq_transition_notifier_list_called);

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_register(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_register(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);
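
/*
 * Usage sketch (hypothetical client): a transition notifier is handed a
 * struct cpufreq_freqs pointer together with the PRECHANGE/POSTCHANGE
 * event:
 *
 *	static int my_transition_cb(struct notifier_block *nb,
 *				    unsigned long event, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (event == CPUFREQ_POSTCHANGE)
 *			react_to_new_freq(freqs->new);	// hypothetical
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_transition_cb,
 *	};
 *
 *	cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
 */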


/**
 *	cpufreq_unregister_notifier - unregister a driver with cpufreq
 *	@nb: notifier block to be unregistered
 *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 *	Remove a driver from the CPU frequency notifier list.
 *
 *	This function may sleep, and has the same return conditions as
 *	blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
	int ret;

	if (cpufreq_disabled())
		return -EINVAL;

	switch (list) {
	case CPUFREQ_TRANSITION_NOTIFIER:
		ret = srcu_notifier_chain_unregister(
				&cpufreq_transition_notifier_list, nb);
		break;
	case CPUFREQ_POLICY_NOTIFIER:
		ret = blocking_notifier_chain_unregister(
				&cpufreq_policy_notifier_list, nb);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
			    unsigned int target_freq,
			    unsigned int relation)
{
	int retval = -EINVAL;
	unsigned int old_target_freq = target_freq;

	if (cpufreq_disabled())
		return -ENODEV;

	/* Make sure that target_freq is within supported range */
	if (target_freq > policy->max)
		target_freq = policy->max;
	if (target_freq < policy->min)
		target_freq = policy->min;

	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
			policy->cpu, target_freq, relation, old_target_freq);

	if (target_freq == policy->cur)
		return 0;

	if (cpufreq_driver->target)
		retval = cpufreq_driver->target(policy, target_freq, relation);

	return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
			  unsigned int target_freq,
			  unsigned int relation)
{
	int ret = -EINVAL;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		goto no_policy;

	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		goto fail;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	unlock_policy_rwsem_write(policy->cpu);

fail:
	cpufreq_cpu_put(policy);
no_policy:
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
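
/*
 * Usage sketch (hypothetical governor code): the relation argument selects
 * how the request is rounded against the driver's frequency table --
 * CPUFREQ_RELATION_L picks the lowest frequency at or above the target,
 * CPUFREQ_RELATION_H the highest frequency at or below it:
 *
 *	cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
 */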

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
	int ret = 0;

	if (cpufreq_disabled())
		return ret;

	if (!cpufreq_driver->getavg)
		return 0;

	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	ret = cpufreq_driver->getavg(policy, cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * __cpufreq_governor - forward a governor event (e.g. CPUFREQ_GOV_START,
 * CPUFREQ_GOV_STOP or CPUFREQ_GOV_LIMITS) to the policy's governor.
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
					unsigned int event)
{
	int ret;

	/*
	 * A fallback governor only needs to be defined when the default
	 * governor is known to have latency restrictions, like e.g.
	 * conservative or ondemand. That this is the case is already
	 * ensured in Kconfig.
	 */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
	struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
	struct cpufreq_governor *gov = NULL;
#endif

	if (policy->governor->max_transition_latency &&
	    policy->cpuinfo.transition_latency >
	    policy->governor->max_transition_latency) {
		if (!gov)
			return -EINVAL;
		else {
			printk(KERN_WARNING "transition latency of HW too"
			       " long for %s governor, falling back"
			       " to %s governor\n",
			       policy->governor->name,
			       gov->name);
			policy->governor = gov;
		}
	}

	if (!try_module_get(policy->governor->owner))
		return -EINVAL;

	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
						policy->cpu, event);
	ret = policy->governor->governor(policy, event);

	if (event == CPUFREQ_GOV_START)
		policy->governor->initialized++;
	else if (event == CPUFREQ_GOV_STOP)
		policy->governor->initialized--;

	/* we keep one module reference alive for
			each CPU governed by this governor */
	if ((event != CPUFREQ_GOV_START) || ret)
		module_put(policy->governor->owner);
	if ((event == CPUFREQ_GOV_STOP) && !ret)
		module_put(policy->governor->owner);

	return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err;

	if (!governor)
		return -EINVAL;

	if (cpufreq_disabled())
		return -ENODEV;

	mutex_lock(&cpufreq_governor_mutex);

	governor->initialized = 0;
	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		list_add(&governor->governor_list, &cpufreq_governor_list);
	}

	mutex_unlock(&cpufreq_governor_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
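
/*
 * Usage sketch (hypothetical module): a governor registers itself once at
 * module init, filling in the fields this core relies on (.name,
 * .governor, .owner):
 *
 *	static struct cpufreq_governor cpufreq_gov_example = {
 *		.name		= "example",		// hypothetical
 *		.governor	= cpufreq_governor_example,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init cpufreq_gov_example_init(void)
 *	{
 *		return cpufreq_register_governor(&cpufreq_gov_example);
 *	}
 */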


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
	int cpu;
#endif

	if (!governor)
		return;

	if (cpufreq_disabled())
		return;

#ifdef CONFIG_HOTPLUG_CPU
	for_each_present_cpu(cpu) {
		if (cpu_online(cpu))
			continue;
		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
	}
#endif

	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
	return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);



/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 *	is written
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
	struct cpufreq_policy *cpu_policy;
	if (!policy)
		return -EINVAL;

	cpu_policy = cpufreq_cpu_get(cpu);
	if (!cpu_policy)
		return -EINVAL;

	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

	cpufreq_cpu_put(cpu_policy);
	return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
				struct cpufreq_policy *policy)
{
	int ret = 0;

	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
		policy->min, policy->max);

	memcpy(&policy->cpuinfo, &data->cpuinfo,
				sizeof(struct cpufreq_cpuinfo));

	if (policy->min > data->max || policy->max < data->min) {
		ret = -EINVAL;
		goto error_out;
	}

	/* verify the cpu speed can be set within this limit */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* adjust if necessary - all reasons */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_ADJUST, policy);

	/* adjust if necessary - hardware incompatibility */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_INCOMPATIBLE, policy);

	/* verify the cpu speed can be set within this limit,
	   which might be different to the first one */
	ret = cpufreq_driver->verify(policy);
	if (ret)
		goto error_out;

	/* notification of the new policy */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
			CPUFREQ_NOTIFY, policy);

	data->min = policy->min;
	data->max = policy->max;

	pr_debug("new min and max freqs are %u - %u kHz\n",
					data->min, data->max);

	if (cpufreq_driver->setpolicy) {
		data->policy = policy->policy;
		pr_debug("setting range\n");
		ret = cpufreq_driver->setpolicy(policy);
	} else {
		if (policy->governor != data->governor) {
			/* save old, working values */
			struct cpufreq_governor *old_gov = data->governor;

			pr_debug("governor switch\n");

			/* end old governor */
			if (data->governor)
				__cpufreq_governor(data, CPUFREQ_GOV_STOP);

			/* start new governor */
			data->governor = policy->governor;
			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
				/* new governor failed, so re-start old one */
				pr_debug("starting governor %s failed\n",
							data->governor->name);
				if (old_gov) {
					data->governor = old_gov;
					__cpufreq_governor(data,
							   CPUFREQ_GOV_START);
				}
				ret = -EINVAL;
				goto error_out;
			}
			/* might be a policy change, too, so fall through */
		}
		pr_debug("governor: change or update limits\n");
		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
	}

error_out:
	return ret;
}

/**
 *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
 *	@cpu: CPU which shall be re-evaluated
 *
 *	Useful for policy notifiers which have different requirements
 *	at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
	struct cpufreq_policy policy;
	int ret;

	if (!data) {
		ret = -ENODEV;
		goto no_policy;
	}

	if (unlikely(lock_policy_rwsem_write(cpu))) {
		ret = -EINVAL;
		goto fail;
	}

	pr_debug("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
	policy.min = data->user_policy.min;
	policy.max = data->user_policy.max;
	policy.policy = data->user_policy.policy;
	policy.governor = data->user_policy.governor;

	/* BIOS might change freq behind our back
	  -> ask driver for current freq and notify governors about a change */
	if (cpufreq_driver->get) {
		policy.cur = cpufreq_driver->get(cpu);
		if (!data->cur) {
			pr_debug("Driver did not initialize current freq\n");
			data->cur = policy.cur;
		} else {
			if (data->cur != policy.cur && cpufreq_driver->target)
				cpufreq_out_of_sync(cpu, data->cur,
								policy.cur);
		}
	}

	ret = __cpufreq_set_policy(data, &policy);

	unlock_policy_rwsem_write(cpu);

fail:
	cpufreq_cpu_put(data);
no_policy:
	return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;

	dev = get_cpu_device(cpu);
	if (dev) {
		switch (action) {
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			__cpufreq_remove_dev(dev, NULL);
			break;
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
			cpufreq_add_dev(dev, NULL);
			break;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
	unsigned long flags;
	int ret;

	if (cpufreq_disabled())
		return -ENODEV;

	if (!driver_data || !driver_data->verify || !driver_data->init ||
	    ((!driver_data->setpolicy) && (!driver_data->target)))
		return -EINVAL;

	pr_debug("trying to register driver %s\n", driver_data->name);

	if (driver_data->setpolicy)
		driver_data->flags |= CPUFREQ_CONST_LOOPS;

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	if (cpufreq_driver) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		return -EBUSY;
	}
	cpufreq_driver = driver_data;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	ret = subsys_interface_register(&cpufreq_interface);
	if (ret)
		goto err_null_driver;

	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
		int i;
		ret = -ENODEV;

		/* check for at least one working CPU */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
				ret = 0;
				break;
			}

		/* if all ->init() calls failed, unregister */
		if (ret) {
			pr_debug("no CPU initialized for driver %s\n",
							driver_data->name);
			goto err_if_unreg;
		}
	}

	register_hotcpu_notifier(&cpufreq_cpu_notifier);
	pr_debug("driver %s up and running\n", driver_data->name);

	return 0;
err_if_unreg:
	subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
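
/*
 * Usage sketch (hypothetical driver module): per the validation above, a
 * driver must supply ->verify, ->init and one of ->setpolicy or ->target:
 *
 *	static struct cpufreq_driver example_cpufreq_driver = {
 *		.name	= "example",			// hypothetical
 *		.owner	= THIS_MODULE,
 *		.init	= example_cpu_init,
 *		.verify	= example_verify_policy,
 *		.target	= example_target,
 *	};
 *
 *	static int __init example_cpufreq_init(void)
 *	{
 *		return cpufreq_register_driver(&example_cpufreq_driver);
 *	}
 */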


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long flags;

	if (!cpufreq_driver || (driver != cpufreq_driver))
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	subsys_interface_unregister(&cpufreq_interface);
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	cpufreq_driver = NULL;
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	if (cpufreq_disabled())
		return -ENODEV;

	for_each_possible_cpu(cpu) {
		per_cpu(cpufreq_policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}

	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
	BUG_ON(!cpufreq_global_kobject);
	register_syscore_ops(&cpufreq_syscore_ops);

	return 0;
}
core_initcall(cpufreq_core_init);