xref: /openbmc/linux/drivers/cpufreq/cpufreq.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33 						"cpufreq-core", msg)
34 
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
61  *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in the cpufreq hotplug path should
63  *   not take this semaphore; the top-level hotplug notifier handler holds it.
64  */
65 static DEFINE_PER_CPU(int, policy_cpu);
66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67 
68 #define lock_policy_rwsem(mode, cpu)					\
69 int lock_policy_rwsem_##mode						\
70 (int cpu)								\
71 {									\
72 	int policy_cpu = per_cpu(policy_cpu, cpu);			\
73 	BUG_ON(policy_cpu == -1);					\
74 	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
75 	if (unlikely(!cpu_online(cpu))) {				\
76 		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
77 		return -1;						\
78 	}								\
79 									\
80 	return 0;							\
81 }
82 
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85 
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88 
89 void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96 
97 void unlock_policy_rwsem_write(int cpu)
98 {
99 	int policy_cpu = per_cpu(policy_cpu, cpu);
100 	BUG_ON(policy_cpu == -1);
101 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
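
/*
 * Illustrative sketch (kept in #if 0 so it is never compiled): how a
 * typical caller pairs the helpers above. A negative return from
 * lock_policy_rwsem_read() means the CPU went offline while we slept on
 * the semaphore; the macro has already dropped it, so the caller must
 * not call the matching unlock. "example_read_policy" is a hypothetical
 * name.
 */
#if 0
static int example_read_policy(unsigned int cpu)
{
	if (lock_policy_rwsem_read(cpu) < 0)
		return -EINVAL;	/* CPU vanished; rwsem already released */

	/* ... safely read the policy fields for "cpu" here ... */

	unlock_policy_rwsem_read(cpu);
	return 0;
}
#endif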
104 
105 
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
108 static unsigned int __cpufreq_get(unsigned int cpu);
109 static void handle_update(struct work_struct *work);
110 
111 /**
112  * Two notifier lists: the "policy" list is involved in the
113  * validation process for a new CPU frequency policy; the
114  * "transition" list for kernel code that needs to handle
115  * changes to devices when the CPU clock speed changes.
116  * Each list's notifier head provides its own locking.
117  */
118 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
119 static struct srcu_notifier_head cpufreq_transition_notifier_list;
120 
121 static int __init init_cpufreq_transition_notifier_list(void)
122 {
123 	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
124 	return 0;
125 }
126 pure_initcall(init_cpufreq_transition_notifier_list);
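
/*
 * Note: unlike the BLOCKING_NOTIFIER_HEAD above, an srcu_notifier_head
 * cannot be initialized statically, since init_srcu_struct() allocates
 * per-CPU state at run time; hence the pure_initcall() above.
 */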
127 
128 static LIST_HEAD(cpufreq_governor_list);
129 static DEFINE_MUTEX(cpufreq_governor_mutex);
130 
131 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
132 {
133 	struct cpufreq_policy *data;
134 	unsigned long flags;
135 
136 	if (cpu >= NR_CPUS)
137 		goto err_out;
138 
139 	/* get the cpufreq driver */
140 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
141 
142 	if (!cpufreq_driver)
143 		goto err_out_unlock;
144 
145 	if (!try_module_get(cpufreq_driver->owner))
146 		goto err_out_unlock;
147 
148 
149 	/* get the CPU */
150 	data = cpufreq_cpu_data[cpu];
151 
152 	if (!data)
153 		goto err_out_put_module;
154 
155 	if (!kobject_get(&data->kobj))
156 		goto err_out_put_module;
157 
158 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
159 	return data;
160 
161 err_out_put_module:
162 	module_put(cpufreq_driver->owner);
163 err_out_unlock:
164 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
165 err_out:
166 	return NULL;
167 }
168 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
169 
170 
171 void cpufreq_cpu_put(struct cpufreq_policy *data)
172 {
173 	kobject_put(&data->kobj);
174 	module_put(cpufreq_driver->owner);
175 }
176 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
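
/*
 * Illustrative sketch (kept in #if 0): every successful cpufreq_cpu_get()
 * must be balanced by cpufreq_cpu_put(), since the get takes both a
 * kobject reference on the policy and a module reference on the driver.
 * "example_last_freq" is a hypothetical name.
 */
#if 0
static unsigned int example_last_freq(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int cur = 0;

	if (policy) {
		cur = policy->cur;
		cpufreq_cpu_put(policy);
	}
	return cur;
}
#endif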
177 
178 
179 /*********************************************************************
180  *                     UNIFIED DEBUG HELPERS                         *
181  *********************************************************************/
182 #ifdef CONFIG_CPU_FREQ_DEBUG
183 
184 /* what part(s) of the CPUfreq subsystem are debugged? */
185 static unsigned int debug;
186 
187 /* is the debug output rate-limited using printk_ratelimit()? Users can
188  * set or modify this value.
189  */
190 static unsigned int debug_ratelimit = 1;
191 
192 /* is printk_ratelimit'ing enabled? It is enabled after a cpufreq driver
193  * has been loaded successfully, temporarily disabled while a new policy
194  * is being set, and disabled again upon cpufreq driver removal.
195  */
196 static unsigned int disable_ratelimit = 1;
197 static DEFINE_SPINLOCK(disable_ratelimit_lock);
198 
199 static void cpufreq_debug_enable_ratelimit(void)
200 {
201 	unsigned long flags;
202 
203 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
204 	if (disable_ratelimit)
205 		disable_ratelimit--;
206 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
207 }
208 
209 static void cpufreq_debug_disable_ratelimit(void)
210 {
211 	unsigned long flags;
212 
213 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
214 	disable_ratelimit++;
215 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
216 }
217 
218 void cpufreq_debug_printk(unsigned int type, const char *prefix,
219 							const char *fmt, ...)
220 {
221 	char s[256];
222 	va_list args;
223 	unsigned int len;
224 	unsigned long flags;
225 
226 	WARN_ON(!prefix);
227 	if (type & debug) {
228 		spin_lock_irqsave(&disable_ratelimit_lock, flags);
229 		if (!disable_ratelimit && debug_ratelimit
230 					&& !printk_ratelimit()) {
231 			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
232 			return;
233 		}
234 		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
235 
236 		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
237 
238 		va_start(args, fmt);
239 		len += vsnprintf(&s[len], (256 - len), fmt, args);
240 		va_end(args);
241 
242 		printk("%s", s);
243 
244 		WARN_ON(len < 5);
245 	}
246 }
247 EXPORT_SYMBOL(cpufreq_debug_printk);
248 
249 
250 module_param(debug, uint, 0644);
251 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
252 			" 2 to debug drivers, and 4 to debug governors.");
253 
254 module_param(debug_ratelimit, uint, 0644);
255 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
256 					" set to 0 to disable ratelimiting.");
257 
258 #else /* !CONFIG_CPU_FREQ_DEBUG */
259 
260 static inline void cpufreq_debug_enable_ratelimit(void) { return; }
261 static inline void cpufreq_debug_disable_ratelimit(void) { return; }
262 
263 #endif /* CONFIG_CPU_FREQ_DEBUG */
264 
265 
266 /*********************************************************************
267  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
268  *********************************************************************/
269 
270 /**
271  * adjust_jiffies - adjust the system "loops_per_jiffy"
272  *
273  * This function alters the system "loops_per_jiffy" for the clock
274  * speed change. Note that loops_per_jiffy cannot be updated on SMP
275  * systems as each CPU might be scaled differently. So, use the arch
276  * per-CPU loops_per_jiffy value wherever possible.
277  */
278 #ifndef CONFIG_SMP
279 static unsigned long l_p_j_ref;
280 static unsigned int  l_p_j_ref_freq;
281 
282 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
283 {
284 	if (ci->flags & CPUFREQ_CONST_LOOPS)
285 		return;
286 
287 	if (!l_p_j_ref_freq) {
288 		l_p_j_ref = loops_per_jiffy;
289 		l_p_j_ref_freq = ci->old;
290 		dprintk("saving %lu as reference value for loops_per_jiffy; "
291 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
292 	}
293 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
294 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
295 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
296 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
297 								ci->new);
298 		dprintk("scaling loops_per_jiffy to %lu "
299 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
300 	}
301 }
302 #else
303 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
304 {
305 	return;
306 }
307 #endif
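
/*
 * Worked example for the scaling above: with l_p_j_ref = 4997120 saved
 * at l_p_j_ref_freq = 1000000 kHz, a transition to 500000 kHz gives
 * cpufreq_scale(4997120, 1000000, 500000) == 2498560, i.e.
 * loops_per_jiffy is halved along with the clock. The sample numbers
 * are made up for illustration.
 */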
308 
309 
310 /**
311  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
312  * on frequency transition.
313  *
314  * This function calls the transition notifiers and the "adjust_jiffies"
315  * function. It is called twice on all CPU frequency changes that have
316  * external effects.
317  */
318 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
319 {
320 	struct cpufreq_policy *policy;
321 
322 	BUG_ON(irqs_disabled());
323 
324 	freqs->flags = cpufreq_driver->flags;
325 	dprintk("notification %u of frequency transition to %u kHz\n",
326 		state, freqs->new);
327 
328 	policy = cpufreq_cpu_data[freqs->cpu];
329 	switch (state) {
330 
331 	case CPUFREQ_PRECHANGE:
332 		/* detect if the driver reported a value as "old frequency"
333 		 * which is not equal to what the cpufreq core thinks is
334 		 * "old frequency".
335 		 */
336 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
337 			if ((policy) && (policy->cpu == freqs->cpu) &&
338 			    (policy->cur) && (policy->cur != freqs->old)) {
339 				dprintk("Warning: CPU frequency is"
340 					" %u, cpufreq assumed %u kHz.\n",
341 					freqs->old, policy->cur);
342 				freqs->old = policy->cur;
343 			}
344 		}
345 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
346 				CPUFREQ_PRECHANGE, freqs);
347 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
348 		break;
349 
350 	case CPUFREQ_POSTCHANGE:
351 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
352 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
353 				CPUFREQ_POSTCHANGE, freqs);
354 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
355 			policy->cur = freqs->new;
356 		break;
357 	}
358 }
359 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
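
/*
 * Illustrative sketch (kept in #if 0) of the calling convention seen
 * from a driver's ->target() hook: PRECHANGE before the hardware is
 * touched, POSTCHANGE afterwards, with freqs.cpu/old/new filled in.
 * "example_target" and the PLL write are hypothetical.
 */
#if 0
static int example_target(struct cpufreq_policy *policy,
			  unsigned int target_freq, unsigned int relation)
{
	struct cpufreq_freqs freqs;

	freqs.cpu = policy->cpu;
	freqs.old = policy->cur;
	freqs.new = target_freq;

	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	/* ... program the PLL/divider to target_freq here ... */
	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);

	return 0;
}
#endif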
360 
361 
362 
363 /*********************************************************************
364  *                          SYSFS INTERFACE                          *
365  *********************************************************************/
366 
367 static struct cpufreq_governor *__find_governor(const char *str_governor)
368 {
369 	struct cpufreq_governor *t;
370 
371 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
372 		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
373 			return t;
374 
375 	return NULL;
376 }
377 
378 /**
379  * cpufreq_parse_governor - parse a governor string
380  */
381 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
382 				struct cpufreq_governor **governor)
383 {
384 	int err = -EINVAL;
385 
386 	if (!cpufreq_driver)
387 		goto out;
388 
389 	if (cpufreq_driver->setpolicy) {
390 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
391 			*policy = CPUFREQ_POLICY_PERFORMANCE;
392 			err = 0;
393 		} else if (!strnicmp(str_governor, "powersave",
394 						CPUFREQ_NAME_LEN)) {
395 			*policy = CPUFREQ_POLICY_POWERSAVE;
396 			err = 0;
397 		}
398 	} else if (cpufreq_driver->target) {
399 		struct cpufreq_governor *t;
400 
401 		mutex_lock(&cpufreq_governor_mutex);
402 
403 		t = __find_governor(str_governor);
404 
405 		if (t == NULL) {
406 			char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
407 								str_governor);
408 
409 			if (name) {
410 				int ret;
411 
412 				mutex_unlock(&cpufreq_governor_mutex);
413 				ret = request_module(name);
414 				mutex_lock(&cpufreq_governor_mutex);
415 
416 				if (ret == 0)
417 					t = __find_governor(str_governor);
418 			}
419 
420 			kfree(name);
421 		}
422 
423 		if (t != NULL) {
424 			*governor = t;
425 			err = 0;
426 		}
427 
428 		mutex_unlock(&cpufreq_governor_mutex);
429 	}
430   out:
431 	return err;
432 }
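
/*
 * Example: echoing "ondemand" into scaling_governor on a ->target()
 * driver first searches cpufreq_governor_list; if the governor is not
 * registered yet, the core drops the mutex and tries
 * request_module("cpufreq_ondemand") before looking it up once more.
 */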
433 
434 
435 /* drivers/base/cpu.c */
436 extern struct sysdev_class cpu_sysdev_class;
437 
438 
439 /**
440  * cpufreq_per_cpu_attr_read() / show_##file_name() -
441  * print out cpufreq information
442  *
443  * Write out information from cpufreq_driver->policy[cpu]; object must be
444  * "unsigned int".
445  */
446 
447 #define show_one(file_name, object)			\
448 static ssize_t show_##file_name				\
449 (struct cpufreq_policy *policy, char *buf)		\
450 {							\
451 	return sprintf(buf, "%u\n", policy->object);	\
452 }
453 
454 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
455 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
456 show_one(scaling_min_freq, min);
457 show_one(scaling_max_freq, max);
458 show_one(scaling_cur_freq, cur);
459 
460 static int __cpufreq_set_policy(struct cpufreq_policy *data,
461 				struct cpufreq_policy *policy);
462 
463 /**
464  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
465  */
466 #define store_one(file_name, object)			\
467 static ssize_t store_##file_name					\
468 (struct cpufreq_policy *policy, const char *buf, size_t count)	\
469 {									\
470 	int ret = -EINVAL;						\
471 	struct cpufreq_policy new_policy;				\
472 									\
473 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
474 	if (ret)							\
475 		return -EINVAL;						\
476 									\
477 	ret = sscanf(buf, "%u", &new_policy.object);			\
478 	if (ret != 1)							\
479 		return -EINVAL;						\
480 									\
481 	ret = __cpufreq_set_policy(policy, &new_policy);		\
482 	policy->user_policy.object = policy->object;			\
483 									\
484 	return ret ? ret : count;					\
485 }
486 
487 store_one(scaling_min_freq, min);
488 store_one(scaling_max_freq, max);
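
/*
 * Example of a generated handler in use from userspace:
 *
 *   # echo 800000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq
 *
 * sscanf() parses the value into new_policy.min, and
 * __cpufreq_set_policy() revalidates the whole policy against the
 * driver's limits before anything takes effect.
 */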
489 
490 /**
491  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
492  */
493 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
494 							char *buf)
495 {
496 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
497 	if (!cur_freq)
498 		return sprintf(buf, "<unknown>");
499 	return sprintf(buf, "%u\n", cur_freq);
500 }
501 
502 
503 /**
504  * show_scaling_governor - show the current policy for the specified CPU
505  */
506 static ssize_t show_scaling_governor(struct cpufreq_policy *policy,
507 							char *buf)
508 {
509 	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
510 		return sprintf(buf, "powersave\n");
511 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
512 		return sprintf(buf, "performance\n");
513 	else if (policy->governor)
514 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
515 	return -EINVAL;
516 }
517 
518 
519 /**
520  * store_scaling_governor - store policy for the specified CPU
521  */
522 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
523 				       const char *buf, size_t count)
524 {
525 	int ret = -EINVAL;
526 	char	str_governor[16];
527 	struct cpufreq_policy new_policy;
528 
529 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
530 	if (ret)
531 		return ret;
532 
533 	ret = sscanf(buf, "%15s", str_governor);
534 	if (ret != 1)
535 		return -EINVAL;
536 
537 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
538 						&new_policy.governor))
539 		return -EINVAL;
540 
541 	/* Do not use cpufreq_set_policy here or the user_policy.max
542 	   will be wrongly overridden */
543 	ret = __cpufreq_set_policy(policy, &new_policy);
544 
545 	policy->user_policy.policy = policy->policy;
546 	policy->user_policy.governor = policy->governor;
547 
548 	if (ret)
549 		return ret;
550 	else
551 		return count;
552 }
553 
554 /**
555  * show_scaling_driver - show the cpufreq driver currently loaded
556  */
557 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
558 {
559 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
560 }
561 
562 /**
563  * show_scaling_available_governors - show the available CPUfreq governors
564  */
565 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
566 				char *buf)
567 {
568 	ssize_t i = 0;
569 	struct cpufreq_governor *t;
570 
571 	if (!cpufreq_driver->target) {
572 		i += sprintf(buf, "performance powersave");
573 		goto out;
574 	}
575 
576 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
577 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
578 			goto out;
579 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
580 	}
581 out:
582 	i += sprintf(&buf[i], "\n");
583 	return i;
584 }
585 /**
586  * show_affected_cpus - show the CPUs affected by each transition
587  */
588 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
589 {
590 	ssize_t i = 0;
591 	unsigned int cpu;
592 
593 	for_each_cpu_mask(cpu, policy->cpus) {
594 		if (i)
595 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
596 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
597 		if (i >= (PAGE_SIZE - 5))
598 		    break;
599 	}
600 	i += sprintf(&buf[i], "\n");
601 	return i;
602 }
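
/*
 * Example output, assuming two siblings share one policy:
 *
 *   # cat /sys/devices/system/cpu/cpu0/cpufreq/affected_cpus
 *   0 1
 */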
603 
604 
605 #define define_one_ro(_name) \
606 static struct freq_attr _name = \
607 __ATTR(_name, 0444, show_##_name, NULL)
608 
609 #define define_one_ro0400(_name) \
610 static struct freq_attr _name = \
611 __ATTR(_name, 0400, show_##_name, NULL)
612 
613 #define define_one_rw(_name) \
614 static struct freq_attr _name = \
615 __ATTR(_name, 0644, show_##_name, store_##_name)
616 
617 define_one_ro0400(cpuinfo_cur_freq);
618 define_one_ro(cpuinfo_min_freq);
619 define_one_ro(cpuinfo_max_freq);
620 define_one_ro(scaling_available_governors);
621 define_one_ro(scaling_driver);
622 define_one_ro(scaling_cur_freq);
623 define_one_ro(affected_cpus);
624 define_one_rw(scaling_min_freq);
625 define_one_rw(scaling_max_freq);
626 define_one_rw(scaling_governor);
627 
628 static struct attribute * default_attrs[] = {
629 	&cpuinfo_min_freq.attr,
630 	&cpuinfo_max_freq.attr,
631 	&scaling_min_freq.attr,
632 	&scaling_max_freq.attr,
633 	&affected_cpus.attr,
634 	&scaling_governor.attr,
635 	&scaling_driver.attr,
636 	&scaling_available_governors.attr,
637 	NULL
638 };
639 
640 #define to_policy(k) container_of(k,struct cpufreq_policy,kobj)
641 #define to_attr(a) container_of(a,struct freq_attr,attr)
642 
643 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
644 {
645 	struct cpufreq_policy *policy = to_policy(kobj);
646 	struct freq_attr *fattr = to_attr(attr);
647 	ssize_t ret;
648 	policy = cpufreq_cpu_get(policy->cpu);
649 	if (!policy)
650 		return -EINVAL;
651 
652 	if (lock_policy_rwsem_read(policy->cpu) < 0)
653 		return -EINVAL;
654 
655 	if (fattr->show)
656 		ret = fattr->show(policy, buf);
657 	else
658 		ret = -EIO;
659 
660 	unlock_policy_rwsem_read(policy->cpu);
661 
662 	cpufreq_cpu_put(policy);
663 	return ret;
664 }
665 
666 static ssize_t store(struct kobject *kobj, struct attribute *attr,
667 		     const char *buf, size_t count)
668 {
669 	struct cpufreq_policy *policy = to_policy(kobj);
670 	struct freq_attr *fattr = to_attr(attr);
671 	ssize_t ret;
672 	policy = cpufreq_cpu_get(policy->cpu);
673 	if (!policy)
674 		return -EINVAL;
675 
676 	if (lock_policy_rwsem_write(policy->cpu) < 0)
677 		return -EINVAL;
678 
679 	if (fattr->store)
680 		ret = fattr->store(policy, buf, count);
681 	else
682 		ret = -EIO;
683 
684 	unlock_policy_rwsem_write(policy->cpu);
685 
686 	cpufreq_cpu_put(policy);
687 	return ret;
688 }
689 
690 static void cpufreq_sysfs_release(struct kobject *kobj)
691 {
692 	struct cpufreq_policy *policy = to_policy(kobj);
693 	dprintk("last reference is dropped\n");
694 	complete(&policy->kobj_unregister);
695 }
696 
697 static struct sysfs_ops sysfs_ops = {
698 	.show	= show,
699 	.store	= store,
700 };
701 
702 static struct kobj_type ktype_cpufreq = {
703 	.sysfs_ops	= &sysfs_ops,
704 	.default_attrs	= default_attrs,
705 	.release	= cpufreq_sysfs_release,
706 };
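
/*
 * Lifecycle note: when the last reference to a policy kobject is
 * dropped, cpufreq_sysfs_release() completes policy->kobj_unregister;
 * __cpufreq_remove_dev() below blocks on that completion before it may
 * kfree() the policy.
 */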
707 
708 
709 /**
710  * cpufreq_add_dev - add a CPU device
711  *
712  * Adds the cpufreq interface for a CPU device.
713  */
714 static int cpufreq_add_dev(struct sys_device *sys_dev)
715 {
716 	unsigned int cpu = sys_dev->id;
717 	int ret = 0;
718 	struct cpufreq_policy new_policy;
719 	struct cpufreq_policy *policy;
720 	struct freq_attr **drv_attr;
721 	struct sys_device *cpu_sys_dev;
722 	unsigned long flags;
723 	unsigned int j;
724 #ifdef CONFIG_SMP
725 	struct cpufreq_policy *managed_policy;
726 #endif
727 
728 	if (cpu_is_offline(cpu))
729 		return 0;
730 
731 	cpufreq_debug_disable_ratelimit();
732 	dprintk("adding CPU %u\n", cpu);
733 
734 #ifdef CONFIG_SMP
735 	/* check whether a different CPU already registered this
736 	 * CPU because it is in the same boat. */
737 	policy = cpufreq_cpu_get(cpu);
738 	if (unlikely(policy)) {
739 		cpufreq_cpu_put(policy);
740 		cpufreq_debug_enable_ratelimit();
741 		return 0;
742 	}
743 #endif
744 
745 	if (!try_module_get(cpufreq_driver->owner)) {
746 		ret = -EINVAL;
747 		goto module_out;
748 	}
749 
750 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
751 	if (!policy) {
752 		ret = -ENOMEM;
753 		goto nomem_out;
754 	}
755 
756 	policy->cpu = cpu;
757 	policy->cpus = cpumask_of_cpu(cpu);
758 
759 	/* Initially set CPU itself as the policy_cpu */
760 	per_cpu(policy_cpu, cpu) = cpu;
761 	lock_policy_rwsem_write(cpu);
762 
763 	init_completion(&policy->kobj_unregister);
764 	INIT_WORK(&policy->update, handle_update);
765 
766 	/* Set governor before ->init, so that driver could check it */
767 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
768 	/* call driver. From then on the cpufreq must be able
769 	 * to accept all calls to ->verify and ->setpolicy for this CPU
770 	 */
771 	ret = cpufreq_driver->init(policy);
772 	if (ret) {
773 		dprintk("initialization failed\n");
774 		unlock_policy_rwsem_write(cpu);
775 		goto err_out;
776 	}
777 	policy->user_policy.min = policy->cpuinfo.min_freq;
778 	policy->user_policy.max = policy->cpuinfo.max_freq;
779 
780 #ifdef CONFIG_SMP
781 
782 #ifdef CONFIG_HOTPLUG_CPU
783 	if (cpufreq_cpu_governor[cpu]){
784 		policy->governor = cpufreq_cpu_governor[cpu];
785 		dprintk("Restoring governor %s for cpu %d\n",
786 		       policy->governor->name, cpu);
787 	}
788 #endif
789 
790 	for_each_cpu_mask(j, policy->cpus) {
791 		if (cpu == j)
792 			continue;
793 
794 		/* check for existing affected CPUs.  They may not be aware
795 		 * of it due to CPU Hotplug.
796 		 */
797 		managed_policy = cpufreq_cpu_get(j);
798 		if (unlikely(managed_policy)) {
799 
800 			/* Set proper policy_cpu */
801 			unlock_policy_rwsem_write(cpu);
802 			per_cpu(policy_cpu, cpu) = managed_policy->cpu;
803 
804 			if (lock_policy_rwsem_write(cpu) < 0)
805 				goto err_out_driver_exit;
806 
807 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
808 			managed_policy->cpus = policy->cpus;
809 			cpufreq_cpu_data[cpu] = managed_policy;
810 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
811 
812 			dprintk("CPU already managed, adding link\n");
813 			ret = sysfs_create_link(&sys_dev->kobj,
814 						&managed_policy->kobj,
815 						"cpufreq");
816 			if (ret) {
817 				unlock_policy_rwsem_write(cpu);
818 				goto err_out_driver_exit;
819 			}
820 
821 			cpufreq_debug_enable_ratelimit();
822 			ret = 0;
823 			unlock_policy_rwsem_write(cpu);
824 			goto err_out_driver_exit; /* call driver->exit() */
825 		}
826 	}
827 #endif
828 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
829 
830 	/* prepare interface data */
831 	policy->kobj.parent = &sys_dev->kobj;
832 	policy->kobj.ktype = &ktype_cpufreq;
833 	kobject_set_name(&policy->kobj, "cpufreq");
834 
835 	ret = kobject_register(&policy->kobj);
836 	if (ret) {
837 		unlock_policy_rwsem_write(cpu);
838 		goto err_out_driver_exit;
839 	}
840 	/* set up files for this cpu device */
841 	drv_attr = cpufreq_driver->attr;
842 	while ((drv_attr) && (*drv_attr)) {
843 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
844 		if (ret)
845 			goto err_out_driver_exit;
846 		drv_attr++;
847 	}
848 	if (cpufreq_driver->get){
849 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
850 		if (ret)
851 			goto err_out_driver_exit;
852 	}
853 	if (cpufreq_driver->target){
854 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
855 		if (ret)
856 			goto err_out_driver_exit;
857 	}
858 
859 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
860 	for_each_cpu_mask(j, policy->cpus) {
861 		cpufreq_cpu_data[j] = policy;
862 		per_cpu(policy_cpu, j) = policy->cpu;
863 	}
864 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
865 
866 	/* symlink affected CPUs */
867 	for_each_cpu_mask(j, policy->cpus) {
868 		if (j == cpu)
869 			continue;
870 		if (!cpu_online(j))
871 			continue;
872 
873 		dprintk("CPU %u already managed, adding link\n", j);
874 		cpufreq_cpu_get(cpu);
875 		cpu_sys_dev = get_cpu_sysdev(j);
876 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
877 					"cpufreq");
878 		if (ret) {
879 			unlock_policy_rwsem_write(cpu);
880 			goto err_out_unregister;
881 		}
882 	}
883 
884 	policy->governor = NULL; /* to ensure that the starting sequence is
885 				  * run in cpufreq_set_policy */
886 
887 	/* set default policy */
888 	ret = __cpufreq_set_policy(policy, &new_policy);
889 	policy->user_policy.policy = policy->policy;
890 	policy->user_policy.governor = policy->governor;
891 
892 	unlock_policy_rwsem_write(cpu);
893 
894 	if (ret) {
895 		dprintk("setting policy failed\n");
896 		goto err_out_unregister;
897 	}
898 
899 	module_put(cpufreq_driver->owner);
900 	dprintk("initialization complete\n");
901 	cpufreq_debug_enable_ratelimit();
902 
903 	return 0;
904 
905 
906 err_out_unregister:
907 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
908 	for_each_cpu_mask(j, policy->cpus)
909 		cpufreq_cpu_data[j] = NULL;
910 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
911 
912 	kobject_unregister(&policy->kobj);
913 	wait_for_completion(&policy->kobj_unregister);
914 
915 err_out_driver_exit:
916 	if (cpufreq_driver->exit)
917 		cpufreq_driver->exit(policy);
918 
919 err_out:
920 	kfree(policy);
921 
922 nomem_out:
923 	module_put(cpufreq_driver->owner);
924 module_out:
925 	cpufreq_debug_enable_ratelimit();
926 	return ret;
927 }
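
/*
 * Illustrative sketch (kept in #if 0) of the ->init() contract relied
 * upon above: the driver fills in the hardware limits before the core
 * copies them into user_policy. All values and "example_" names are
 * made up.
 */
#if 0
static int example_cpu_init(struct cpufreq_policy *policy)
{
	policy->cpuinfo.min_freq = 250000;		/* kHz */
	policy->cpuinfo.max_freq = 1000000;		/* kHz */
	policy->cpuinfo.transition_latency = 100000;	/* ns */
	policy->cur = 1000000;
	policy->min = policy->cpuinfo.min_freq;
	policy->max = policy->cpuinfo.max_freq;
	return 0;
}
#endif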
928 
929 
930 /**
931  * __cpufreq_remove_dev - remove a CPU device
932  *
933  * Removes the cpufreq interface for a CPU device.
934  * Caller should already have policy_rwsem in write mode for this CPU.
935  * This routine frees the rwsem before returning.
936  */
937 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
938 {
939 	unsigned int cpu = sys_dev->id;
940 	unsigned long flags;
941 	struct cpufreq_policy *data;
942 #ifdef CONFIG_SMP
943 	struct sys_device *cpu_sys_dev;
944 	unsigned int j;
945 #endif
946 
947 	cpufreq_debug_disable_ratelimit();
948 	dprintk("unregistering CPU %u\n", cpu);
949 
950 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
951 	data = cpufreq_cpu_data[cpu];
952 
953 	if (!data) {
954 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
955 		cpufreq_debug_enable_ratelimit();
956 		unlock_policy_rwsem_write(cpu);
957 		return -EINVAL;
958 	}
959 	cpufreq_cpu_data[cpu] = NULL;
960 
961 
962 #ifdef CONFIG_SMP
963 	/* if this isn't the CPU which is the parent of the kobj, we
964 	 * only need to unlink, put and exit
965 	 */
966 	if (unlikely(cpu != data->cpu)) {
967 		dprintk("removing link\n");
968 		cpu_clear(cpu, data->cpus);
969 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
970 		sysfs_remove_link(&sys_dev->kobj, "cpufreq");
971 		cpufreq_cpu_put(data);
972 		cpufreq_debug_enable_ratelimit();
973 		unlock_policy_rwsem_write(cpu);
974 		return 0;
975 	}
976 #endif
977 
978 
979 	if (!kobject_get(&data->kobj)) {
980 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
981 		cpufreq_debug_enable_ratelimit();
982 		unlock_policy_rwsem_write(cpu);
983 		return -EFAULT;
984 	}
985 
986 #ifdef CONFIG_SMP
987 
988 #ifdef CONFIG_HOTPLUG_CPU
989 	cpufreq_cpu_governor[cpu] = data->governor;
990 #endif
991 
992 	/* if we have other CPUs still registered, we need to unlink them,
993 	 * or else wait_for_completion below will lock up. Clean the
994 	 * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
995 	 * links afterwards.
996 	 */
997 	if (unlikely(cpus_weight(data->cpus) > 1)) {
998 		for_each_cpu_mask(j, data->cpus) {
999 			if (j == cpu)
1000 				continue;
1001 			cpufreq_cpu_data[j] = NULL;
1002 		}
1003 	}
1004 
1005 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1006 
1007 	if (unlikely(cpus_weight(data->cpus) > 1)) {
1008 		for_each_cpu_mask(j, data->cpus) {
1009 			if (j == cpu)
1010 				continue;
1011 			dprintk("removing link for cpu %u\n", j);
1012 #ifdef CONFIG_HOTPLUG_CPU
1013 			cpufreq_cpu_governor[j] = data->governor;
1014 #endif
1015 			cpu_sys_dev = get_cpu_sysdev(j);
1016 			sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
1017 			cpufreq_cpu_put(data);
1018 		}
1019 	}
1020 #else
1021 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1022 #endif
1023 
1024 	if (cpufreq_driver->target)
1025 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1026 
1027 	unlock_policy_rwsem_write(cpu);
1028 
1029 	kobject_unregister(&data->kobj);
1030 
1031 	kobject_put(&data->kobj);
1032 
1033 	/* we need to make sure that the underlying kobj is actually
1034 	 * not referenced anymore by anybody before we proceed with
1035 	 * unloading.
1036 	 */
1037 	dprintk("waiting for dropping of refcount\n");
1038 	wait_for_completion(&data->kobj_unregister);
1039 	dprintk("wait complete\n");
1040 
1041 	if (cpufreq_driver->exit)
1042 		cpufreq_driver->exit(data);
1043 
1044 	kfree(data);
1045 
1046 	cpufreq_debug_enable_ratelimit();
1047 	return 0;
1048 }
1049 
1050 
1051 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1052 {
1053 	unsigned int cpu = sys_dev->id;
1054 	int retval;
1055 
1056 	if (cpu_is_offline(cpu))
1057 		return 0;
1058 
1059 	if (unlikely(lock_policy_rwsem_write(cpu)))
1060 		BUG();
1061 
1062 	retval = __cpufreq_remove_dev(sys_dev);
1063 	return retval;
1064 }
1065 
1066 
1067 static void handle_update(struct work_struct *work)
1068 {
1069 	struct cpufreq_policy *policy =
1070 		container_of(work, struct cpufreq_policy, update);
1071 	unsigned int cpu = policy->cpu;
1072 	dprintk("handle_update for cpu %u called\n", cpu);
1073 	cpufreq_update_policy(cpu);
1074 }
1075 
1076 /**
1077  *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1078  *	@cpu: cpu number
1079  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1080  *	@new_freq: CPU frequency the CPU actually runs at
1081  *
1082  *	We adjust to the current frequency first, and clean up later by either
1083  *	calling cpufreq_update_policy() or scheduling handle_update().
1084  */
1085 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1086 				unsigned int new_freq)
1087 {
1088 	struct cpufreq_freqs freqs;
1089 
1090 	dprintk("Warning: CPU frequency out of sync: cpufreq core thinks it "
1091 	       "is %u kHz, but it is actually %u kHz.\n", old_freq, new_freq);
1092 
1093 	freqs.cpu = cpu;
1094 	freqs.old = old_freq;
1095 	freqs.new = new_freq;
1096 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1097 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1098 }
1099 
1100 
1101 /**
1102  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1103  * @cpu: CPU number
1104  *
1105  * This is the last known freq, without actually getting it from the driver.
1106  * Return value will be the same as what is shown in scaling_cur_freq in sysfs.
1107  */
1108 unsigned int cpufreq_quick_get(unsigned int cpu)
1109 {
1110 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1111 	unsigned int ret_freq = 0;
1112 
1113 	if (policy) {
1114 		ret_freq = policy->cur;
1115 		cpufreq_cpu_put(policy);
1116 	}
1117 
1118 	return ret_freq;
1119 }
1120 EXPORT_SYMBOL(cpufreq_quick_get);
1121 
1122 
1123 static unsigned int __cpufreq_get(unsigned int cpu)
1124 {
1125 	struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
1126 	unsigned int ret_freq = 0;
1127 
1128 	if (!cpufreq_driver->get)
1129 		return ret_freq;
1130 
1131 	ret_freq = cpufreq_driver->get(cpu);
1132 
1133 	if (ret_freq && policy->cur &&
1134 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1135 		/* verify no discrepancy between actual and
1136 		 * saved value exists */
1137 		if (unlikely(ret_freq != policy->cur)) {
1138 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1139 			schedule_work(&policy->update);
1140 		}
1141 	}
1142 
1143 	return ret_freq;
1144 }
1145 
1146 /**
1147  * cpufreq_get - get the current CPU frequency (in kHz)
1148  * @cpu: CPU number
1149  *
1150  * Get the CPU current (static) CPU frequency
1151  */
1152 unsigned int cpufreq_get(unsigned int cpu)
1153 {
1154 	unsigned int ret_freq = 0;
1155 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1156 
1157 	if (!policy)
1158 		goto out;
1159 
1160 	if (unlikely(lock_policy_rwsem_read(cpu)))
1161 		goto out_policy;
1162 
1163 	ret_freq = __cpufreq_get(cpu);
1164 
1165 	unlock_policy_rwsem_read(cpu);
1166 
1167 out_policy:
1168 	cpufreq_cpu_put(policy);
1169 out:
1170 	return ret_freq;
1171 }
1172 EXPORT_SYMBOL(cpufreq_get);
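
/*
 * Example: a caller polling the current speed would do
 *
 *	unsigned int khz = cpufreq_get(0);
 *
 * and must tolerate a 0 return when no driver or policy is bound.
 */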
1173 
1174 
1175 /**
1176  *	cpufreq_suspend - let the low level driver prepare for suspend
1177  */
1178 
1179 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
1180 {
1181 	int cpu = sysdev->id;
1182 	int ret = 0;
1183 	unsigned int cur_freq = 0;
1184 	struct cpufreq_policy *cpu_policy;
1185 
1186 	dprintk("suspending cpu %u\n", cpu);
1187 
1188 	if (!cpu_online(cpu))
1189 		return 0;
1190 
1191 	/* we may be lax here as interrupts are off. Nonetheless
1192 	 * we need to grab the correct cpu policy, so as to check
1193 	 * whether we really run on this CPU.
1194 	 */
1195 
1196 	cpu_policy = cpufreq_cpu_get(cpu);
1197 	if (!cpu_policy)
1198 		return -EINVAL;
1199 
1200 	/* only handle each CPU group once */
1201 	if (unlikely(cpu_policy->cpu != cpu)) {
1202 		cpufreq_cpu_put(cpu_policy);
1203 		return 0;
1204 	}
1205 
1206 	if (cpufreq_driver->suspend) {
1207 		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1208 		if (ret) {
1209 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1210 					"step on CPU %u\n", cpu_policy->cpu);
1211 			cpufreq_cpu_put(cpu_policy);
1212 			return ret;
1213 		}
1214 	}
1215 
1216 
1217 	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1218 		goto out;
1219 
1220 	if (cpufreq_driver->get)
1221 		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1222 
1223 	if (!cur_freq || !cpu_policy->cur) {
1224 		printk(KERN_ERR "cpufreq: suspend failed to assert current "
1225 		       "frequency is what timing core thinks it is.\n");
1226 		goto out;
1227 	}
1228 
1229 	if (unlikely(cur_freq != cpu_policy->cur)) {
1230 		struct cpufreq_freqs freqs;
1231 
1232 		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1233 			dprintk("Warning: CPU frequency is %u, "
1234 			       "cpufreq assumed %u kHz.\n",
1235 			       cur_freq, cpu_policy->cur);
1236 
1237 		freqs.cpu = cpu;
1238 		freqs.old = cpu_policy->cur;
1239 		freqs.new = cur_freq;
1240 
1241 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1242 				    CPUFREQ_SUSPENDCHANGE, &freqs);
1243 		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1244 
1245 		cpu_policy->cur = cur_freq;
1246 	}
1247 
1248 out:
1249 	cpufreq_cpu_put(cpu_policy);
1250 	return 0;
1251 }
1252 
1253 /**
1254  *	cpufreq_resume -  restore proper CPU frequency handling after resume
1255  *
1256  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1257  *	2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1258  *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1259  *	    restored.
1260  */
1261 static int cpufreq_resume(struct sys_device *sysdev)
1262 {
1263 	int cpu = sysdev->id;
1264 	int ret = 0;
1265 	struct cpufreq_policy *cpu_policy;
1266 
1267 	dprintk("resuming cpu %u\n", cpu);
1268 
1269 	if (!cpu_online(cpu))
1270 		return 0;
1271 
1272 	/* we may be lax here as interrupts are off. Nonetheless
1273 	 * we need to grab the correct cpu policy, so as to check
1274 	 * whether we really run on this CPU.
1275 	 */
1276 
1277 	cpu_policy = cpufreq_cpu_get(cpu);
1278 	if (!cpu_policy)
1279 		return -EINVAL;
1280 
1281 	/* only handle each CPU group once */
1282 	if (unlikely(cpu_policy->cpu != cpu)) {
1283 		cpufreq_cpu_put(cpu_policy);
1284 		return 0;
1285 	}
1286 
1287 	if (cpufreq_driver->resume) {
1288 		ret = cpufreq_driver->resume(cpu_policy);
1289 		if (ret) {
1290 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1291 					"step on CPU %u\n", cpu_policy->cpu);
1292 			cpufreq_cpu_put(cpu_policy);
1293 			return ret;
1294 		}
1295 	}
1296 
1297 	if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1298 		unsigned int cur_freq = 0;
1299 
1300 		if (cpufreq_driver->get)
1301 			cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1302 
1303 		if (!cur_freq || !cpu_policy->cur) {
1304 			printk(KERN_ERR "cpufreq: resume failed to assert "
1305 					"current frequency is what timing core "
1306 					"thinks it is.\n");
1307 			goto out;
1308 		}
1309 
1310 		if (unlikely(cur_freq != cpu_policy->cur)) {
1311 			struct cpufreq_freqs freqs;
1312 
1313 			if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1314 				dprintk("Warning: CPU frequency "
1315 				       "is %u, cpufreq assumed %u kHz.\n",
1316 				       cur_freq, cpu_policy->cur);
1317 
1318 			freqs.cpu = cpu;
1319 			freqs.old = cpu_policy->cur;
1320 			freqs.new = cur_freq;
1321 
1322 			srcu_notifier_call_chain(
1323 					&cpufreq_transition_notifier_list,
1324 					CPUFREQ_RESUMECHANGE, &freqs);
1325 			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1326 
1327 			cpu_policy->cur = cur_freq;
1328 		}
1329 	}
1330 
1331 out:
1332 	schedule_work(&cpu_policy->update);
1333 	cpufreq_cpu_put(cpu_policy);
1334 	return ret;
1335 }
1336 
1337 static struct sysdev_driver cpufreq_sysdev_driver = {
1338 	.add		= cpufreq_add_dev,
1339 	.remove		= cpufreq_remove_dev,
1340 	.suspend	= cpufreq_suspend,
1341 	.resume		= cpufreq_resume,
1342 };
1343 
1344 
1345 /*********************************************************************
1346  *                     NOTIFIER LISTS INTERFACE                      *
1347  *********************************************************************/
1348 
1349 /**
1350  *	cpufreq_register_notifier - register a driver with cpufreq
1351  *	@nb: notifier function to register
1352  *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1353  *
1354  *	Add a driver to one of two lists: either a list of drivers that
1355  *	are notified about clock rate changes (once before and once after
1356  *	the transition), or a list of drivers that are notified about
1357  *	changes in cpufreq policy.
1358  *
1359  *	This function may sleep, and has the same return conditions as
1360  *	blocking_notifier_chain_register.
1361  */
1362 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1363 {
1364 	int ret;
1365 
1366 	switch (list) {
1367 	case CPUFREQ_TRANSITION_NOTIFIER:
1368 		ret = srcu_notifier_chain_register(
1369 				&cpufreq_transition_notifier_list, nb);
1370 		break;
1371 	case CPUFREQ_POLICY_NOTIFIER:
1372 		ret = blocking_notifier_chain_register(
1373 				&cpufreq_policy_notifier_list, nb);
1374 		break;
1375 	default:
1376 		ret = -EINVAL;
1377 	}
1378 
1379 	return ret;
1380 }
1381 EXPORT_SYMBOL(cpufreq_register_notifier);
1382 
1383 
1384 /**
1385  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1386  *	@nb: notifier block to be unregistered
1387  *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1388  *
1389  *	Remove a driver from the CPU frequency notifier list.
1390  *
1391  *	This function may sleep, and has the same return conditions as
1392  *	blocking_notifier_chain_unregister.
1393  */
1394 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1395 {
1396 	int ret;
1397 
1398 	switch (list) {
1399 	case CPUFREQ_TRANSITION_NOTIFIER:
1400 		ret = srcu_notifier_chain_unregister(
1401 				&cpufreq_transition_notifier_list, nb);
1402 		break;
1403 	case CPUFREQ_POLICY_NOTIFIER:
1404 		ret = blocking_notifier_chain_unregister(
1405 				&cpufreq_policy_notifier_list, nb);
1406 		break;
1407 	default:
1408 		ret = -EINVAL;
1409 	}
1410 
1411 	return ret;
1412 }
1413 EXPORT_SYMBOL(cpufreq_unregister_notifier);
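
/*
 * Illustrative sketch (kept in #if 0): subscribing to transition
 * events. The callback runs twice per change, once with
 * CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE. "example_"
 * names are hypothetical.
 */
#if 0
static int example_trans_cb(struct notifier_block *nb,
			    unsigned long val, void *data)
{
	struct cpufreq_freqs *freqs = data;

	if (val == CPUFREQ_POSTCHANGE)
		printk(KERN_DEBUG "cpu%u now runs at %u kHz\n",
		       freqs->cpu, freqs->new);
	return NOTIFY_OK;
}

static struct notifier_block example_trans_nb = {
	.notifier_call = example_trans_cb,
};

/* cpufreq_register_notifier(&example_trans_nb, CPUFREQ_TRANSITION_NOTIFIER); */
#endif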
1414 
1415 
1416 /*********************************************************************
1417  *                              GOVERNORS                            *
1418  *********************************************************************/
1419 
1420 
1421 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1422 			    unsigned int target_freq,
1423 			    unsigned int relation)
1424 {
1425 	int retval = -EINVAL;
1426 
1427 	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1428 		target_freq, relation);
1429 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1430 		retval = cpufreq_driver->target(policy, target_freq, relation);
1431 
1432 	return retval;
1433 }
1434 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1435 
1436 int cpufreq_driver_target(struct cpufreq_policy *policy,
1437 			  unsigned int target_freq,
1438 			  unsigned int relation)
1439 {
1440 	int ret;
1441 
1442 	policy = cpufreq_cpu_get(policy->cpu);
1443 	if (!policy)
1444 		return -EINVAL;
1445 
1446 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1447 		return -EINVAL;
1448 
1449 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1450 
1451 	unlock_policy_rwsem_write(policy->cpu);
1452 
1453 	cpufreq_cpu_put(policy);
1454 	return ret;
1455 }
1456 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1457 
1458 int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
1459 {
1460 	int ret = 0;
1461 
1462 	policy = cpufreq_cpu_get(policy->cpu);
1463 	if (!policy)
1464 		return -EINVAL;
1465 
1466 	if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
1467 		ret = cpufreq_driver->getavg(policy->cpu);
1468 
1469 	cpufreq_cpu_put(policy);
1470 	return ret;
1471 }
1472 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1473 
1474 /*
1475  * Forward "event" (CPUFREQ_GOV_START/STOP/LIMITS) to the policy's governor.
1476  */
1477 
1478 static int __cpufreq_governor(struct cpufreq_policy *policy,
1479 					unsigned int event)
1480 {
1481 	int ret;
1482 
1483 	/* gov must only be defined when the default governor is known to have
1484 	   latency restrictions, like e.g. conservative or ondemand.
1485 	   That this is the case is already ensured in Kconfig.
1486 	*/
1487 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1488 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1489 #else
1490 	struct cpufreq_governor *gov = NULL;
1491 #endif
1492 
1493 	if (policy->governor->max_transition_latency &&
1494 	    policy->cpuinfo.transition_latency >
1495 	    policy->governor->max_transition_latency) {
1496 		if (!gov)
1497 			return -EINVAL;
1498 		else {
1499 			printk(KERN_WARNING "%s governor failed: transition"
1500 			       " latency of the HW is too long, falling back"
1501 			       " to the %s governor\n",
1502 			       policy->governor->name,
1503 			       gov->name);
1504 			policy->governor = gov;
1505 		}
1506 	}
1507 
1508 	if (!try_module_get(policy->governor->owner))
1509 		return -EINVAL;
1510 
1511 	dprintk("__cpufreq_governor for CPU %u, event %u\n",
1512 						policy->cpu, event);
1513 	ret = policy->governor->governor(policy, event);
1514 
1515 	/* we keep one module reference alive for
1516 	 * each CPU this governor is running on */
1517 	if ((event != CPUFREQ_GOV_START) || ret)
1518 		module_put(policy->governor->owner);
1519 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1520 		module_put(policy->governor->owner);
1521 
1522 	return ret;
1523 }
1524 
1525 
1526 int cpufreq_register_governor(struct cpufreq_governor *governor)
1527 {
1528 	int err;
1529 
1530 	if (!governor)
1531 		return -EINVAL;
1532 
1533 	mutex_lock(&cpufreq_governor_mutex);
1534 
1535 	err = -EBUSY;
1536 	if (__find_governor(governor->name) == NULL) {
1537 		err = 0;
1538 		list_add(&governor->governor_list, &cpufreq_governor_list);
1539 	}
1540 
1541 	mutex_unlock(&cpufreq_governor_mutex);
1542 	return err;
1543 }
1544 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1545 
1546 
1547 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1548 {
1549 	if (!governor)
1550 		return;
1551 
1552 	mutex_lock(&cpufreq_governor_mutex);
1553 	list_del(&governor->governor_list);
1554 	mutex_unlock(&cpufreq_governor_mutex);
1555 	return;
1556 }
1557 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
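
/*
 * Illustrative sketch (kept in #if 0) of a minimal governor using the
 * registration API above; it pins each policy to its upper limit, much
 * like the performance governor. "example_" names are hypothetical.
 */
#if 0
static int example_governor_fn(struct cpufreq_policy *policy,
			       unsigned int event)
{
	switch (event) {
	case CPUFREQ_GOV_START:
	case CPUFREQ_GOV_LIMITS:
		return __cpufreq_driver_target(policy, policy->max,
					       CPUFREQ_RELATION_H);
	}
	return 0;
}

static struct cpufreq_governor example_gov = {
	.name		= "example",
	.governor	= example_governor_fn,
	.owner		= THIS_MODULE,
};

/* registered from module init with cpufreq_register_governor(&example_gov) */
#endif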
1558 
1559 
1560 
1561 /*********************************************************************
1562  *                          POLICY INTERFACE                         *
1563  *********************************************************************/
1564 
1565 /**
1566  * cpufreq_get_policy - get the current cpufreq_policy
1567  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1568  * @cpu: CPU whose policy is requested
1569  * Reads the current cpufreq policy.
1570  */
1571 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1572 {
1573 	struct cpufreq_policy *cpu_policy;
1574 	if (!policy)
1575 		return -EINVAL;
1576 
1577 	cpu_policy = cpufreq_cpu_get(cpu);
1578 	if (!cpu_policy)
1579 		return -EINVAL;
1580 
1581 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1582 
1583 	cpufreq_cpu_put(cpu_policy);
1584 	return 0;
1585 }
1586 EXPORT_SYMBOL(cpufreq_get_policy);
1587 
1588 
1589 /*
1590  * data   : current policy.
1591  * policy : policy to be set.
1592  */
1593 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1594 				struct cpufreq_policy *policy)
1595 {
1596 	int ret = 0;
1597 
1598 	cpufreq_debug_disable_ratelimit();
1599 	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1600 		policy->min, policy->max);
1601 
1602 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1603 				sizeof(struct cpufreq_cpuinfo));
1604 
1605 	if (policy->min > data->max || policy->max < data->min) {
1606 		ret = -EINVAL;
1607 		goto error_out;
1608 	}
1609 
1610 	/* verify the cpu speed can be set within this limit */
1611 	ret = cpufreq_driver->verify(policy);
1612 	if (ret)
1613 		goto error_out;
1614 
1615 	/* adjust if necessary - all reasons */
1616 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1617 			CPUFREQ_ADJUST, policy);
1618 
1619 	/* adjust if necessary - hardware incompatibility */
1620 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1621 			CPUFREQ_INCOMPATIBLE, policy);
1622 
1623 	/* verify the cpu speed can be set within this limit,
1624 	   which might be different from the first one */
1625 	ret = cpufreq_driver->verify(policy);
1626 	if (ret)
1627 		goto error_out;
1628 
1629 	/* notification of the new policy */
1630 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1631 			CPUFREQ_NOTIFY, policy);
1632 
1633 	data->min = policy->min;
1634 	data->max = policy->max;
1635 
1636 	dprintk("new min and max freqs are %u - %u kHz\n",
1637 					data->min, data->max);
1638 
1639 	if (cpufreq_driver->setpolicy) {
1640 		data->policy = policy->policy;
1641 		dprintk("setting range\n");
1642 		ret = cpufreq_driver->setpolicy(policy);
1643 	} else {
1644 		if (policy->governor != data->governor) {
1645 			/* save old, working values */
1646 			struct cpufreq_governor *old_gov = data->governor;
1647 
1648 			dprintk("governor switch\n");
1649 
1650 			/* end old governor */
1651 			if (data->governor)
1652 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1653 
1654 			/* start new governor */
1655 			data->governor = policy->governor;
1656 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1657 				/* new governor failed, so re-start old one */
1658 				dprintk("starting governor %s failed\n",
1659 							data->governor->name);
1660 				if (old_gov) {
1661 					data->governor = old_gov;
1662 					__cpufreq_governor(data,
1663 							   CPUFREQ_GOV_START);
1664 				}
1665 				ret = -EINVAL;
1666 				goto error_out;
1667 			}
1668 			/* might be a policy change, too, so fall through */
1669 		}
1670 		dprintk("governor: change or update limits\n");
1671 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1672 	}
1673 
1674 error_out:
1675 	cpufreq_debug_enable_ratelimit();
1676 	return ret;
1677 }
1678 
1679 /**
1680  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1681  *	@cpu: CPU which shall be re-evaluated
1682  *
1683  *	Useful for policy notifiers which have different necessities
1684  *	at different times.
1685  */
1686 int cpufreq_update_policy(unsigned int cpu)
1687 {
1688 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1689 	struct cpufreq_policy policy;
1690 	int ret = 0;
1691 
1692 	if (!data)
1693 		return -ENODEV;
1694 
1695 	if (unlikely(lock_policy_rwsem_write(cpu)))
1696 		return -EINVAL;
1697 
1698 	dprintk("updating policy for CPU %u\n", cpu);
1699 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1700 	policy.min = data->user_policy.min;
1701 	policy.max = data->user_policy.max;
1702 	policy.policy = data->user_policy.policy;
1703 	policy.governor = data->user_policy.governor;
1704 
1705 	/* BIOS might change freq behind our back
1706 	 * -> ask driver for current freq and notify governors about a change */
1707 	if (cpufreq_driver->get) {
1708 		policy.cur = cpufreq_driver->get(cpu);
1709 		if (!data->cur) {
1710 			dprintk("Driver did not initialize current freq\n");
1711 			data->cur = policy.cur;
1712 		} else {
1713 			if (data->cur != policy.cur)
1714 				cpufreq_out_of_sync(cpu, data->cur,
1715 								policy.cur);
1716 		}
1717 	}
1718 
1719 	ret = __cpufreq_set_policy(data, &policy);
1720 
1721 	unlock_policy_rwsem_write(cpu);
1722 
1723 	cpufreq_cpu_put(data);
1724 	return ret;
1725 }
1726 EXPORT_SYMBOL(cpufreq_update_policy);
1727 
1728 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1729 					unsigned long action, void *hcpu)
1730 {
1731 	unsigned int cpu = (unsigned long)hcpu;
1732 	struct sys_device *sys_dev;
1733 
1734 	sys_dev = get_cpu_sysdev(cpu);
1735 	if (sys_dev) {
1736 		switch (action) {
1737 		case CPU_ONLINE:
1738 		case CPU_ONLINE_FROZEN:
1739 			cpufreq_add_dev(sys_dev);
1740 			break;
1741 		case CPU_DOWN_PREPARE:
1742 		case CPU_DOWN_PREPARE_FROZEN:
1743 			if (unlikely(lock_policy_rwsem_write(cpu)))
1744 				BUG();
1745 
1746 			__cpufreq_remove_dev(sys_dev);
1747 			break;
1748 		case CPU_DOWN_FAILED:
1749 		case CPU_DOWN_FAILED_FROZEN:
1750 			cpufreq_add_dev(sys_dev);
1751 			break;
1752 		}
1753 	}
1754 	return NOTIFY_OK;
1755 }
1756 
1757 static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
1758 {
1759 	.notifier_call = cpufreq_cpu_callback,
1760 };
1761 
1762 /*********************************************************************
1763  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1764  *********************************************************************/
1765 
1766 /**
1767  * cpufreq_register_driver - register a CPU Frequency driver
1768  * @driver_data: A struct cpufreq_driver containing the values
1769  * submitted by the CPU Frequency driver.
1770  *
1771  *   Registers a CPU Frequency driver to this core code. This code
1772  * returns zero on success, -EBUSY when another driver got here first
1773  * (and isn't unregistered in the meantime).
1774  *
1775  */
1776 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1777 {
1778 	unsigned long flags;
1779 	int ret;
1780 
1781 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1782 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1783 		return -EINVAL;
1784 
1785 	dprintk("trying to register driver %s\n", driver_data->name);
1786 
1787 	if (driver_data->setpolicy)
1788 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1789 
1790 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1791 	if (cpufreq_driver) {
1792 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1793 		return -EBUSY;
1794 	}
1795 	cpufreq_driver = driver_data;
1796 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1797 
1798 	ret = sysdev_driver_register(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1799 
1800 	if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1801 		int i;
1802 		ret = -ENODEV;
1803 
1804 		/* check for at least one working CPU */
1805 		for (i = 0; i < NR_CPUS; i++)
1806 			if (cpufreq_cpu_data[i])
1807 				ret = 0;
1808 
1809 		/* if all ->init() calls failed, unregister */
1810 		if (ret) {
1811 			dprintk("no CPU initialized for driver %s\n",
1812 							driver_data->name);
1813 			sysdev_driver_unregister(&cpu_sysdev_class,
1814 						&cpufreq_sysdev_driver);
1815 
1816 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
1817 			cpufreq_driver = NULL;
1818 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1819 		}
1820 	}
1821 
1822 	if (!ret) {
1823 		register_hotcpu_notifier(&cpufreq_cpu_notifier);
1824 		dprintk("driver %s up and running\n", driver_data->name);
1825 		cpufreq_debug_enable_ratelimit();
1826 	}
1827 
1828 	return ret;
1829 }
1830 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
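
/*
 * Illustrative sketch (kept in #if 0) of the minimal driver a platform
 * would register with the core: ->init and ->verify are mandatory, plus
 * either ->setpolicy or ->target. All "example_" hooks are hypothetical
 * (see the ->init and ->target sketches earlier in this file).
 */
#if 0
static struct cpufreq_driver example_driver = {
	.name	= "example",
	.owner	= THIS_MODULE,
	.init	= example_cpu_init,
	.verify	= example_verify,
	.target	= example_target,
	.get	= example_get,
};

/* registered from module init with cpufreq_register_driver(&example_driver) */
#endif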
1831 
1832 
1833 /**
1834  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1835  *
1836  *    Unregister the current CPUFreq driver. Only call this if you have
1837  * the right to do so, i.e. if you have succeeded in initialising before!
1838  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1839  * currently not initialised.
1840  */
1841 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1842 {
1843 	unsigned long flags;
1844 
1845 	cpufreq_debug_disable_ratelimit();
1846 
1847 	if (!cpufreq_driver || (driver != cpufreq_driver)) {
1848 		cpufreq_debug_enable_ratelimit();
1849 		return -EINVAL;
1850 	}
1851 
1852 	dprintk("unregistering driver %s\n", driver->name);
1853 
1854 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1855 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1856 
1857 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1858 	cpufreq_driver = NULL;
1859 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1860 
1861 	return 0;
1862 }
1863 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1864 
1865 static int __init cpufreq_core_init(void)
1866 {
1867 	int cpu;
1868 
1869 	for_each_possible_cpu(cpu) {
1870 		per_cpu(policy_cpu, cpu) = -1;
1871 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1872 	}
1873 	return 0;
1874 }
1875 
1876 core_initcall(cpufreq_core_init);
1877