xref: /openbmc/linux/drivers/cpufreq/cpufreq.c (revision c21b37f6)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *	Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *	Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17 
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31 
/*
 * Debug print helper for the cpufreq core: forwards its arguments to
 * cpufreq_debug_printk() with type CPUFREQ_DEBUG_CORE and the
 * "cpufreq-core" prefix.
 */
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
						"cpufreq-core", msg)
34 
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
static struct cpufreq_driver *cpufreq_driver;
/* Policy pointer for each CPU number; readers treat NULL as "no policy". */
static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
#ifdef CONFIG_HOTPLUG_CPU
/* This one keeps track of the previously set governor of a removed CPU */
static struct cpufreq_governor *cpufreq_cpu_governor[NR_CPUS];
#endif
/* Acquired via spin_lock_irqsave() by cpufreq_cpu_get() and the device
 * add/remove paths. */
static DEFINE_SPINLOCK(cpufreq_driver_lock);
47 
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
62  * - Governor routines that can be called in cpufreq hotplug path should not
63  *   take this sem as top level hotplug notifier handler takes this.
64  */
/* For each CPU, the number of the CPU whose cpu_policy_rwsem covers its
 * policy; the lock/unlock helpers BUG() if they find -1 here. */
static DEFINE_PER_CPU(int, policy_cpu);
/* One reader-writer semaphore per CPU; see the locking rules above. */
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67 
68 #define lock_policy_rwsem(mode, cpu)					\
69 int lock_policy_rwsem_##mode						\
70 (int cpu)								\
71 {									\
72 	int policy_cpu = per_cpu(policy_cpu, cpu);			\
73 	BUG_ON(policy_cpu == -1);					\
74 	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
75 	if (unlikely(!cpu_online(cpu))) {				\
76 		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
77 		return -1;						\
78 	}								\
79 									\
80 	return 0;							\
81 }
82 
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85 
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88 
89 void unlock_policy_rwsem_read(int cpu)
90 {
91 	int policy_cpu = per_cpu(policy_cpu, cpu);
92 	BUG_ON(policy_cpu == -1);
93 	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96 
97 void unlock_policy_rwsem_write(int cpu)
98 {
99 	int policy_cpu = per_cpu(policy_cpu, cpu);
100 	BUG_ON(policy_cpu == -1);
101 	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104 
105 
106 /* internal prototypes */
/* Forward declarations for helpers that are used before their definition. */
static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
/* Work handler installed on policy->update by cpufreq_add_dev(). */
static void handle_update(struct work_struct *work);
110 
111 /**
112  * Two notifier lists: the "policy" list is involved in the
113  * validation process for a new CPU frequency policy; the
114  * "transition" list for kernel code that needs to handle
115  * changes to devices when the CPU clock speed changes.
116  * The mutex locks both lists.
117  */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
/* SRCU notifier head; it has no static initializer here and is set up at
 * run time by the initcall below. */
static struct srcu_notifier_head cpufreq_transition_notifier_list;

/* Initialize the transition notifier head; registered as a pure_initcall. */
static int __init init_cpufreq_transition_notifier_list(void)
{
	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
	return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

/* Registered governors; cpufreq_parse_governor() holds
 * cpufreq_governor_mutex while searching this list. */
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX (cpufreq_governor_mutex);
130 
131 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
132 {
133 	struct cpufreq_policy *data;
134 	unsigned long flags;
135 
136 	if (cpu >= NR_CPUS)
137 		goto err_out;
138 
139 	/* get the cpufreq driver */
140 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
141 
142 	if (!cpufreq_driver)
143 		goto err_out_unlock;
144 
145 	if (!try_module_get(cpufreq_driver->owner))
146 		goto err_out_unlock;
147 
148 
149 	/* get the CPU */
150 	data = cpufreq_cpu_data[cpu];
151 
152 	if (!data)
153 		goto err_out_put_module;
154 
155 	if (!kobject_get(&data->kobj))
156 		goto err_out_put_module;
157 
158 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
159 	return data;
160 
161 err_out_put_module:
162 	module_put(cpufreq_driver->owner);
163 err_out_unlock:
164 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
165 err_out:
166 	return NULL;
167 }
168 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
169 
170 
171 void cpufreq_cpu_put(struct cpufreq_policy *data)
172 {
173 	kobject_put(&data->kobj);
174 	module_put(cpufreq_driver->owner);
175 }
176 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
177 
178 
179 /*********************************************************************
180  *                     UNIFIED DEBUG HELPERS                         *
181  *********************************************************************/
182 #ifdef CONFIG_CPU_FREQ_DEBUG
183 
/* Bit mask selecting which part(s) of the CPUfreq subsystem are debugged;
 * cpufreq_debug_printk() only prints when (type & debug) is non-zero. */
static unsigned int debug;

/* Is the debug output rate-limited using printk_ratelimit()? Non-zero
 * means yes. The user can set or modify this value.
 */
static unsigned int debug_ratelimit = 1;

/* Nesting counter that suspends rate limiting while it is non-zero.
 * Rate limiting is enabled after a successful loading of a cpufreq
 * driver, temporarily disabled when a new policy is set, and disabled
 * upon cpufreq driver removal. Protected by disable_ratelimit_lock.
 */
static unsigned int disable_ratelimit = 1;
static DEFINE_SPINLOCK(disable_ratelimit_lock);
198 
199 static void cpufreq_debug_enable_ratelimit(void)
200 {
201 	unsigned long flags;
202 
203 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
204 	if (disable_ratelimit)
205 		disable_ratelimit--;
206 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
207 }
208 
209 static void cpufreq_debug_disable_ratelimit(void)
210 {
211 	unsigned long flags;
212 
213 	spin_lock_irqsave(&disable_ratelimit_lock, flags);
214 	disable_ratelimit++;
215 	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
216 }
217 
218 void cpufreq_debug_printk(unsigned int type, const char *prefix,
219 							const char *fmt, ...)
220 {
221 	char s[256];
222 	va_list args;
223 	unsigned int len;
224 	unsigned long flags;
225 
226 	WARN_ON(!prefix);
227 	if (type & debug) {
228 		spin_lock_irqsave(&disable_ratelimit_lock, flags);
229 		if (!disable_ratelimit && debug_ratelimit
230 					&& !printk_ratelimit()) {
231 			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
232 			return;
233 		}
234 		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
235 
236 		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
237 
238 		va_start(args, fmt);
239 		len += vsnprintf(&s[len], (256 - len), fmt, args);
240 		va_end(args);
241 
242 		printk(s);
243 
244 		WARN_ON(len < 5);
245 	}
246 }
247 EXPORT_SYMBOL(cpufreq_debug_printk);
248 
249 
/* Both debug knobs are exposed as module parameters with mode 0644. */
module_param(debug, uint, 0644);
MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
			" 2 to debug drivers, and 4 to debug governors.");

module_param(debug_ratelimit, uint, 0644);
MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
					" set to 0 to disable ratelimiting.");
257 
258 #else /* !CONFIG_CPU_FREQ_DEBUG */
259 
/* No-op stand-ins used when CONFIG_CPU_FREQ_DEBUG is not set. */
static inline void cpufreq_debug_enable_ratelimit(void) { }
static inline void cpufreq_debug_disable_ratelimit(void) { }
262 
263 #endif /* CONFIG_CPU_FREQ_DEBUG */
264 
265 
266 /*********************************************************************
267  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
268  *********************************************************************/
269 
270 /**
271  * adjust_jiffies - adjust the system "loops_per_jiffy"
272  *
273  * This function alters the system "loops_per_jiffy" for the clock
274  * speed change. Note that loops_per_jiffy cannot be updated on SMP
275  * systems as each CPU might be scaled differently. So, use the arch
276  * per-CPU loops_per_jiffy value wherever possible.
277  */
278 #ifndef CONFIG_SMP
279 static unsigned long l_p_j_ref;
280 static unsigned int  l_p_j_ref_freq;
281 
282 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
283 {
284 	if (ci->flags & CPUFREQ_CONST_LOOPS)
285 		return;
286 
287 	if (!l_p_j_ref_freq) {
288 		l_p_j_ref = loops_per_jiffy;
289 		l_p_j_ref_freq = ci->old;
290 		dprintk("saving %lu as reference value for loops_per_jiffy;"
291 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
292 	}
293 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
294 	    (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
295 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
296 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
297 								ci->new);
298 		dprintk("scaling loops_per_jiffy to %lu"
299 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
300 	}
301 }
302 #else
303 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
304 {
305 	return;
306 }
307 #endif
308 
309 
310 /**
311  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
312  * on frequency transition.
313  *
314  * This function calls the transition notifiers and the "adjust_jiffies"
315  * function. It is called twice on all CPU frequency changes that have
316  * external effects.
317  */
318 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
319 {
320 	struct cpufreq_policy *policy;
321 
322 	BUG_ON(irqs_disabled());
323 
324 	freqs->flags = cpufreq_driver->flags;
325 	dprintk("notification %u of frequency transition to %u kHz\n",
326 		state, freqs->new);
327 
328 	policy = cpufreq_cpu_data[freqs->cpu];
329 	switch (state) {
330 
331 	case CPUFREQ_PRECHANGE:
332 		/* detect if the driver reported a value as "old frequency"
333 		 * which is not equal to what the cpufreq core thinks is
334 		 * "old frequency".
335 		 */
336 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
337 			if ((policy) && (policy->cpu == freqs->cpu) &&
338 			    (policy->cur) && (policy->cur != freqs->old)) {
339 				dprintk("Warning: CPU frequency is"
340 					" %u, cpufreq assumed %u kHz.\n",
341 					freqs->old, policy->cur);
342 				freqs->old = policy->cur;
343 			}
344 		}
345 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
346 				CPUFREQ_PRECHANGE, freqs);
347 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
348 		break;
349 
350 	case CPUFREQ_POSTCHANGE:
351 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
352 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
353 				CPUFREQ_POSTCHANGE, freqs);
354 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
355 			policy->cur = freqs->new;
356 		break;
357 	}
358 }
359 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
360 
361 
362 
363 /*********************************************************************
364  *                          SYSFS INTERFACE                          *
365  *********************************************************************/
366 
367 static struct cpufreq_governor *__find_governor(const char *str_governor)
368 {
369 	struct cpufreq_governor *t;
370 
371 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
372 		if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
373 			return t;
374 
375 	return NULL;
376 }
377 
378 /**
379  * cpufreq_parse_governor - parse a governor string
380  */
381 static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
382 				struct cpufreq_governor **governor)
383 {
384 	int err = -EINVAL;
385 
386 	if (!cpufreq_driver)
387 		goto out;
388 
389 	if (cpufreq_driver->setpolicy) {
390 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
391 			*policy = CPUFREQ_POLICY_PERFORMANCE;
392 			err = 0;
393 		} else if (!strnicmp(str_governor, "powersave",
394 						CPUFREQ_NAME_LEN)) {
395 			*policy = CPUFREQ_POLICY_POWERSAVE;
396 			err = 0;
397 		}
398 	} else if (cpufreq_driver->target) {
399 		struct cpufreq_governor *t;
400 
401 		mutex_lock(&cpufreq_governor_mutex);
402 
403 		t = __find_governor(str_governor);
404 
405 		if (t == NULL) {
406 			char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
407 								str_governor);
408 
409 			if (name) {
410 				int ret;
411 
412 				mutex_unlock(&cpufreq_governor_mutex);
413 				ret = request_module(name);
414 				mutex_lock(&cpufreq_governor_mutex);
415 
416 				if (ret == 0)
417 					t = __find_governor(str_governor);
418 			}
419 
420 			kfree(name);
421 		}
422 
423 		if (t != NULL) {
424 			*governor = t;
425 			err = 0;
426 		}
427 
428 		mutex_unlock(&cpufreq_governor_mutex);
429 	}
430   out:
431 	return err;
432 }
433 
434 
435 /* drivers/base/cpu.c */
436 extern struct sysdev_class cpu_sysdev_class;
437 
438 
439 /**
440  * cpufreq_per_cpu_attr_read() / show_##file_name() -
441  * print out cpufreq information
442  *
443  * Write out information from cpufreq_driver->policy[cpu]; object must be
444  * "unsigned int".
445  */
446 
447 #define show_one(file_name, object)			\
448 static ssize_t show_##file_name				\
449 (struct cpufreq_policy * policy, char *buf)		\
450 {							\
451 	return sprintf (buf, "%u\n", policy->object);	\
452 }
453 
454 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
455 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
456 show_one(scaling_min_freq, min);
457 show_one(scaling_max_freq, max);
458 show_one(scaling_cur_freq, cur);
459 
460 static int __cpufreq_set_policy(struct cpufreq_policy *data,
461 				struct cpufreq_policy *policy);
462 
463 /**
464  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
465  */
466 #define store_one(file_name, object)			\
467 static ssize_t store_##file_name					\
468 (struct cpufreq_policy * policy, const char *buf, size_t count)		\
469 {									\
470 	unsigned int ret = -EINVAL;					\
471 	struct cpufreq_policy new_policy;				\
472 									\
473 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
474 	if (ret)							\
475 		return -EINVAL;						\
476 									\
477 	ret = sscanf (buf, "%u", &new_policy.object);			\
478 	if (ret != 1)							\
479 		return -EINVAL;						\
480 									\
481 	ret = __cpufreq_set_policy(policy, &new_policy);		\
482 	policy->user_policy.object = policy->object;			\
483 									\
484 	return ret ? ret : count;					\
485 }
486 
487 store_one(scaling_min_freq,min);
488 store_one(scaling_max_freq,max);
489 
490 /**
491  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
492  */
493 static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
494 							char *buf)
495 {
496 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
497 	if (!cur_freq)
498 		return sprintf(buf, "<unknown>");
499 	return sprintf(buf, "%u\n", cur_freq);
500 }
501 
502 
503 /**
504  * show_scaling_governor - show the current policy for the specified CPU
505  */
506 static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
507 							char *buf)
508 {
509 	if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
510 		return sprintf(buf, "powersave\n");
511 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
512 		return sprintf(buf, "performance\n");
513 	else if (policy->governor)
514 		return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
515 	return -EINVAL;
516 }
517 
518 
519 /**
520  * store_scaling_governor - store policy for the specified CPU
521  */
522 static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
523 				       const char *buf, size_t count)
524 {
525 	unsigned int ret = -EINVAL;
526 	char	str_governor[16];
527 	struct cpufreq_policy new_policy;
528 
529 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
530 	if (ret)
531 		return ret;
532 
533 	ret = sscanf (buf, "%15s", str_governor);
534 	if (ret != 1)
535 		return -EINVAL;
536 
537 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
538 						&new_policy.governor))
539 		return -EINVAL;
540 
541 	/* Do not use cpufreq_set_policy here or the user_policy.max
542 	   will be wrongly overridden */
543 	ret = __cpufreq_set_policy(policy, &new_policy);
544 
545 	policy->user_policy.policy = policy->policy;
546 	policy->user_policy.governor = policy->governor;
547 
548 	if (ret)
549 		return ret;
550 	else
551 		return count;
552 }
553 
554 /**
555  * show_scaling_driver - show the cpufreq driver currently loaded
556  */
557 static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
558 {
559 	return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
560 }
561 
562 /**
563  * show_scaling_available_governors - show the available CPUfreq governors
564  */
565 static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
566 				char *buf)
567 {
568 	ssize_t i = 0;
569 	struct cpufreq_governor *t;
570 
571 	if (!cpufreq_driver->target) {
572 		i += sprintf(buf, "performance powersave");
573 		goto out;
574 	}
575 
576 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
577 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
578 			goto out;
579 		i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
580 	}
581 out:
582 	i += sprintf(&buf[i], "\n");
583 	return i;
584 }
585 /**
586  * show_affected_cpus - show the CPUs affected by each transition
587  */
588 static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf)
589 {
590 	ssize_t i = 0;
591 	unsigned int cpu;
592 
593 	for_each_cpu_mask(cpu, policy->cpus) {
594 		if (i)
595 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
596 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
597 		if (i >= (PAGE_SIZE - 5))
598 		    break;
599 	}
600 	i += sprintf(&buf[i], "\n");
601 	return i;
602 }
603 
604 
/*
 * Helpers that define a struct freq_attr named after the sysfs file and
 * wire it to the matching show_<name>() / store_<name>() handlers:
 * world-readable (0444), owner-read-only (0400), and read-write (0644).
 */
#define define_one_ro(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

#define define_one_ro0400(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0400, show_##_name, NULL)

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_ro0400(cpuinfo_cur_freq);
define_one_ro(cpuinfo_min_freq);
define_one_ro(cpuinfo_max_freq);
define_one_ro(scaling_available_governors);
define_one_ro(scaling_driver);
define_one_ro(scaling_cur_freq);
define_one_ro(affected_cpus);
define_one_rw(scaling_min_freq);
define_one_rw(scaling_max_freq);
define_one_rw(scaling_governor);

/*
 * Attributes attached to every policy kobject through ktype_cpufreq.
 * cpuinfo_cur_freq and scaling_cur_freq are deliberately absent:
 * cpufreq_add_dev() creates them only when the driver provides ->get
 * and ->target respectively.
 */
static struct attribute * default_attrs[] = {
	&cpuinfo_min_freq.attr,
	&cpuinfo_max_freq.attr,
	&scaling_min_freq.attr,
	&scaling_max_freq.attr,
	&affected_cpus.attr,
	&scaling_governor.attr,
	&scaling_driver.attr,
	&scaling_available_governors.attr,
	NULL
};

/* Map an embedded kobject / attribute back to its containing structure. */
#define to_policy(k) container_of(k,struct cpufreq_policy,kobj)
#define to_attr(a) container_of(a,struct freq_attr,attr)
642 
643 static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
644 {
645 	struct cpufreq_policy * policy = to_policy(kobj);
646 	struct freq_attr * fattr = to_attr(attr);
647 	ssize_t ret;
648 	policy = cpufreq_cpu_get(policy->cpu);
649 	if (!policy)
650 		return -EINVAL;
651 
652 	if (lock_policy_rwsem_read(policy->cpu) < 0)
653 		return -EINVAL;
654 
655 	if (fattr->show)
656 		ret = fattr->show(policy, buf);
657 	else
658 		ret = -EIO;
659 
660 	unlock_policy_rwsem_read(policy->cpu);
661 
662 	cpufreq_cpu_put(policy);
663 	return ret;
664 }
665 
666 static ssize_t store(struct kobject * kobj, struct attribute * attr,
667 		     const char * buf, size_t count)
668 {
669 	struct cpufreq_policy * policy = to_policy(kobj);
670 	struct freq_attr * fattr = to_attr(attr);
671 	ssize_t ret;
672 	policy = cpufreq_cpu_get(policy->cpu);
673 	if (!policy)
674 		return -EINVAL;
675 
676 	if (lock_policy_rwsem_write(policy->cpu) < 0)
677 		return -EINVAL;
678 
679 	if (fattr->store)
680 		ret = fattr->store(policy, buf, count);
681 	else
682 		ret = -EIO;
683 
684 	unlock_policy_rwsem_write(policy->cpu);
685 
686 	cpufreq_cpu_put(policy);
687 	return ret;
688 }
689 
690 static void cpufreq_sysfs_release(struct kobject * kobj)
691 {
692 	struct cpufreq_policy * policy = to_policy(kobj);
693 	dprintk("last reference is dropped\n");
694 	complete(&policy->kobj_unregister);
695 }
696 
/* Generic show/store dispatchers shared by every cpufreq attribute. */
static struct sysfs_ops sysfs_ops = {
	.show	= show,
	.store	= store,
};

/* kobject type of a policy: default attribute set, the dispatchers above,
 * and the release hook that completes policy->kobj_unregister. */
static struct kobj_type ktype_cpufreq = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
	.release	= cpufreq_sysfs_release,
};
707 
708 
709 /**
710  * cpufreq_add_dev - add a CPU device
711  *
712  * Adds the cpufreq interface for a CPU device.
713  */
714 static int cpufreq_add_dev (struct sys_device * sys_dev)
715 {
716 	unsigned int cpu = sys_dev->id;
717 	int ret = 0;
718 	struct cpufreq_policy new_policy;
719 	struct cpufreq_policy *policy;
720 	struct freq_attr **drv_attr;
721 	struct sys_device *cpu_sys_dev;
722 	unsigned long flags;
723 	unsigned int j;
724 #ifdef CONFIG_SMP
725 	struct cpufreq_policy *managed_policy;
726 #endif
727 
728 	if (cpu_is_offline(cpu))
729 		return 0;
730 
731 	cpufreq_debug_disable_ratelimit();
732 	dprintk("adding CPU %u\n", cpu);
733 
734 #ifdef CONFIG_SMP
735 	/* check whether a different CPU already registered this
736 	 * CPU because it is in the same boat. */
737 	policy = cpufreq_cpu_get(cpu);
738 	if (unlikely(policy)) {
739 		cpufreq_cpu_put(policy);
740 		cpufreq_debug_enable_ratelimit();
741 		return 0;
742 	}
743 #endif
744 
745 	if (!try_module_get(cpufreq_driver->owner)) {
746 		ret = -EINVAL;
747 		goto module_out;
748 	}
749 
750 	policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
751 	if (!policy) {
752 		ret = -ENOMEM;
753 		goto nomem_out;
754 	}
755 
756 	policy->cpu = cpu;
757 	policy->cpus = cpumask_of_cpu(cpu);
758 
759 	/* Initially set CPU itself as the policy_cpu */
760 	per_cpu(policy_cpu, cpu) = cpu;
761 	lock_policy_rwsem_write(cpu);
762 
763 	init_completion(&policy->kobj_unregister);
764 	INIT_WORK(&policy->update, handle_update);
765 
766 	/* call driver. From then on the cpufreq must be able
767 	 * to accept all calls to ->verify and ->setpolicy for this CPU
768 	 */
769 	ret = cpufreq_driver->init(policy);
770 	if (ret) {
771 		dprintk("initialization failed\n");
772 		unlock_policy_rwsem_write(cpu);
773 		goto err_out;
774 	}
775 	policy->user_policy.min = policy->cpuinfo.min_freq;
776 	policy->user_policy.max = policy->cpuinfo.max_freq;
777 
778 #ifdef CONFIG_SMP
779 
780 #ifdef CONFIG_HOTPLUG_CPU
781 	if (cpufreq_cpu_governor[cpu]){
782 		policy->governor = cpufreq_cpu_governor[cpu];
783 		dprintk("Restoring governor %s for cpu %d\n",
784 		       policy->governor->name, cpu);
785 	}
786 #endif
787 
788 	for_each_cpu_mask(j, policy->cpus) {
789 		if (cpu == j)
790 			continue;
791 
792 		/* check for existing affected CPUs.  They may not be aware
793 		 * of it due to CPU Hotplug.
794 		 */
795 		managed_policy = cpufreq_cpu_get(j);
796 		if (unlikely(managed_policy)) {
797 
798 			/* Set proper policy_cpu */
799 			unlock_policy_rwsem_write(cpu);
800 			per_cpu(policy_cpu, cpu) = managed_policy->cpu;
801 
802 			if (lock_policy_rwsem_write(cpu) < 0)
803 				goto err_out_driver_exit;
804 
805 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
806 			managed_policy->cpus = policy->cpus;
807 			cpufreq_cpu_data[cpu] = managed_policy;
808 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
809 
810 			dprintk("CPU already managed, adding link\n");
811 			ret = sysfs_create_link(&sys_dev->kobj,
812 						&managed_policy->kobj,
813 						"cpufreq");
814 			if (ret) {
815 				unlock_policy_rwsem_write(cpu);
816 				goto err_out_driver_exit;
817 			}
818 
819 			cpufreq_debug_enable_ratelimit();
820 			ret = 0;
821 			unlock_policy_rwsem_write(cpu);
822 			goto err_out_driver_exit; /* call driver->exit() */
823 		}
824 	}
825 #endif
826 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
827 
828 	/* prepare interface data */
829 	policy->kobj.parent = &sys_dev->kobj;
830 	policy->kobj.ktype = &ktype_cpufreq;
831 	strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN);
832 
833 	ret = kobject_register(&policy->kobj);
834 	if (ret) {
835 		unlock_policy_rwsem_write(cpu);
836 		goto err_out_driver_exit;
837 	}
838 	/* set up files for this cpu device */
839 	drv_attr = cpufreq_driver->attr;
840 	while ((drv_attr) && (*drv_attr)) {
841 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
842 		if (ret)
843 			goto err_out_driver_exit;
844 		drv_attr++;
845 	}
846 	if (cpufreq_driver->get){
847 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
848 		if (ret)
849 			goto err_out_driver_exit;
850 	}
851 	if (cpufreq_driver->target){
852 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
853 		if (ret)
854 			goto err_out_driver_exit;
855 	}
856 
857 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
858 	for_each_cpu_mask(j, policy->cpus) {
859 		cpufreq_cpu_data[j] = policy;
860 		per_cpu(policy_cpu, j) = policy->cpu;
861 	}
862 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
863 
864 	/* symlink affected CPUs */
865 	for_each_cpu_mask(j, policy->cpus) {
866 		if (j == cpu)
867 			continue;
868 		if (!cpu_online(j))
869 			continue;
870 
871 		dprintk("CPU %u already managed, adding link\n", j);
872 		cpufreq_cpu_get(cpu);
873 		cpu_sys_dev = get_cpu_sysdev(j);
874 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
875 					"cpufreq");
876 		if (ret) {
877 			unlock_policy_rwsem_write(cpu);
878 			goto err_out_unregister;
879 		}
880 	}
881 
882 	policy->governor = NULL; /* to assure that the starting sequence is
883 				  * run in cpufreq_set_policy */
884 
885 	/* set default policy */
886 	ret = __cpufreq_set_policy(policy, &new_policy);
887 	policy->user_policy.policy = policy->policy;
888 	policy->user_policy.governor = policy->governor;
889 
890 	unlock_policy_rwsem_write(cpu);
891 
892 	if (ret) {
893 		dprintk("setting policy failed\n");
894 		goto err_out_unregister;
895 	}
896 
897 	module_put(cpufreq_driver->owner);
898 	dprintk("initialization complete\n");
899 	cpufreq_debug_enable_ratelimit();
900 
901 	return 0;
902 
903 
904 err_out_unregister:
905 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
906 	for_each_cpu_mask(j, policy->cpus)
907 		cpufreq_cpu_data[j] = NULL;
908 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
909 
910 	kobject_unregister(&policy->kobj);
911 	wait_for_completion(&policy->kobj_unregister);
912 
913 err_out_driver_exit:
914 	if (cpufreq_driver->exit)
915 		cpufreq_driver->exit(policy);
916 
917 err_out:
918 	kfree(policy);
919 
920 nomem_out:
921 	module_put(cpufreq_driver->owner);
922 module_out:
923 	cpufreq_debug_enable_ratelimit();
924 	return ret;
925 }
926 
927 
/**
 * __cpufreq_remove_dev - remove a CPU device
 * @sys_dev: sysdev of the CPU being removed; its ->id is the CPU number
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 *
 * Returns 0 on success, -EINVAL if the CPU has no policy, or -EFAULT if
 * no reference on the policy kobject could be taken.
 */
static int __cpufreq_remove_dev (struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long flags;
	struct cpufreq_policy *data;
#ifdef CONFIG_SMP
	struct sys_device *cpu_sys_dev;
	unsigned int j;
#endif

	cpufreq_debug_disable_ratelimit();
	dprintk("unregistering CPU %u\n", cpu);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	data = cpufreq_cpu_data[cpu];

	if (!data) {
		/* nothing registered for this CPU: just undo what the
		 * caller and the top of this function set up */
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	cpufreq_cpu_data[cpu] = NULL;


#ifdef CONFIG_SMP
	/* if this isn't the CPU which is the parent of the kobj, we
	 * only need to unlink, put and exit
	 */
	if (unlikely(cpu != data->cpu)) {
		dprintk("removing link\n");
		cpu_clear(cpu, data->cpus);
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		sysfs_remove_link(&sys_dev->kobj, "cpufreq");
		cpufreq_cpu_put(data);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return 0;
	}
#endif


	/* hold our own reference across the teardown; it is dropped by the
	 * kobject_put() that follows kobject_unregister() below */
	if (!kobject_get(&data->kobj)) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EFAULT;
	}

#ifdef CONFIG_SMP

#ifdef CONFIG_HOTPLUG_CPU
	/* remember the governor so cpufreq_add_dev() can restore it */
	cpufreq_cpu_governor[cpu] = data->governor;
#endif

	/* if we have other CPUs still registered, we need to unlink them,
	 * or else wait_for_completion below will lock up. Clean the
	 * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
	 * links afterwards.
	 */
	if (unlikely(cpus_weight(data->cpus) > 1)) {
		for_each_cpu_mask(j, data->cpus) {
			if (j == cpu)
				continue;
			cpufreq_cpu_data[j] = NULL;
		}
	}

	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	/* second pass, now without the spinlock: drop each sibling's sysfs
	 * link and the policy reference that goes with it */
	if (unlikely(cpus_weight(data->cpus) > 1)) {
		for_each_cpu_mask(j, data->cpus) {
			if (j == cpu)
				continue;
			dprintk("removing link for cpu %u\n", j);
#ifdef CONFIG_HOTPLUG_CPU
			cpufreq_cpu_governor[j] = data->governor;
#endif
			cpu_sys_dev = get_cpu_sysdev(j);
			sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
			cpufreq_cpu_put(data);
		}
	}
#else
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	/* only ->target drivers have a governor to stop */
	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);

	/* the semaphore is released before waiting for the kobject below */
	unlock_policy_rwsem_write(cpu);

	kobject_unregister(&data->kobj);

	kobject_put(&data->kobj);

	/* we need to make sure that the underlying kobj is actually
	 * not referenced anymore by anybody before we proceed with
	 * unloading.
	 */
	dprintk("waiting for dropping of refcount\n");
	wait_for_completion(&data->kobj_unregister);
	dprintk("wait complete\n");

	if (cpufreq_driver->exit)
		cpufreq_driver->exit(data);

	kfree(data);

	cpufreq_debug_enable_ratelimit();
	return 0;
}
1047 
1048 
1049 static int cpufreq_remove_dev (struct sys_device * sys_dev)
1050 {
1051 	unsigned int cpu = sys_dev->id;
1052 	int retval;
1053 
1054 	if (cpu_is_offline(cpu))
1055 		return 0;
1056 
1057 	if (unlikely(lock_policy_rwsem_write(cpu)))
1058 		BUG();
1059 
1060 	retval = __cpufreq_remove_dev(sys_dev);
1061 	return retval;
1062 }
1063 
1064 
1065 static void handle_update(struct work_struct *work)
1066 {
1067 	struct cpufreq_policy *policy =
1068 		container_of(work, struct cpufreq_policy, update);
1069 	unsigned int cpu = policy->cpu;
1070 	dprintk("handle_update for cpu %u called\n", cpu);
1071 	cpufreq_update_policy(cpu);
1072 }
1073 
1074 /**
1075  *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1076  *	@cpu: cpu number
1077  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1078  *	@new_freq: CPU frequency the CPU actually runs at
1079  *
1080  *	We adjust to current frequency first, and need to clean up later. So either call
1081  *	to cpufreq_update_policy() or schedule handle_update()).
1082  */
1083 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1084 				unsigned int new_freq)
1085 {
1086 	struct cpufreq_freqs freqs;
1087 
1088 	dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1089 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1090 
1091 	freqs.cpu = cpu;
1092 	freqs.old = old_freq;
1093 	freqs.new = new_freq;
1094 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1095 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1096 }
1097 
1098 
1099 /**
1100  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1101  * @cpu: CPU number
1102  *
1103  * This is the last known freq, without actually getting it from the driver.
1104  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1105  */
1106 unsigned int cpufreq_quick_get(unsigned int cpu)
1107 {
1108 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1109 	unsigned int ret_freq = 0;
1110 
1111 	if (policy) {
1112 		if (unlikely(lock_policy_rwsem_read(cpu)))
1113 			return ret_freq;
1114 
1115 		ret_freq = policy->cur;
1116 
1117 		unlock_policy_rwsem_read(cpu);
1118 		cpufreq_cpu_put(policy);
1119 	}
1120 
1121 	return (ret_freq);
1122 }
1123 EXPORT_SYMBOL(cpufreq_quick_get);
1124 
1125 
1126 static unsigned int __cpufreq_get(unsigned int cpu)
1127 {
1128 	struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
1129 	unsigned int ret_freq = 0;
1130 
1131 	if (!cpufreq_driver->get)
1132 		return (ret_freq);
1133 
1134 	ret_freq = cpufreq_driver->get(cpu);
1135 
1136 	if (ret_freq && policy->cur &&
1137 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1138 		/* verify no discrepancy between actual and
1139 					saved value exists */
1140 		if (unlikely(ret_freq != policy->cur)) {
1141 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1142 			schedule_work(&policy->update);
1143 		}
1144 	}
1145 
1146 	return (ret_freq);
1147 }
1148 
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Takes a reference on the policy of @cpu and its read lock, then queries
 * the frequency through __cpufreq_get(). Returns 0 when there is no policy
 * or the lock cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *pol = cpufreq_cpu_get(cpu);
	unsigned int khz = 0;

	if (!pol)
		return khz;

	if (!unlikely(lock_policy_rwsem_read(cpu))) {
		khz = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(pol);
	return khz;
}
EXPORT_SYMBOL(cpufreq_get);
1176 
1177 
1178 /**
1179  *	cpufreq_suspend - let the low level driver prepare for suspend
1180  */
1181 
1182 static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
1183 {
1184 	int cpu = sysdev->id;
1185 	int ret = 0;
1186 	unsigned int cur_freq = 0;
1187 	struct cpufreq_policy *cpu_policy;
1188 
1189 	dprintk("suspending cpu %u\n", cpu);
1190 
1191 	if (!cpu_online(cpu))
1192 		return 0;
1193 
1194 	/* we may be lax here as interrupts are off. Nonetheless
1195 	 * we need to grab the correct cpu policy, as to check
1196 	 * whether we really run on this CPU.
1197 	 */
1198 
1199 	cpu_policy = cpufreq_cpu_get(cpu);
1200 	if (!cpu_policy)
1201 		return -EINVAL;
1202 
1203 	/* only handle each CPU group once */
1204 	if (unlikely(cpu_policy->cpu != cpu)) {
1205 		cpufreq_cpu_put(cpu_policy);
1206 		return 0;
1207 	}
1208 
1209 	if (cpufreq_driver->suspend) {
1210 		ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1211 		if (ret) {
1212 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1213 					"step on CPU %u\n", cpu_policy->cpu);
1214 			cpufreq_cpu_put(cpu_policy);
1215 			return ret;
1216 		}
1217 	}
1218 
1219 
1220 	if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1221 		goto out;
1222 
1223 	if (cpufreq_driver->get)
1224 		cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1225 
1226 	if (!cur_freq || !cpu_policy->cur) {
1227 		printk(KERN_ERR "cpufreq: suspend failed to assert current "
1228 		       "frequency is what timing core thinks it is.\n");
1229 		goto out;
1230 	}
1231 
1232 	if (unlikely(cur_freq != cpu_policy->cur)) {
1233 		struct cpufreq_freqs freqs;
1234 
1235 		if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1236 			dprintk("Warning: CPU frequency is %u, "
1237 			       "cpufreq assumed %u kHz.\n",
1238 			       cur_freq, cpu_policy->cur);
1239 
1240 		freqs.cpu = cpu;
1241 		freqs.old = cpu_policy->cur;
1242 		freqs.new = cur_freq;
1243 
1244 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1245 				    CPUFREQ_SUSPENDCHANGE, &freqs);
1246 		adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1247 
1248 		cpu_policy->cur = cur_freq;
1249 	}
1250 
1251 out:
1252 	cpufreq_cpu_put(cpu_policy);
1253 	return 0;
1254 }
1255 
1256 /**
1257  *	cpufreq_resume -  restore proper CPU frequency handling after resume
1258  *
1259  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1260  *	2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1261  *	3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1262  *	    restored.
1263  */
1264 static int cpufreq_resume(struct sys_device * sysdev)
1265 {
1266 	int cpu = sysdev->id;
1267 	int ret = 0;
1268 	struct cpufreq_policy *cpu_policy;
1269 
1270 	dprintk("resuming cpu %u\n", cpu);
1271 
1272 	if (!cpu_online(cpu))
1273 		return 0;
1274 
1275 	/* we may be lax here as interrupts are off. Nonetheless
1276 	 * we need to grab the correct cpu policy, as to check
1277 	 * whether we really run on this CPU.
1278 	 */
1279 
1280 	cpu_policy = cpufreq_cpu_get(cpu);
1281 	if (!cpu_policy)
1282 		return -EINVAL;
1283 
1284 	/* only handle each CPU group once */
1285 	if (unlikely(cpu_policy->cpu != cpu)) {
1286 		cpufreq_cpu_put(cpu_policy);
1287 		return 0;
1288 	}
1289 
1290 	if (cpufreq_driver->resume) {
1291 		ret = cpufreq_driver->resume(cpu_policy);
1292 		if (ret) {
1293 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1294 					"step on CPU %u\n", cpu_policy->cpu);
1295 			cpufreq_cpu_put(cpu_policy);
1296 			return ret;
1297 		}
1298 	}
1299 
1300 	if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1301 		unsigned int cur_freq = 0;
1302 
1303 		if (cpufreq_driver->get)
1304 			cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1305 
1306 		if (!cur_freq || !cpu_policy->cur) {
1307 			printk(KERN_ERR "cpufreq: resume failed to assert "
1308 					"current frequency is what timing core "
1309 					"thinks it is.\n");
1310 			goto out;
1311 		}
1312 
1313 		if (unlikely(cur_freq != cpu_policy->cur)) {
1314 			struct cpufreq_freqs freqs;
1315 
1316 			if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1317 				dprintk("Warning: CPU frequency"
1318 				       "is %u, cpufreq assumed %u kHz.\n",
1319 				       cur_freq, cpu_policy->cur);
1320 
1321 			freqs.cpu = cpu;
1322 			freqs.old = cpu_policy->cur;
1323 			freqs.new = cur_freq;
1324 
1325 			srcu_notifier_call_chain(
1326 					&cpufreq_transition_notifier_list,
1327 					CPUFREQ_RESUMECHANGE, &freqs);
1328 			adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1329 
1330 			cpu_policy->cur = cur_freq;
1331 		}
1332 	}
1333 
1334 out:
1335 	schedule_work(&cpu_policy->update);
1336 	cpufreq_cpu_put(cpu_policy);
1337 	return ret;
1338 }
1339 
/*
 * sysdev driver glue: the add/remove/suspend/resume callbacks of the
 * cpufreq core. It is registered against cpu_sysdev_class by
 * cpufreq_register_driver() and unregistered by
 * cpufreq_unregister_driver().
 */
static struct sysdev_driver cpufreq_sysdev_driver = {
	.add		= cpufreq_add_dev,
	.remove		= cpufreq_remove_dev,
	.suspend	= cpufreq_suspend,
	.resume		= cpufreq_resume,
};
1346 
1347 
1348 /*********************************************************************
1349  *                     NOTIFIER LISTS INTERFACE                      *
1350  *********************************************************************/
1351 
1352 /**
1353  *	cpufreq_register_notifier - register a driver with cpufreq
1354  *	@nb: notifier function to register
1355  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1356  *
1357  *	Add a driver to one of two lists: either a list of drivers that
1358  *      are notified about clock rate changes (once before and once after
1359  *      the transition), or a list of drivers that are notified about
1360  *      changes in cpufreq policy.
1361  *
1362  *	This function may sleep, and has the same return conditions as
1363  *	blocking_notifier_chain_register.
1364  */
1365 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1366 {
1367 	int ret;
1368 
1369 	switch (list) {
1370 	case CPUFREQ_TRANSITION_NOTIFIER:
1371 		ret = srcu_notifier_chain_register(
1372 				&cpufreq_transition_notifier_list, nb);
1373 		break;
1374 	case CPUFREQ_POLICY_NOTIFIER:
1375 		ret = blocking_notifier_chain_register(
1376 				&cpufreq_policy_notifier_list, nb);
1377 		break;
1378 	default:
1379 		ret = -EINVAL;
1380 	}
1381 
1382 	return ret;
1383 }
1384 EXPORT_SYMBOL(cpufreq_register_notifier);
1385 
1386 
1387 /**
1388  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1389  *	@nb: notifier block to be unregistered
1390  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1391  *
1392  *	Remove a driver from the CPU frequency notifier list.
1393  *
1394  *	This function may sleep, and has the same return conditions as
1395  *	blocking_notifier_chain_unregister.
1396  */
1397 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1398 {
1399 	int ret;
1400 
1401 	switch (list) {
1402 	case CPUFREQ_TRANSITION_NOTIFIER:
1403 		ret = srcu_notifier_chain_unregister(
1404 				&cpufreq_transition_notifier_list, nb);
1405 		break;
1406 	case CPUFREQ_POLICY_NOTIFIER:
1407 		ret = blocking_notifier_chain_unregister(
1408 				&cpufreq_policy_notifier_list, nb);
1409 		break;
1410 	default:
1411 		ret = -EINVAL;
1412 	}
1413 
1414 	return ret;
1415 }
1416 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1417 
1418 
1419 /*********************************************************************
1420  *                              GOVERNORS                            *
1421  *********************************************************************/
1422 
1423 
1424 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1425 			    unsigned int target_freq,
1426 			    unsigned int relation)
1427 {
1428 	int retval = -EINVAL;
1429 
1430 	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1431 		target_freq, relation);
1432 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
1433 		retval = cpufreq_driver->target(policy, target_freq, relation);
1434 
1435 	return retval;
1436 }
1437 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1438 
1439 int cpufreq_driver_target(struct cpufreq_policy *policy,
1440 			  unsigned int target_freq,
1441 			  unsigned int relation)
1442 {
1443 	int ret;
1444 
1445 	policy = cpufreq_cpu_get(policy->cpu);
1446 	if (!policy)
1447 		return -EINVAL;
1448 
1449 	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1450 		return -EINVAL;
1451 
1452 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1453 
1454 	unlock_policy_rwsem_write(policy->cpu);
1455 
1456 	cpufreq_cpu_put(policy);
1457 	return ret;
1458 }
1459 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1460 
1461 int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
1462 {
1463 	int ret = 0;
1464 
1465 	policy = cpufreq_cpu_get(policy->cpu);
1466 	if (!policy)
1467 		return -EINVAL;
1468 
1469 	if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
1470 		ret = cpufreq_driver->getavg(policy->cpu);
1471 
1472 	cpufreq_cpu_put(policy);
1473 	return ret;
1474 }
1475 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1476 
1477 /*
1478  * when "event" is CPUFREQ_GOV_LIMITS
1479  */
1480 
1481 static int __cpufreq_governor(struct cpufreq_policy *policy,
1482 					unsigned int event)
1483 {
1484 	int ret;
1485 
1486 	if (!try_module_get(policy->governor->owner))
1487 		return -EINVAL;
1488 
1489 	dprintk("__cpufreq_governor for CPU %u, event %u\n",
1490 						policy->cpu, event);
1491 	ret = policy->governor->governor(policy, event);
1492 
1493 	/* we keep one module reference alive for
1494 			each CPU governed by this CPU */
1495 	if ((event != CPUFREQ_GOV_START) || ret)
1496 		module_put(policy->governor->owner);
1497 	if ((event == CPUFREQ_GOV_STOP) && !ret)
1498 		module_put(policy->governor->owner);
1499 
1500 	return ret;
1501 }
1502 
1503 
1504 int cpufreq_register_governor(struct cpufreq_governor *governor)
1505 {
1506 	int err;
1507 
1508 	if (!governor)
1509 		return -EINVAL;
1510 
1511 	mutex_lock(&cpufreq_governor_mutex);
1512 
1513 	err = -EBUSY;
1514 	if (__find_governor(governor->name) == NULL) {
1515 		err = 0;
1516 		list_add(&governor->governor_list, &cpufreq_governor_list);
1517 	}
1518 
1519 	mutex_unlock(&cpufreq_governor_mutex);
1520 	return err;
1521 }
1522 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1523 
1524 
1525 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1526 {
1527 	if (!governor)
1528 		return;
1529 
1530 	mutex_lock(&cpufreq_governor_mutex);
1531 	list_del(&governor->governor_list);
1532 	mutex_unlock(&cpufreq_governor_mutex);
1533 	return;
1534 }
1535 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1536 
1537 
1538 
1539 /*********************************************************************
1540  *                          POLICY INTERFACE                         *
1541  *********************************************************************/
1542 
1543 /**
1544  * cpufreq_get_policy - get the current cpufreq_policy
1545  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1546  *
1547  * Reads the current cpufreq policy.
1548  */
1549 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1550 {
1551 	struct cpufreq_policy *cpu_policy;
1552 	if (!policy)
1553 		return -EINVAL;
1554 
1555 	cpu_policy = cpufreq_cpu_get(cpu);
1556 	if (!cpu_policy)
1557 		return -EINVAL;
1558 
1559 	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1560 
1561 	cpufreq_cpu_put(cpu_policy);
1562 	return 0;
1563 }
1564 EXPORT_SYMBOL(cpufreq_get_policy);
1565 
1566 
1567 /*
1568  * data   : current policy.
1569  * policy : policy to be set.
1570  */
1571 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1572 				struct cpufreq_policy *policy)
1573 {
1574 	int ret = 0;
1575 
1576 	cpufreq_debug_disable_ratelimit();
1577 	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1578 		policy->min, policy->max);
1579 
1580 	memcpy(&policy->cpuinfo, &data->cpuinfo,
1581 				sizeof(struct cpufreq_cpuinfo));
1582 
1583 	if (policy->min > data->min && policy->min > policy->max) {
1584 		ret = -EINVAL;
1585 		goto error_out;
1586 	}
1587 
1588 	/* verify the cpu speed can be set within this limit */
1589 	ret = cpufreq_driver->verify(policy);
1590 	if (ret)
1591 		goto error_out;
1592 
1593 	/* adjust if necessary - all reasons */
1594 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1595 			CPUFREQ_ADJUST, policy);
1596 
1597 	/* adjust if necessary - hardware incompatibility*/
1598 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1599 			CPUFREQ_INCOMPATIBLE, policy);
1600 
1601 	/* verify the cpu speed can be set within this limit,
1602 	   which might be different to the first one */
1603 	ret = cpufreq_driver->verify(policy);
1604 	if (ret)
1605 		goto error_out;
1606 
1607 	/* notification of the new policy */
1608 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1609 			CPUFREQ_NOTIFY, policy);
1610 
1611 	data->min = policy->min;
1612 	data->max = policy->max;
1613 
1614 	dprintk("new min and max freqs are %u - %u kHz\n",
1615 					data->min, data->max);
1616 
1617 	if (cpufreq_driver->setpolicy) {
1618 		data->policy = policy->policy;
1619 		dprintk("setting range\n");
1620 		ret = cpufreq_driver->setpolicy(policy);
1621 	} else {
1622 		if (policy->governor != data->governor) {
1623 			/* save old, working values */
1624 			struct cpufreq_governor *old_gov = data->governor;
1625 
1626 			dprintk("governor switch\n");
1627 
1628 			/* end old governor */
1629 			if (data->governor)
1630 				__cpufreq_governor(data, CPUFREQ_GOV_STOP);
1631 
1632 			/* start new governor */
1633 			data->governor = policy->governor;
1634 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1635 				/* new governor failed, so re-start old one */
1636 				dprintk("starting governor %s failed\n",
1637 							data->governor->name);
1638 				if (old_gov) {
1639 					data->governor = old_gov;
1640 					__cpufreq_governor(data,
1641 							   CPUFREQ_GOV_START);
1642 				}
1643 				ret = -EINVAL;
1644 				goto error_out;
1645 			}
1646 			/* might be a policy change, too, so fall through */
1647 		}
1648 		dprintk("governor: change or update limits\n");
1649 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1650 	}
1651 
1652 error_out:
1653 	cpufreq_debug_enable_ratelimit();
1654 	return ret;
1655 }
1656 
1657 /**
1658  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
1659  *	@cpu: CPU which shall be re-evaluated
1660  *
1661  *	Usefull for policy notifiers which have different necessities
1662  *	at different times.
1663  */
1664 int cpufreq_update_policy(unsigned int cpu)
1665 {
1666 	struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1667 	struct cpufreq_policy policy;
1668 	int ret = 0;
1669 
1670 	if (!data)
1671 		return -ENODEV;
1672 
1673 	if (unlikely(lock_policy_rwsem_write(cpu)))
1674 		return -EINVAL;
1675 
1676 	dprintk("updating policy for CPU %u\n", cpu);
1677 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
1678 	policy.min = data->user_policy.min;
1679 	policy.max = data->user_policy.max;
1680 	policy.policy = data->user_policy.policy;
1681 	policy.governor = data->user_policy.governor;
1682 
1683 	/* BIOS might change freq behind our back
1684 	  -> ask driver for current freq and notify governors about a change */
1685 	if (cpufreq_driver->get) {
1686 		policy.cur = cpufreq_driver->get(cpu);
1687 		if (!data->cur) {
1688 			dprintk("Driver did not initialize current freq");
1689 			data->cur = policy.cur;
1690 		} else {
1691 			if (data->cur != policy.cur)
1692 				cpufreq_out_of_sync(cpu, data->cur,
1693 								policy.cur);
1694 		}
1695 	}
1696 
1697 	ret = __cpufreq_set_policy(data, &policy);
1698 
1699 	unlock_policy_rwsem_write(cpu);
1700 
1701 	cpufreq_cpu_put(data);
1702 	return ret;
1703 }
1704 EXPORT_SYMBOL(cpufreq_update_policy);
1705 
1706 static int cpufreq_cpu_callback(struct notifier_block *nfb,
1707 					unsigned long action, void *hcpu)
1708 {
1709 	unsigned int cpu = (unsigned long)hcpu;
1710 	struct sys_device *sys_dev;
1711 
1712 	sys_dev = get_cpu_sysdev(cpu);
1713 	if (sys_dev) {
1714 		switch (action) {
1715 		case CPU_ONLINE:
1716 		case CPU_ONLINE_FROZEN:
1717 			cpufreq_add_dev(sys_dev);
1718 			break;
1719 		case CPU_DOWN_PREPARE:
1720 		case CPU_DOWN_PREPARE_FROZEN:
1721 			if (unlikely(lock_policy_rwsem_write(cpu)))
1722 				BUG();
1723 
1724 			__cpufreq_remove_dev(sys_dev);
1725 			break;
1726 		case CPU_DOWN_FAILED:
1727 		case CPU_DOWN_FAILED_FROZEN:
1728 			cpufreq_add_dev(sys_dev);
1729 			break;
1730 		}
1731 	}
1732 	return NOTIFY_OK;
1733 }
1734 
/*
 * Hotplug notifier block wrapping cpufreq_cpu_callback(). It is registered
 * by cpufreq_register_driver() on success and unregistered by
 * cpufreq_unregister_driver().
 */
static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
{
    .notifier_call = cpufreq_cpu_callback,
};
1739 
1740 /*********************************************************************
1741  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1742  *********************************************************************/
1743 
1744 /**
1745  * cpufreq_register_driver - register a CPU Frequency driver
1746  * @driver_data: A struct cpufreq_driver containing the values#
1747  * submitted by the CPU Frequency driver.
1748  *
1749  *   Registers a CPU Frequency driver to this core code. This code
1750  * returns zero on success, -EBUSY when another driver got here first
1751  * (and isn't unregistered in the meantime).
1752  *
1753  */
1754 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1755 {
1756 	unsigned long flags;
1757 	int ret;
1758 
1759 	if (!driver_data || !driver_data->verify || !driver_data->init ||
1760 	    ((!driver_data->setpolicy) && (!driver_data->target)))
1761 		return -EINVAL;
1762 
1763 	dprintk("trying to register driver %s\n", driver_data->name);
1764 
1765 	if (driver_data->setpolicy)
1766 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
1767 
1768 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1769 	if (cpufreq_driver) {
1770 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1771 		return -EBUSY;
1772 	}
1773 	cpufreq_driver = driver_data;
1774 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1775 
1776 	ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
1777 
1778 	if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1779 		int i;
1780 		ret = -ENODEV;
1781 
1782 		/* check for at least one working CPU */
1783 		for (i=0; i<NR_CPUS; i++)
1784 			if (cpufreq_cpu_data[i])
1785 				ret = 0;
1786 
1787 		/* if all ->init() calls failed, unregister */
1788 		if (ret) {
1789 			dprintk("no CPU initialized for driver %s\n",
1790 							driver_data->name);
1791 			sysdev_driver_unregister(&cpu_sysdev_class,
1792 						&cpufreq_sysdev_driver);
1793 
1794 			spin_lock_irqsave(&cpufreq_driver_lock, flags);
1795 			cpufreq_driver = NULL;
1796 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1797 		}
1798 	}
1799 
1800 	if (!ret) {
1801 		register_hotcpu_notifier(&cpufreq_cpu_notifier);
1802 		dprintk("driver %s up and running\n", driver_data->name);
1803 		cpufreq_debug_enable_ratelimit();
1804 	}
1805 
1806 	return (ret);
1807 }
1808 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1809 
1810 
1811 /**
1812  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1813  *
1814  *    Unregister the current CPUFreq driver. Only call this if you have
1815  * the right to do so, i.e. if you have succeeded in initialising before!
1816  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1817  * currently not initialised.
1818  */
1819 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1820 {
1821 	unsigned long flags;
1822 
1823 	cpufreq_debug_disable_ratelimit();
1824 
1825 	if (!cpufreq_driver || (driver != cpufreq_driver)) {
1826 		cpufreq_debug_enable_ratelimit();
1827 		return -EINVAL;
1828 	}
1829 
1830 	dprintk("unregistering driver %s\n", driver->name);
1831 
1832 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1833 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1834 
1835 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
1836 	cpufreq_driver = NULL;
1837 	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1838 
1839 	return 0;
1840 }
1841 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1842 
1843 static int __init cpufreq_core_init(void)
1844 {
1845 	int cpu;
1846 
1847 	for_each_possible_cpu(cpu) {
1848 		per_cpu(policy_cpu, cpu) = -1;
1849 		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1850 	}
1851 	return 0;
1852 }
1853 
1854 core_initcall(cpufreq_core_init);
1855