xref: /openbmc/linux/drivers/cpufreq/cpufreq.c (revision bc000245)
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *            (C) 2013 Viresh Kumar <viresh.kumar@linaro.org>
7  *
8  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
9  *	Added handling for CPU hotplug
10  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
11  *	Fix handling for CPU hotplug -- affected CPUs
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License version 2 as
15  * published by the Free Software Foundation.
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/cpu.h>
21 #include <linux/cpufreq.h>
22 #include <linux/delay.h>
23 #include <linux/device.h>
24 #include <linux/init.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/module.h>
27 #include <linux/mutex.h>
28 #include <linux/slab.h>
29 #include <linux/syscore_ops.h>
30 #include <linux/tick.h>
31 #include <trace/events/power.h>
32 
/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its lock (a rwlock). This lock
 * also protects the cpufreq_cpu_data array.
 */
38 static struct cpufreq_driver *cpufreq_driver;
39 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
40 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
41 static DEFINE_RWLOCK(cpufreq_driver_lock);
42 static DEFINE_MUTEX(cpufreq_governor_lock);
43 static LIST_HEAD(cpufreq_policy_list);
44 
45 #ifdef CONFIG_HOTPLUG_CPU
46 /* This one keeps track of the previously set governor of a removed CPU */
47 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
48 #endif
49 
50 static inline bool has_target(void)
51 {
52 	return cpufreq_driver->target_index || cpufreq_driver->target;
53 }
54 
55 /*
56  * rwsem to guarantee that cpufreq driver module doesn't unload during critical
57  * sections
58  */
59 static DECLARE_RWSEM(cpufreq_rwsem);
60 
61 /* internal prototypes */
62 static int __cpufreq_governor(struct cpufreq_policy *policy,
63 		unsigned int event);
64 static unsigned int __cpufreq_get(unsigned int cpu);
65 static void handle_update(struct work_struct *work);
66 
67 /**
68  * Two notifier lists: the "policy" list is involved in the
69  * validation process for a new CPU frequency policy; the
70  * "transition" list for kernel code that needs to handle
71  * changes to devices when the CPU clock speed changes.
72  * The mutex locks both lists.
73  */
74 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
75 static struct srcu_notifier_head cpufreq_transition_notifier_list;
76 
77 static bool init_cpufreq_transition_notifier_list_called;
78 static int __init init_cpufreq_transition_notifier_list(void)
79 {
80 	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
81 	init_cpufreq_transition_notifier_list_called = true;
82 	return 0;
83 }
84 pure_initcall(init_cpufreq_transition_notifier_list);
85 
86 static int off __read_mostly;
87 static int cpufreq_disabled(void)
88 {
89 	return off;
90 }
91 void disable_cpufreq(void)
92 {
93 	off = 1;
94 }
95 static LIST_HEAD(cpufreq_governor_list);
96 static DEFINE_MUTEX(cpufreq_governor_mutex);
97 
98 bool have_governor_per_policy(void)
99 {
100 	return !!(cpufreq_driver->flags & CPUFREQ_HAVE_GOVERNOR_PER_POLICY);
101 }
102 EXPORT_SYMBOL_GPL(have_governor_per_policy);
103 
104 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
105 {
106 	if (have_governor_per_policy())
107 		return &policy->kobj;
108 	else
109 		return cpufreq_global_kobject;
110 }
111 EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
112 
113 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
114 {
115 	u64 idle_time;
116 	u64 cur_wall_time;
117 	u64 busy_time;
118 
119 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
120 
121 	busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
122 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
123 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
124 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
125 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
126 	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
127 
128 	idle_time = cur_wall_time - busy_time;
129 	if (wall)
130 		*wall = cputime_to_usecs(cur_wall_time);
131 
132 	return cputime_to_usecs(idle_time);
133 }
134 
135 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
136 {
137 	u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);
138 
139 	if (idle_time == -1ULL)
140 		return get_cpu_idle_time_jiffy(cpu, wall);
141 	else if (!io_busy)
142 		idle_time += get_cpu_iowait_time_us(cpu, wall);
143 
144 	return idle_time;
145 }
146 EXPORT_SYMBOL_GPL(get_cpu_idle_time);
147 
148 /*
149  * This is a generic cpufreq init() routine which can be used by cpufreq
150  * drivers of SMP systems. It will do following:
151  * - validate & show freq table passed
152  * - set policies transition latency
153  * - policy->cpus with all possible CPUs
154  */
155 int cpufreq_generic_init(struct cpufreq_policy *policy,
156 		struct cpufreq_frequency_table *table,
157 		unsigned int transition_latency)
158 {
159 	int ret;
160 
161 	ret = cpufreq_table_validate_and_show(policy, table);
162 	if (ret) {
163 		pr_err("%s: invalid frequency table: %d\n", __func__, ret);
164 		return ret;
165 	}
166 
167 	policy->cpuinfo.transition_latency = transition_latency;
168 
169 	/*
170 	 * The driver only supports the SMP configuartion where all processors
171 	 * share the clock and voltage and clock.
172 	 */
173 	cpumask_setall(policy->cpus);
174 
175 	return 0;
176 }
177 EXPORT_SYMBOL_GPL(cpufreq_generic_init);
178 
179 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
180 {
181 	struct cpufreq_policy *policy = NULL;
182 	unsigned long flags;
183 
184 	if (cpufreq_disabled() || (cpu >= nr_cpu_ids))
185 		return NULL;
186 
187 	if (!down_read_trylock(&cpufreq_rwsem))
188 		return NULL;
189 
190 	/* get the cpufreq driver */
191 	read_lock_irqsave(&cpufreq_driver_lock, flags);
192 
193 	if (cpufreq_driver) {
194 		/* get the CPU */
195 		policy = per_cpu(cpufreq_cpu_data, cpu);
196 		if (policy)
197 			kobject_get(&policy->kobj);
198 	}
199 
200 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
201 
202 	if (!policy)
203 		up_read(&cpufreq_rwsem);
204 
205 	return policy;
206 }
207 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
208 
209 void cpufreq_cpu_put(struct cpufreq_policy *policy)
210 {
211 	if (cpufreq_disabled())
212 		return;
213 
214 	kobject_put(&policy->kobj);
215 	up_read(&cpufreq_rwsem);
216 }
217 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
218 
219 /*********************************************************************
220  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
221  *********************************************************************/
222 
223 /**
224  * adjust_jiffies - adjust the system "loops_per_jiffy"
225  *
226  * This function alters the system "loops_per_jiffy" for the clock
227  * speed change. Note that loops_per_jiffy cannot be updated on SMP
228  * systems as each CPU might be scaled differently. So, use the arch
229  * per-CPU loops_per_jiffy value wherever possible.
230  */
231 #ifndef CONFIG_SMP
232 static unsigned long l_p_j_ref;
233 static unsigned int l_p_j_ref_freq;
234 
235 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
236 {
237 	if (ci->flags & CPUFREQ_CONST_LOOPS)
238 		return;
239 
240 	if (!l_p_j_ref_freq) {
241 		l_p_j_ref = loops_per_jiffy;
242 		l_p_j_ref_freq = ci->old;
243 		pr_debug("saving %lu as reference value for loops_per_jiffy; "
244 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
245 	}
246 	if ((val == CPUFREQ_POSTCHANGE && ci->old != ci->new) ||
247 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
248 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
249 								ci->new);
250 		pr_debug("scaling loops_per_jiffy to %lu "
251 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
252 	}
253 }
254 #else
255 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
256 {
257 	return;
258 }
259 #endif
260 
261 static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
262 		struct cpufreq_freqs *freqs, unsigned int state)
263 {
264 	BUG_ON(irqs_disabled());
265 
266 	if (cpufreq_disabled())
267 		return;
268 
269 	freqs->flags = cpufreq_driver->flags;
270 	pr_debug("notification %u of frequency transition to %u kHz\n",
271 		state, freqs->new);
272 
273 	switch (state) {
274 
275 	case CPUFREQ_PRECHANGE:
276 		/* detect if the driver reported a value as "old frequency"
277 		 * which is not equal to what the cpufreq core thinks is
278 		 * "old frequency".
279 		 */
280 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
281 			if ((policy) && (policy->cpu == freqs->cpu) &&
282 			    (policy->cur) && (policy->cur != freqs->old)) {
283 				pr_debug("Warning: CPU frequency is"
284 					" %u, cpufreq assumed %u kHz.\n",
285 					freqs->old, policy->cur);
286 				freqs->old = policy->cur;
287 			}
288 		}
289 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
290 				CPUFREQ_PRECHANGE, freqs);
291 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
292 		break;
293 
294 	case CPUFREQ_POSTCHANGE:
295 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
296 		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
297 			(unsigned long)freqs->cpu);
298 		trace_cpu_frequency(freqs->new, freqs->cpu);
299 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
300 				CPUFREQ_POSTCHANGE, freqs);
301 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
302 			policy->cur = freqs->new;
303 		break;
304 	}
305 }
306 
307 /**
308  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
309  * on frequency transition.
310  *
311  * This function calls the transition notifiers and the "adjust_jiffies"
312  * function. It is called twice on all CPU frequency changes that have
313  * external effects.
314  */
315 void cpufreq_notify_transition(struct cpufreq_policy *policy,
316 		struct cpufreq_freqs *freqs, unsigned int state)
317 {
318 	for_each_cpu(freqs->cpu, policy->cpus)
319 		__cpufreq_notify_transition(policy, freqs, state);
320 }
321 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
322 
323 
324 /*********************************************************************
325  *                          SYSFS INTERFACE                          *
326  *********************************************************************/
327 
328 static struct cpufreq_governor *__find_governor(const char *str_governor)
329 {
330 	struct cpufreq_governor *t;
331 
332 	list_for_each_entry(t, &cpufreq_governor_list, governor_list)
333 		if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
334 			return t;
335 
336 	return NULL;
337 }
338 
339 /**
340  * cpufreq_parse_governor - parse a governor string
341  */
342 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
343 				struct cpufreq_governor **governor)
344 {
345 	int err = -EINVAL;
346 
347 	if (!cpufreq_driver)
348 		goto out;
349 
350 	if (cpufreq_driver->setpolicy) {
351 		if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
352 			*policy = CPUFREQ_POLICY_PERFORMANCE;
353 			err = 0;
354 		} else if (!strnicmp(str_governor, "powersave",
355 						CPUFREQ_NAME_LEN)) {
356 			*policy = CPUFREQ_POLICY_POWERSAVE;
357 			err = 0;
358 		}
359 	} else if (has_target()) {
360 		struct cpufreq_governor *t;
361 
362 		mutex_lock(&cpufreq_governor_mutex);
363 
364 		t = __find_governor(str_governor);
365 
366 		if (t == NULL) {
367 			int ret;
368 
369 			mutex_unlock(&cpufreq_governor_mutex);
370 			ret = request_module("cpufreq_%s", str_governor);
371 			mutex_lock(&cpufreq_governor_mutex);
372 
373 			if (ret == 0)
374 				t = __find_governor(str_governor);
375 		}
376 
377 		if (t != NULL) {
378 			*governor = t;
379 			err = 0;
380 		}
381 
382 		mutex_unlock(&cpufreq_governor_mutex);
383 	}
384 out:
385 	return err;
386 }
387 
388 /**
389  * cpufreq_per_cpu_attr_read() / show_##file_name() -
390  * print out cpufreq information
391  *
392  * Write out information from cpufreq_driver->policy[cpu]; object must be
393  * "unsigned int".
394  */
395 
396 #define show_one(file_name, object)			\
397 static ssize_t show_##file_name				\
398 (struct cpufreq_policy *policy, char *buf)		\
399 {							\
400 	return sprintf(buf, "%u\n", policy->object);	\
401 }
402 
403 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
404 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
405 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
406 show_one(scaling_min_freq, min);
407 show_one(scaling_max_freq, max);
408 show_one(scaling_cur_freq, cur);
409 
410 static int cpufreq_set_policy(struct cpufreq_policy *policy,
411 				struct cpufreq_policy *new_policy);
412 
413 /**
414  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
415  */
416 #define store_one(file_name, object)			\
417 static ssize_t store_##file_name					\
418 (struct cpufreq_policy *policy, const char *buf, size_t count)		\
419 {									\
420 	int ret;							\
421 	struct cpufreq_policy new_policy;				\
422 									\
423 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
424 	if (ret)							\
425 		return -EINVAL;						\
426 									\
427 	ret = sscanf(buf, "%u", &new_policy.object);			\
428 	if (ret != 1)							\
429 		return -EINVAL;						\
430 									\
431 	ret = cpufreq_set_policy(policy, &new_policy);		\
432 	policy->user_policy.object = policy->object;			\
433 									\
434 	return ret ? ret : count;					\
435 }
436 
437 store_one(scaling_min_freq, min);
438 store_one(scaling_max_freq, max);
439 
440 /**
441  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
442  */
443 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
444 					char *buf)
445 {
446 	unsigned int cur_freq = __cpufreq_get(policy->cpu);
447 	if (!cur_freq)
448 		return sprintf(buf, "<unknown>");
449 	return sprintf(buf, "%u\n", cur_freq);
450 }
451 
452 /**
453  * show_scaling_governor - show the current policy for the specified CPU
454  */
455 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
456 {
457 	if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
458 		return sprintf(buf, "powersave\n");
459 	else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
460 		return sprintf(buf, "performance\n");
461 	else if (policy->governor)
462 		return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
463 				policy->governor->name);
464 	return -EINVAL;
465 }
466 
467 /**
468  * store_scaling_governor - store policy for the specified CPU
469  */
470 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
471 					const char *buf, size_t count)
472 {
473 	int ret;
474 	char	str_governor[16];
475 	struct cpufreq_policy new_policy;
476 
477 	ret = cpufreq_get_policy(&new_policy, policy->cpu);
478 	if (ret)
479 		return ret;
480 
481 	ret = sscanf(buf, "%15s", str_governor);
482 	if (ret != 1)
483 		return -EINVAL;
484 
485 	if (cpufreq_parse_governor(str_governor, &new_policy.policy,
486 						&new_policy.governor))
487 		return -EINVAL;
488 
489 	ret = cpufreq_set_policy(policy, &new_policy);
490 
491 	policy->user_policy.policy = policy->policy;
492 	policy->user_policy.governor = policy->governor;
493 
494 	if (ret)
495 		return ret;
496 	else
497 		return count;
498 }
499 
500 /**
501  * show_scaling_driver - show the cpufreq driver currently loaded
502  */
503 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
504 {
505 	return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
506 }
507 
508 /**
509  * show_scaling_available_governors - show the available CPUfreq governors
510  */
511 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
512 						char *buf)
513 {
514 	ssize_t i = 0;
515 	struct cpufreq_governor *t;
516 
517 	if (!has_target()) {
518 		i += sprintf(buf, "performance powersave");
519 		goto out;
520 	}
521 
522 	list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
523 		if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
524 		    - (CPUFREQ_NAME_LEN + 2)))
525 			goto out;
526 		i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
527 	}
528 out:
529 	i += sprintf(&buf[i], "\n");
530 	return i;
531 }
532 
533 ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf)
534 {
535 	ssize_t i = 0;
536 	unsigned int cpu;
537 
538 	for_each_cpu(cpu, mask) {
539 		if (i)
540 			i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
541 		i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
542 		if (i >= (PAGE_SIZE - 5))
543 			break;
544 	}
545 	i += sprintf(&buf[i], "\n");
546 	return i;
547 }
548 EXPORT_SYMBOL_GPL(cpufreq_show_cpus);
549 
550 /**
551  * show_related_cpus - show the CPUs affected by each transition even if
552  * hw coordination is in use
553  */
554 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
555 {
556 	return cpufreq_show_cpus(policy->related_cpus, buf);
557 }
558 
559 /**
560  * show_affected_cpus - show the CPUs affected by each transition
561  */
562 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
563 {
564 	return cpufreq_show_cpus(policy->cpus, buf);
565 }
566 
567 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
568 					const char *buf, size_t count)
569 {
570 	unsigned int freq = 0;
571 	unsigned int ret;
572 
573 	if (!policy->governor || !policy->governor->store_setspeed)
574 		return -EINVAL;
575 
576 	ret = sscanf(buf, "%u", &freq);
577 	if (ret != 1)
578 		return -EINVAL;
579 
580 	policy->governor->store_setspeed(policy, freq);
581 
582 	return count;
583 }
584 
585 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
586 {
587 	if (!policy->governor || !policy->governor->show_setspeed)
588 		return sprintf(buf, "<unsupported>\n");
589 
590 	return policy->governor->show_setspeed(policy, buf);
591 }
592 
593 /**
594  * show_bios_limit - show the current cpufreq HW/BIOS limitation
595  */
596 static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
597 {
598 	unsigned int limit;
599 	int ret;
600 	if (cpufreq_driver->bios_limit) {
601 		ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
602 		if (!ret)
603 			return sprintf(buf, "%u\n", limit);
604 	}
605 	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
606 }
607 
608 cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
609 cpufreq_freq_attr_ro(cpuinfo_min_freq);
610 cpufreq_freq_attr_ro(cpuinfo_max_freq);
611 cpufreq_freq_attr_ro(cpuinfo_transition_latency);
612 cpufreq_freq_attr_ro(scaling_available_governors);
613 cpufreq_freq_attr_ro(scaling_driver);
614 cpufreq_freq_attr_ro(scaling_cur_freq);
615 cpufreq_freq_attr_ro(bios_limit);
616 cpufreq_freq_attr_ro(related_cpus);
617 cpufreq_freq_attr_ro(affected_cpus);
618 cpufreq_freq_attr_rw(scaling_min_freq);
619 cpufreq_freq_attr_rw(scaling_max_freq);
620 cpufreq_freq_attr_rw(scaling_governor);
621 cpufreq_freq_attr_rw(scaling_setspeed);
622 
623 static struct attribute *default_attrs[] = {
624 	&cpuinfo_min_freq.attr,
625 	&cpuinfo_max_freq.attr,
626 	&cpuinfo_transition_latency.attr,
627 	&scaling_min_freq.attr,
628 	&scaling_max_freq.attr,
629 	&affected_cpus.attr,
630 	&related_cpus.attr,
631 	&scaling_governor.attr,
632 	&scaling_driver.attr,
633 	&scaling_available_governors.attr,
634 	&scaling_setspeed.attr,
635 	NULL
636 };
637 
638 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
639 #define to_attr(a) container_of(a, struct freq_attr, attr)
640 
641 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
642 {
643 	struct cpufreq_policy *policy = to_policy(kobj);
644 	struct freq_attr *fattr = to_attr(attr);
645 	ssize_t ret;
646 
647 	if (!down_read_trylock(&cpufreq_rwsem))
648 		return -EINVAL;
649 
650 	down_read(&policy->rwsem);
651 
652 	if (fattr->show)
653 		ret = fattr->show(policy, buf);
654 	else
655 		ret = -EIO;
656 
657 	up_read(&policy->rwsem);
658 	up_read(&cpufreq_rwsem);
659 
660 	return ret;
661 }
662 
663 static ssize_t store(struct kobject *kobj, struct attribute *attr,
664 		     const char *buf, size_t count)
665 {
666 	struct cpufreq_policy *policy = to_policy(kobj);
667 	struct freq_attr *fattr = to_attr(attr);
668 	ssize_t ret = -EINVAL;
669 
670 	get_online_cpus();
671 
672 	if (!cpu_online(policy->cpu))
673 		goto unlock;
674 
675 	if (!down_read_trylock(&cpufreq_rwsem))
676 		goto unlock;
677 
678 	down_write(&policy->rwsem);
679 
680 	if (fattr->store)
681 		ret = fattr->store(policy, buf, count);
682 	else
683 		ret = -EIO;
684 
685 	up_write(&policy->rwsem);
686 
687 	up_read(&cpufreq_rwsem);
688 unlock:
689 	put_online_cpus();
690 
691 	return ret;
692 }
693 
694 static void cpufreq_sysfs_release(struct kobject *kobj)
695 {
696 	struct cpufreq_policy *policy = to_policy(kobj);
697 	pr_debug("last reference is dropped\n");
698 	complete(&policy->kobj_unregister);
699 }
700 
701 static const struct sysfs_ops sysfs_ops = {
702 	.show	= show,
703 	.store	= store,
704 };
705 
706 static struct kobj_type ktype_cpufreq = {
707 	.sysfs_ops	= &sysfs_ops,
708 	.default_attrs	= default_attrs,
709 	.release	= cpufreq_sysfs_release,
710 };
711 
712 struct kobject *cpufreq_global_kobject;
713 EXPORT_SYMBOL(cpufreq_global_kobject);
714 
715 static int cpufreq_global_kobject_usage;
716 
717 int cpufreq_get_global_kobject(void)
718 {
719 	if (!cpufreq_global_kobject_usage++)
720 		return kobject_add(cpufreq_global_kobject,
721 				&cpu_subsys.dev_root->kobj, "%s", "cpufreq");
722 
723 	return 0;
724 }
725 EXPORT_SYMBOL(cpufreq_get_global_kobject);
726 
727 void cpufreq_put_global_kobject(void)
728 {
729 	if (!--cpufreq_global_kobject_usage)
730 		kobject_del(cpufreq_global_kobject);
731 }
732 EXPORT_SYMBOL(cpufreq_put_global_kobject);
733 
734 int cpufreq_sysfs_create_file(const struct attribute *attr)
735 {
736 	int ret = cpufreq_get_global_kobject();
737 
738 	if (!ret) {
739 		ret = sysfs_create_file(cpufreq_global_kobject, attr);
740 		if (ret)
741 			cpufreq_put_global_kobject();
742 	}
743 
744 	return ret;
745 }
746 EXPORT_SYMBOL(cpufreq_sysfs_create_file);
747 
748 void cpufreq_sysfs_remove_file(const struct attribute *attr)
749 {
750 	sysfs_remove_file(cpufreq_global_kobject, attr);
751 	cpufreq_put_global_kobject();
752 }
753 EXPORT_SYMBOL(cpufreq_sysfs_remove_file);
754 
755 /* symlink affected CPUs */
756 static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
757 {
758 	unsigned int j;
759 	int ret = 0;
760 
761 	for_each_cpu(j, policy->cpus) {
762 		struct device *cpu_dev;
763 
764 		if (j == policy->cpu)
765 			continue;
766 
767 		pr_debug("Adding link for CPU: %u\n", j);
768 		cpu_dev = get_cpu_device(j);
769 		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
770 					"cpufreq");
771 		if (ret)
772 			break;
773 	}
774 	return ret;
775 }
776 
777 static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
778 				     struct device *dev)
779 {
780 	struct freq_attr **drv_attr;
781 	int ret = 0;
782 
783 	/* prepare interface data */
784 	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
785 				   &dev->kobj, "cpufreq");
786 	if (ret)
787 		return ret;
788 
789 	/* set up files for this cpu device */
790 	drv_attr = cpufreq_driver->attr;
791 	while ((drv_attr) && (*drv_attr)) {
792 		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
793 		if (ret)
794 			goto err_out_kobj_put;
795 		drv_attr++;
796 	}
797 	if (cpufreq_driver->get) {
798 		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
799 		if (ret)
800 			goto err_out_kobj_put;
801 	}
802 	if (has_target()) {
803 		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
804 		if (ret)
805 			goto err_out_kobj_put;
806 	}
807 	if (cpufreq_driver->bios_limit) {
808 		ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
809 		if (ret)
810 			goto err_out_kobj_put;
811 	}
812 
813 	ret = cpufreq_add_dev_symlink(policy);
814 	if (ret)
815 		goto err_out_kobj_put;
816 
817 	return ret;
818 
819 err_out_kobj_put:
820 	kobject_put(&policy->kobj);
821 	wait_for_completion(&policy->kobj_unregister);
822 	return ret;
823 }
824 
825 static void cpufreq_init_policy(struct cpufreq_policy *policy)
826 {
827 	struct cpufreq_policy new_policy;
828 	int ret = 0;
829 
830 	memcpy(&new_policy, policy, sizeof(*policy));
831 	/* assure that the starting sequence is run in cpufreq_set_policy */
832 	policy->governor = NULL;
833 
834 	/* set default policy */
835 	ret = cpufreq_set_policy(policy, &new_policy);
836 	policy->user_policy.policy = policy->policy;
837 	policy->user_policy.governor = policy->governor;
838 
839 	if (ret) {
840 		pr_debug("setting policy failed\n");
841 		if (cpufreq_driver->exit)
842 			cpufreq_driver->exit(policy);
843 	}
844 }
845 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Attach @cpu to the already existing @policy.
 *
 * With a target-type driver the governor is stopped first and restarted
 * (START followed by LIMITS) afterwards. In between, @cpu is added to
 * policy->cpus and the per-CPU policy pointer is set, under the policy's
 * rwsem and cpufreq_driver_lock. Unless @frozen is set, a "cpufreq" link
 * to the policy's kobject is created in @dev's directory.
 *
 * Returns 0 on success or the first error encountered.
 */
static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
				  unsigned int cpu, struct device *dev,
				  bool frozen)
{
	const bool manage_gov = has_target();
	unsigned long irqflags;
	int err = 0;

	if (manage_gov) {
		err = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
		if (err) {
			pr_err("%s: Failed to stop governor\n", __func__);
			return err;
		}
	}

	down_write(&policy->rwsem);
	write_lock_irqsave(&cpufreq_driver_lock, irqflags);

	cpumask_set_cpu(cpu, policy->cpus);
	per_cpu(cpufreq_cpu_data, cpu) = policy;

	write_unlock_irqrestore(&cpufreq_driver_lock, irqflags);
	up_write(&policy->rwsem);

	if (manage_gov) {
		err = __cpufreq_governor(policy, CPUFREQ_GOV_START);
		if (!err)
			err = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

		if (err) {
			pr_err("%s: Failed to start governor\n", __func__);
			return err;
		}
	}

	/* Don't touch sysfs links during light-weight init */
	if (frozen)
		return err;

	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
}
#endif
887 
888 static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
889 {
890 	struct cpufreq_policy *policy;
891 	unsigned long flags;
892 
893 	read_lock_irqsave(&cpufreq_driver_lock, flags);
894 
895 	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
896 
897 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
898 
899 	return policy;
900 }
901 
902 static struct cpufreq_policy *cpufreq_policy_alloc(void)
903 {
904 	struct cpufreq_policy *policy;
905 
906 	policy = kzalloc(sizeof(*policy), GFP_KERNEL);
907 	if (!policy)
908 		return NULL;
909 
910 	if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
911 		goto err_free_policy;
912 
913 	if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
914 		goto err_free_cpumask;
915 
916 	INIT_LIST_HEAD(&policy->policy_list);
917 	init_rwsem(&policy->rwsem);
918 
919 	return policy;
920 
921 err_free_cpumask:
922 	free_cpumask_var(policy->cpus);
923 err_free_policy:
924 	kfree(policy);
925 
926 	return NULL;
927 }
928 
929 static void cpufreq_policy_free(struct cpufreq_policy *policy)
930 {
931 	free_cpumask_var(policy->related_cpus);
932 	free_cpumask_var(policy->cpus);
933 	kfree(policy);
934 }
935 
936 static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
937 {
938 	if (WARN_ON(cpu == policy->cpu))
939 		return;
940 
941 	down_write(&policy->rwsem);
942 
943 	policy->last_cpu = policy->cpu;
944 	policy->cpu = cpu;
945 
946 	up_write(&policy->rwsem);
947 
948 	cpufreq_frequency_table_update_policy_cpu(policy);
949 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
950 			CPUFREQ_UPDATE_POLICY_CPU, policy);
951 }
952 
953 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
954 			     bool frozen)
955 {
956 	unsigned int j, cpu = dev->id;
957 	int ret = -ENOMEM;
958 	struct cpufreq_policy *policy;
959 	unsigned long flags;
960 #ifdef CONFIG_HOTPLUG_CPU
961 	struct cpufreq_policy *tpolicy;
962 	struct cpufreq_governor *gov;
963 #endif
964 
965 	if (cpu_is_offline(cpu))
966 		return 0;
967 
968 	pr_debug("adding CPU %u\n", cpu);
969 
970 #ifdef CONFIG_SMP
971 	/* check whether a different CPU already registered this
972 	 * CPU because it is in the same boat. */
973 	policy = cpufreq_cpu_get(cpu);
974 	if (unlikely(policy)) {
975 		cpufreq_cpu_put(policy);
976 		return 0;
977 	}
978 #endif
979 
980 	if (!down_read_trylock(&cpufreq_rwsem))
981 		return 0;
982 
983 #ifdef CONFIG_HOTPLUG_CPU
984 	/* Check if this cpu was hot-unplugged earlier and has siblings */
985 	read_lock_irqsave(&cpufreq_driver_lock, flags);
986 	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
987 		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
988 			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
989 			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen);
990 			up_read(&cpufreq_rwsem);
991 			return ret;
992 		}
993 	}
994 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
995 #endif
996 
997 	if (frozen)
998 		/* Restore the saved policy when doing light-weight init */
999 		policy = cpufreq_policy_restore(cpu);
1000 	else
1001 		policy = cpufreq_policy_alloc();
1002 
1003 	if (!policy)
1004 		goto nomem_out;
1005 
1006 
1007 	/*
1008 	 * In the resume path, since we restore a saved policy, the assignment
1009 	 * to policy->cpu is like an update of the existing policy, rather than
1010 	 * the creation of a brand new one. So we need to perform this update
1011 	 * by invoking update_policy_cpu().
1012 	 */
1013 	if (frozen && cpu != policy->cpu)
1014 		update_policy_cpu(policy, cpu);
1015 	else
1016 		policy->cpu = cpu;
1017 
1018 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
1019 	cpumask_copy(policy->cpus, cpumask_of(cpu));
1020 
1021 	init_completion(&policy->kobj_unregister);
1022 	INIT_WORK(&policy->update, handle_update);
1023 
1024 	/* call driver. From then on the cpufreq must be able
1025 	 * to accept all calls to ->verify and ->setpolicy for this CPU
1026 	 */
1027 	ret = cpufreq_driver->init(policy);
1028 	if (ret) {
1029 		pr_debug("initialization failed\n");
1030 		goto err_set_policy_cpu;
1031 	}
1032 
1033 	if (cpufreq_driver->get) {
1034 		policy->cur = cpufreq_driver->get(policy->cpu);
1035 		if (!policy->cur) {
1036 			pr_err("%s: ->get() failed\n", __func__);
1037 			goto err_get_freq;
1038 		}
1039 	}
1040 
1041 	/* related cpus should atleast have policy->cpus */
1042 	cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);
1043 
1044 	/*
1045 	 * affected cpus must always be the one, which are online. We aren't
1046 	 * managing offline cpus here.
1047 	 */
1048 	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
1049 
1050 	policy->user_policy.min = policy->min;
1051 	policy->user_policy.max = policy->max;
1052 
1053 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1054 				     CPUFREQ_START, policy);
1055 
1056 #ifdef CONFIG_HOTPLUG_CPU
1057 	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
1058 	if (gov) {
1059 		policy->governor = gov;
1060 		pr_debug("Restoring governor %s for cpu %d\n",
1061 		       policy->governor->name, cpu);
1062 	}
1063 #endif
1064 
1065 	write_lock_irqsave(&cpufreq_driver_lock, flags);
1066 	for_each_cpu(j, policy->cpus)
1067 		per_cpu(cpufreq_cpu_data, j) = policy;
1068 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1069 
1070 	if (!frozen) {
1071 		ret = cpufreq_add_dev_interface(policy, dev);
1072 		if (ret)
1073 			goto err_out_unregister;
1074 	}
1075 
1076 	write_lock_irqsave(&cpufreq_driver_lock, flags);
1077 	list_add(&policy->policy_list, &cpufreq_policy_list);
1078 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1079 
1080 	cpufreq_init_policy(policy);
1081 
1082 	kobject_uevent(&policy->kobj, KOBJ_ADD);
1083 	up_read(&cpufreq_rwsem);
1084 
1085 	pr_debug("initialization complete\n");
1086 
1087 	return 0;
1088 
1089 err_out_unregister:
1090 	write_lock_irqsave(&cpufreq_driver_lock, flags);
1091 	for_each_cpu(j, policy->cpus)
1092 		per_cpu(cpufreq_cpu_data, j) = NULL;
1093 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1094 
1095 err_get_freq:
1096 	if (cpufreq_driver->exit)
1097 		cpufreq_driver->exit(policy);
1098 err_set_policy_cpu:
1099 	cpufreq_policy_free(policy);
1100 nomem_out:
1101 	up_read(&cpufreq_rwsem);
1102 
1103 	return ret;
1104 }
1105 
/**
 * cpufreq_add_dev - add a CPU device
 * @dev: CPU device being added
 * @sif: subsys interface the device is being added through
 *
 * Adds the cpufreq interface for a CPU device by forwarding to
 * __cpufreq_add_dev() with its "frozen" argument set to false.
 *
 * The Oracle says: try running cpufreq registration/unregistration concurrently
 * with cpu hotplugging and all hell will break loose. Tried to clean this
 * mess up, but more thorough testing is needed. - Mathieu
 *
 * Returns whatever __cpufreq_add_dev() returns.
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	return __cpufreq_add_dev(dev, sif, false);
}
1119 
1120 static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
1121 					   unsigned int old_cpu, bool frozen)
1122 {
1123 	struct device *cpu_dev;
1124 	int ret;
1125 
1126 	/* first sibling now owns the new sysfs dir */
1127 	cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu));
1128 
1129 	/* Don't touch sysfs files during light-weight tear-down */
1130 	if (frozen)
1131 		return cpu_dev->id;
1132 
1133 	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
1134 	ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
1135 	if (ret) {
1136 		pr_err("%s: Failed to move kobj: %d", __func__, ret);
1137 
1138 		down_write(&policy->rwsem);
1139 		cpumask_set_cpu(old_cpu, policy->cpus);
1140 		up_write(&policy->rwsem);
1141 
1142 		ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
1143 					"cpufreq");
1144 
1145 		return -EINVAL;
1146 	}
1147 
1148 	return cpu_dev->id;
1149 }
1150 
1151 static int __cpufreq_remove_dev_prepare(struct device *dev,
1152 					struct subsys_interface *sif,
1153 					bool frozen)
1154 {
1155 	unsigned int cpu = dev->id, cpus;
1156 	int new_cpu, ret;
1157 	unsigned long flags;
1158 	struct cpufreq_policy *policy;
1159 
1160 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
1161 
1162 	write_lock_irqsave(&cpufreq_driver_lock, flags);
1163 
1164 	policy = per_cpu(cpufreq_cpu_data, cpu);
1165 
1166 	/* Save the policy somewhere when doing a light-weight tear-down */
1167 	if (frozen)
1168 		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
1169 
1170 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1171 
1172 	if (!policy) {
1173 		pr_debug("%s: No cpu_data found\n", __func__);
1174 		return -EINVAL;
1175 	}
1176 
1177 	if (has_target()) {
1178 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
1179 		if (ret) {
1180 			pr_err("%s: Failed to stop governor\n", __func__);
1181 			return ret;
1182 		}
1183 	}
1184 
1185 #ifdef CONFIG_HOTPLUG_CPU
1186 	if (!cpufreq_driver->setpolicy)
1187 		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
1188 			policy->governor->name, CPUFREQ_NAME_LEN);
1189 #endif
1190 
1191 	down_read(&policy->rwsem);
1192 	cpus = cpumask_weight(policy->cpus);
1193 	up_read(&policy->rwsem);
1194 
1195 	if (cpu != policy->cpu) {
1196 		if (!frozen)
1197 			sysfs_remove_link(&dev->kobj, "cpufreq");
1198 	} else if (cpus > 1) {
1199 		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
1200 		if (new_cpu >= 0) {
1201 			update_policy_cpu(policy, new_cpu);
1202 
1203 			if (!frozen) {
1204 				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
1205 						__func__, new_cpu, cpu);
1206 			}
1207 		}
1208 	}
1209 
1210 	return 0;
1211 }
1212 
1213 static int __cpufreq_remove_dev_finish(struct device *dev,
1214 				       struct subsys_interface *sif,
1215 				       bool frozen)
1216 {
1217 	unsigned int cpu = dev->id, cpus;
1218 	int ret;
1219 	unsigned long flags;
1220 	struct cpufreq_policy *policy;
1221 	struct kobject *kobj;
1222 	struct completion *cmp;
1223 
1224 	read_lock_irqsave(&cpufreq_driver_lock, flags);
1225 	policy = per_cpu(cpufreq_cpu_data, cpu);
1226 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
1227 
1228 	if (!policy) {
1229 		pr_debug("%s: No cpu_data found\n", __func__);
1230 		return -EINVAL;
1231 	}
1232 
1233 	down_write(&policy->rwsem);
1234 	cpus = cpumask_weight(policy->cpus);
1235 
1236 	if (cpus > 1)
1237 		cpumask_clear_cpu(cpu, policy->cpus);
1238 	up_write(&policy->rwsem);
1239 
1240 	/* If cpu is last user of policy, free policy */
1241 	if (cpus == 1) {
1242 		if (has_target()) {
1243 			ret = __cpufreq_governor(policy,
1244 					CPUFREQ_GOV_POLICY_EXIT);
1245 			if (ret) {
1246 				pr_err("%s: Failed to exit governor\n",
1247 						__func__);
1248 				return ret;
1249 			}
1250 		}
1251 
1252 		if (!frozen) {
1253 			down_read(&policy->rwsem);
1254 			kobj = &policy->kobj;
1255 			cmp = &policy->kobj_unregister;
1256 			up_read(&policy->rwsem);
1257 			kobject_put(kobj);
1258 
1259 			/*
1260 			 * We need to make sure that the underlying kobj is
1261 			 * actually not referenced anymore by anybody before we
1262 			 * proceed with unloading.
1263 			 */
1264 			pr_debug("waiting for dropping of refcount\n");
1265 			wait_for_completion(cmp);
1266 			pr_debug("wait complete\n");
1267 		}
1268 
1269 		/*
1270 		 * Perform the ->exit() even during light-weight tear-down,
1271 		 * since this is a core component, and is essential for the
1272 		 * subsequent light-weight ->init() to succeed.
1273 		 */
1274 		if (cpufreq_driver->exit)
1275 			cpufreq_driver->exit(policy);
1276 
1277 		/* Remove policy from list of active policies */
1278 		write_lock_irqsave(&cpufreq_driver_lock, flags);
1279 		list_del(&policy->policy_list);
1280 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
1281 
1282 		if (!frozen)
1283 			cpufreq_policy_free(policy);
1284 	} else {
1285 		if (has_target()) {
1286 			if ((ret = __cpufreq_governor(policy, CPUFREQ_GOV_START)) ||
1287 					(ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))) {
1288 				pr_err("%s: Failed to start governor\n",
1289 						__func__);
1290 				return ret;
1291 			}
1292 		}
1293 	}
1294 
1295 	per_cpu(cpufreq_cpu_data, cpu) = NULL;
1296 	return 0;
1297 }
1298 
1299 /**
1300  * cpufreq_remove_dev - remove a CPU device
1301  *
1302  * Removes the cpufreq interface for a CPU device.
1303  */
1304 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1305 {
1306 	unsigned int cpu = dev->id;
1307 	int ret;
1308 
1309 	if (cpu_is_offline(cpu))
1310 		return 0;
1311 
1312 	ret = __cpufreq_remove_dev_prepare(dev, sif, false);
1313 
1314 	if (!ret)
1315 		ret = __cpufreq_remove_dev_finish(dev, sif, false);
1316 
1317 	return ret;
1318 }
1319 
1320 static void handle_update(struct work_struct *work)
1321 {
1322 	struct cpufreq_policy *policy =
1323 		container_of(work, struct cpufreq_policy, update);
1324 	unsigned int cpu = policy->cpu;
1325 	pr_debug("handle_update for cpu %u called\n", cpu);
1326 	cpufreq_update_policy(cpu);
1327 }
1328 
1329 /**
1330  *	cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
1331  *	in deep trouble.
1332  *	@cpu: cpu number
1333  *	@old_freq: CPU frequency the kernel thinks the CPU runs at
1334  *	@new_freq: CPU frequency the CPU actually runs at
1335  *
1336  *	We adjust to current frequency first, and need to clean up later.
1337  *	So either call to cpufreq_update_policy() or schedule handle_update()).
1338  */
1339 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1340 				unsigned int new_freq)
1341 {
1342 	struct cpufreq_policy *policy;
1343 	struct cpufreq_freqs freqs;
1344 	unsigned long flags;
1345 
1346 	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1347 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1348 
1349 	freqs.old = old_freq;
1350 	freqs.new = new_freq;
1351 
1352 	read_lock_irqsave(&cpufreq_driver_lock, flags);
1353 	policy = per_cpu(cpufreq_cpu_data, cpu);
1354 	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
1355 
1356 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
1357 	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
1358 }
1359 
1360 /**
1361  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1362  * @cpu: CPU number
1363  *
1364  * This is the last known freq, without actually getting it from the driver.
1365  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1366  */
1367 unsigned int cpufreq_quick_get(unsigned int cpu)
1368 {
1369 	struct cpufreq_policy *policy;
1370 	unsigned int ret_freq = 0;
1371 
1372 	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
1373 		return cpufreq_driver->get(cpu);
1374 
1375 	policy = cpufreq_cpu_get(cpu);
1376 	if (policy) {
1377 		ret_freq = policy->cur;
1378 		cpufreq_cpu_put(policy);
1379 	}
1380 
1381 	return ret_freq;
1382 }
1383 EXPORT_SYMBOL(cpufreq_quick_get);
1384 
1385 /**
1386  * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1387  * @cpu: CPU number
1388  *
1389  * Just return the max possible frequency for a given CPU.
1390  */
1391 unsigned int cpufreq_quick_get_max(unsigned int cpu)
1392 {
1393 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1394 	unsigned int ret_freq = 0;
1395 
1396 	if (policy) {
1397 		ret_freq = policy->max;
1398 		cpufreq_cpu_put(policy);
1399 	}
1400 
1401 	return ret_freq;
1402 }
1403 EXPORT_SYMBOL(cpufreq_quick_get_max);
1404 
1405 static unsigned int __cpufreq_get(unsigned int cpu)
1406 {
1407 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1408 	unsigned int ret_freq = 0;
1409 
1410 	if (!cpufreq_driver->get)
1411 		return ret_freq;
1412 
1413 	ret_freq = cpufreq_driver->get(cpu);
1414 
1415 	if (ret_freq && policy->cur &&
1416 		!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1417 		/* verify no discrepancy between actual and
1418 					saved value exists */
1419 		if (unlikely(ret_freq != policy->cur)) {
1420 			cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1421 			schedule_work(&policy->update);
1422 		}
1423 	}
1424 
1425 	return ret_freq;
1426 }
1427 
1428 /**
1429  * cpufreq_get - get the current CPU frequency (in kHz)
1430  * @cpu: CPU number
1431  *
1432  * Get the CPU current (static) CPU frequency
1433  */
1434 unsigned int cpufreq_get(unsigned int cpu)
1435 {
1436 	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1437 	unsigned int ret_freq = 0;
1438 
1439 	if (cpufreq_disabled() || !cpufreq_driver)
1440 		return -ENOENT;
1441 
1442 	BUG_ON(!policy);
1443 
1444 	if (!down_read_trylock(&cpufreq_rwsem))
1445 		return 0;
1446 
1447 	down_read(&policy->rwsem);
1448 
1449 	ret_freq = __cpufreq_get(cpu);
1450 
1451 	up_read(&policy->rwsem);
1452 	up_read(&cpufreq_rwsem);
1453 
1454 	return ret_freq;
1455 }
1456 EXPORT_SYMBOL(cpufreq_get);
1457 
/*
 * Subsystem interface on cpu_subsys: cpufreq_add_dev() and
 * cpufreq_remove_dev() are the add/remove callbacks for CPU devices.
 */
static struct subsys_interface cpufreq_interface = {
	.name		= "cpufreq",
	.subsys		= &cpu_subsys,
	.add_dev	= cpufreq_add_dev,
	.remove_dev	= cpufreq_remove_dev,
};
1464 
1465 /**
1466  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1467  *
1468  * This function is only executed for the boot processor.  The other CPUs
1469  * have been put offline by means of CPU hotplug.
1470  */
1471 static int cpufreq_bp_suspend(void)
1472 {
1473 	int ret = 0;
1474 
1475 	int cpu = smp_processor_id();
1476 	struct cpufreq_policy *policy;
1477 
1478 	pr_debug("suspending cpu %u\n", cpu);
1479 
1480 	/* If there's no policy for the boot CPU, we have nothing to do. */
1481 	policy = cpufreq_cpu_get(cpu);
1482 	if (!policy)
1483 		return 0;
1484 
1485 	if (cpufreq_driver->suspend) {
1486 		ret = cpufreq_driver->suspend(policy);
1487 		if (ret)
1488 			printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1489 					"step on CPU %u\n", policy->cpu);
1490 	}
1491 
1492 	cpufreq_cpu_put(policy);
1493 	return ret;
1494 }
1495 
1496 /**
1497  * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1498  *
1499  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1500  *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
1501  *	    restored. It will verify that the current freq is in sync with
1502  *	    what we believe it to be. This is a bit later than when it
1503  *	    should be, but nonethteless it's better than calling
1504  *	    cpufreq_driver->get() here which might re-enable interrupts...
1505  *
1506  * This function is only executed for the boot CPU.  The other CPUs have not
1507  * been turned on yet.
1508  */
1509 static void cpufreq_bp_resume(void)
1510 {
1511 	int ret = 0;
1512 
1513 	int cpu = smp_processor_id();
1514 	struct cpufreq_policy *policy;
1515 
1516 	pr_debug("resuming cpu %u\n", cpu);
1517 
1518 	/* If there's no policy for the boot CPU, we have nothing to do. */
1519 	policy = cpufreq_cpu_get(cpu);
1520 	if (!policy)
1521 		return;
1522 
1523 	if (cpufreq_driver->resume) {
1524 		ret = cpufreq_driver->resume(policy);
1525 		if (ret) {
1526 			printk(KERN_ERR "cpufreq: resume failed in ->resume "
1527 					"step on CPU %u\n", policy->cpu);
1528 			goto fail;
1529 		}
1530 	}
1531 
1532 	schedule_work(&policy->update);
1533 
1534 fail:
1535 	cpufreq_cpu_put(policy);
1536 }
1537 
/* Syscore hooks: suspend/resume handling for the boot CPU (see above). */
static struct syscore_ops cpufreq_syscore_ops = {
	.suspend	= cpufreq_bp_suspend,
	.resume		= cpufreq_bp_resume,
};
1542 
1543 /**
1544  *	cpufreq_get_current_driver - return current driver's name
1545  *
1546  *	Return the name string of the currently loaded cpufreq driver
1547  *	or NULL, if none.
1548  */
1549 const char *cpufreq_get_current_driver(void)
1550 {
1551 	if (cpufreq_driver)
1552 		return cpufreq_driver->name;
1553 
1554 	return NULL;
1555 }
1556 EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
1557 
1558 /*********************************************************************
1559  *                     NOTIFIER LISTS INTERFACE                      *
1560  *********************************************************************/
1561 
1562 /**
1563  *	cpufreq_register_notifier - register a driver with cpufreq
1564  *	@nb: notifier function to register
1565  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1566  *
1567  *	Add a driver to one of two lists: either a list of drivers that
1568  *      are notified about clock rate changes (once before and once after
1569  *      the transition), or a list of drivers that are notified about
1570  *      changes in cpufreq policy.
1571  *
1572  *	This function may sleep, and has the same return conditions as
1573  *	blocking_notifier_chain_register.
1574  */
1575 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1576 {
1577 	int ret;
1578 
1579 	if (cpufreq_disabled())
1580 		return -EINVAL;
1581 
1582 	WARN_ON(!init_cpufreq_transition_notifier_list_called);
1583 
1584 	switch (list) {
1585 	case CPUFREQ_TRANSITION_NOTIFIER:
1586 		ret = srcu_notifier_chain_register(
1587 				&cpufreq_transition_notifier_list, nb);
1588 		break;
1589 	case CPUFREQ_POLICY_NOTIFIER:
1590 		ret = blocking_notifier_chain_register(
1591 				&cpufreq_policy_notifier_list, nb);
1592 		break;
1593 	default:
1594 		ret = -EINVAL;
1595 	}
1596 
1597 	return ret;
1598 }
1599 EXPORT_SYMBOL(cpufreq_register_notifier);
1600 
1601 /**
1602  *	cpufreq_unregister_notifier - unregister a driver with cpufreq
1603  *	@nb: notifier block to be unregistered
1604  *	@list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1605  *
1606  *	Remove a driver from the CPU frequency notifier list.
1607  *
1608  *	This function may sleep, and has the same return conditions as
1609  *	blocking_notifier_chain_unregister.
1610  */
1611 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1612 {
1613 	int ret;
1614 
1615 	if (cpufreq_disabled())
1616 		return -EINVAL;
1617 
1618 	switch (list) {
1619 	case CPUFREQ_TRANSITION_NOTIFIER:
1620 		ret = srcu_notifier_chain_unregister(
1621 				&cpufreq_transition_notifier_list, nb);
1622 		break;
1623 	case CPUFREQ_POLICY_NOTIFIER:
1624 		ret = blocking_notifier_chain_unregister(
1625 				&cpufreq_policy_notifier_list, nb);
1626 		break;
1627 	default:
1628 		ret = -EINVAL;
1629 	}
1630 
1631 	return ret;
1632 }
1633 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1634 
1635 
1636 /*********************************************************************
1637  *                              GOVERNORS                            *
1638  *********************************************************************/
1639 
1640 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1641 			    unsigned int target_freq,
1642 			    unsigned int relation)
1643 {
1644 	int retval = -EINVAL;
1645 	unsigned int old_target_freq = target_freq;
1646 
1647 	if (cpufreq_disabled())
1648 		return -ENODEV;
1649 
1650 	/* Make sure that target_freq is within supported range */
1651 	if (target_freq > policy->max)
1652 		target_freq = policy->max;
1653 	if (target_freq < policy->min)
1654 		target_freq = policy->min;
1655 
1656 	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
1657 			policy->cpu, target_freq, relation, old_target_freq);
1658 
1659 	/*
1660 	 * This might look like a redundant call as we are checking it again
1661 	 * after finding index. But it is left intentionally for cases where
1662 	 * exactly same freq is called again and so we can save on few function
1663 	 * calls.
1664 	 */
1665 	if (target_freq == policy->cur)
1666 		return 0;
1667 
1668 	if (cpufreq_driver->target)
1669 		retval = cpufreq_driver->target(policy, target_freq, relation);
1670 	else if (cpufreq_driver->target_index) {
1671 		struct cpufreq_frequency_table *freq_table;
1672 		struct cpufreq_freqs freqs;
1673 		bool notify;
1674 		int index;
1675 
1676 		freq_table = cpufreq_frequency_get_table(policy->cpu);
1677 		if (unlikely(!freq_table)) {
1678 			pr_err("%s: Unable to find freq_table\n", __func__);
1679 			goto out;
1680 		}
1681 
1682 		retval = cpufreq_frequency_table_target(policy, freq_table,
1683 				target_freq, relation, &index);
1684 		if (unlikely(retval)) {
1685 			pr_err("%s: Unable to find matching freq\n", __func__);
1686 			goto out;
1687 		}
1688 
1689 		if (freq_table[index].frequency == policy->cur) {
1690 			retval = 0;
1691 			goto out;
1692 		}
1693 
1694 		notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
1695 
1696 		if (notify) {
1697 			freqs.old = policy->cur;
1698 			freqs.new = freq_table[index].frequency;
1699 			freqs.flags = 0;
1700 
1701 			pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
1702 					__func__, policy->cpu, freqs.old,
1703 					freqs.new);
1704 
1705 			cpufreq_notify_transition(policy, &freqs,
1706 					CPUFREQ_PRECHANGE);
1707 		}
1708 
1709 		retval = cpufreq_driver->target_index(policy, index);
1710 		if (retval)
1711 			pr_err("%s: Failed to change cpu frequency: %d\n",
1712 					__func__, retval);
1713 
1714 		if (notify) {
1715 			/*
1716 			 * Notify with old freq in case we failed to change
1717 			 * frequency
1718 			 */
1719 			if (retval)
1720 				freqs.new = freqs.old;
1721 
1722 			cpufreq_notify_transition(policy, &freqs,
1723 					CPUFREQ_POSTCHANGE);
1724 		}
1725 	}
1726 
1727 out:
1728 	return retval;
1729 }
1730 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1731 
1732 int cpufreq_driver_target(struct cpufreq_policy *policy,
1733 			  unsigned int target_freq,
1734 			  unsigned int relation)
1735 {
1736 	int ret = -EINVAL;
1737 
1738 	down_write(&policy->rwsem);
1739 
1740 	ret = __cpufreq_driver_target(policy, target_freq, relation);
1741 
1742 	up_write(&policy->rwsem);
1743 
1744 	return ret;
1745 }
1746 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1747 
1748 /*
1749  * when "event" is CPUFREQ_GOV_LIMITS
1750  */
1751 
1752 static int __cpufreq_governor(struct cpufreq_policy *policy,
1753 					unsigned int event)
1754 {
1755 	int ret;
1756 
1757 	/* Only must be defined when default governor is known to have latency
1758 	   restrictions, like e.g. conservative or ondemand.
1759 	   That this is the case is already ensured in Kconfig
1760 	*/
1761 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1762 	struct cpufreq_governor *gov = &cpufreq_gov_performance;
1763 #else
1764 	struct cpufreq_governor *gov = NULL;
1765 #endif
1766 
1767 	if (policy->governor->max_transition_latency &&
1768 	    policy->cpuinfo.transition_latency >
1769 	    policy->governor->max_transition_latency) {
1770 		if (!gov)
1771 			return -EINVAL;
1772 		else {
1773 			printk(KERN_WARNING "%s governor failed, too long"
1774 			       " transition latency of HW, fallback"
1775 			       " to %s governor\n",
1776 			       policy->governor->name,
1777 			       gov->name);
1778 			policy->governor = gov;
1779 		}
1780 	}
1781 
1782 	if (event == CPUFREQ_GOV_POLICY_INIT)
1783 		if (!try_module_get(policy->governor->owner))
1784 			return -EINVAL;
1785 
1786 	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1787 						policy->cpu, event);
1788 
1789 	mutex_lock(&cpufreq_governor_lock);
1790 	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
1791 	    || (!policy->governor_enabled
1792 	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
1793 		mutex_unlock(&cpufreq_governor_lock);
1794 		return -EBUSY;
1795 	}
1796 
1797 	if (event == CPUFREQ_GOV_STOP)
1798 		policy->governor_enabled = false;
1799 	else if (event == CPUFREQ_GOV_START)
1800 		policy->governor_enabled = true;
1801 
1802 	mutex_unlock(&cpufreq_governor_lock);
1803 
1804 	ret = policy->governor->governor(policy, event);
1805 
1806 	if (!ret) {
1807 		if (event == CPUFREQ_GOV_POLICY_INIT)
1808 			policy->governor->initialized++;
1809 		else if (event == CPUFREQ_GOV_POLICY_EXIT)
1810 			policy->governor->initialized--;
1811 	} else {
1812 		/* Restore original values */
1813 		mutex_lock(&cpufreq_governor_lock);
1814 		if (event == CPUFREQ_GOV_STOP)
1815 			policy->governor_enabled = true;
1816 		else if (event == CPUFREQ_GOV_START)
1817 			policy->governor_enabled = false;
1818 		mutex_unlock(&cpufreq_governor_lock);
1819 	}
1820 
1821 	if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
1822 			((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
1823 		module_put(policy->governor->owner);
1824 
1825 	return ret;
1826 }
1827 
1828 int cpufreq_register_governor(struct cpufreq_governor *governor)
1829 {
1830 	int err;
1831 
1832 	if (!governor)
1833 		return -EINVAL;
1834 
1835 	if (cpufreq_disabled())
1836 		return -ENODEV;
1837 
1838 	mutex_lock(&cpufreq_governor_mutex);
1839 
1840 	governor->initialized = 0;
1841 	err = -EBUSY;
1842 	if (__find_governor(governor->name) == NULL) {
1843 		err = 0;
1844 		list_add(&governor->governor_list, &cpufreq_governor_list);
1845 	}
1846 
1847 	mutex_unlock(&cpufreq_governor_mutex);
1848 	return err;
1849 }
1850 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1851 
1852 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1853 {
1854 #ifdef CONFIG_HOTPLUG_CPU
1855 	int cpu;
1856 #endif
1857 
1858 	if (!governor)
1859 		return;
1860 
1861 	if (cpufreq_disabled())
1862 		return;
1863 
1864 #ifdef CONFIG_HOTPLUG_CPU
1865 	for_each_present_cpu(cpu) {
1866 		if (cpu_online(cpu))
1867 			continue;
1868 		if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1869 			strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1870 	}
1871 #endif
1872 
1873 	mutex_lock(&cpufreq_governor_mutex);
1874 	list_del(&governor->governor_list);
1875 	mutex_unlock(&cpufreq_governor_mutex);
1876 	return;
1877 }
1878 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1879 
1880 
1881 /*********************************************************************
1882  *                          POLICY INTERFACE                         *
1883  *********************************************************************/
1884 
1885 /**
1886  * cpufreq_get_policy - get the current cpufreq_policy
1887  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1888  *	is written
1889  *
1890  * Reads the current cpufreq policy.
1891  */
1892 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1893 {
1894 	struct cpufreq_policy *cpu_policy;
1895 	if (!policy)
1896 		return -EINVAL;
1897 
1898 	cpu_policy = cpufreq_cpu_get(cpu);
1899 	if (!cpu_policy)
1900 		return -EINVAL;
1901 
1902 	memcpy(policy, cpu_policy, sizeof(*policy));
1903 
1904 	cpufreq_cpu_put(cpu_policy);
1905 	return 0;
1906 }
1907 EXPORT_SYMBOL(cpufreq_get_policy);
1908 
1909 /*
1910  * policy : current policy.
1911  * new_policy: policy to be set.
1912  */
1913 static int cpufreq_set_policy(struct cpufreq_policy *policy,
1914 				struct cpufreq_policy *new_policy)
1915 {
1916 	int ret = 0, failed = 1;
1917 
1918 	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", new_policy->cpu,
1919 		new_policy->min, new_policy->max);
1920 
1921 	memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
1922 
1923 	if (new_policy->min > policy->max || new_policy->max < policy->min) {
1924 		ret = -EINVAL;
1925 		goto error_out;
1926 	}
1927 
1928 	/* verify the cpu speed can be set within this limit */
1929 	ret = cpufreq_driver->verify(new_policy);
1930 	if (ret)
1931 		goto error_out;
1932 
1933 	/* adjust if necessary - all reasons */
1934 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1935 			CPUFREQ_ADJUST, new_policy);
1936 
1937 	/* adjust if necessary - hardware incompatibility*/
1938 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1939 			CPUFREQ_INCOMPATIBLE, new_policy);
1940 
1941 	/*
1942 	 * verify the cpu speed can be set within this limit, which might be
1943 	 * different to the first one
1944 	 */
1945 	ret = cpufreq_driver->verify(new_policy);
1946 	if (ret)
1947 		goto error_out;
1948 
1949 	/* notification of the new policy */
1950 	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1951 			CPUFREQ_NOTIFY, new_policy);
1952 
1953 	policy->min = new_policy->min;
1954 	policy->max = new_policy->max;
1955 
1956 	pr_debug("new min and max freqs are %u - %u kHz\n",
1957 					policy->min, policy->max);
1958 
1959 	if (cpufreq_driver->setpolicy) {
1960 		policy->policy = new_policy->policy;
1961 		pr_debug("setting range\n");
1962 		ret = cpufreq_driver->setpolicy(new_policy);
1963 	} else {
1964 		if (new_policy->governor != policy->governor) {
1965 			/* save old, working values */
1966 			struct cpufreq_governor *old_gov = policy->governor;
1967 
1968 			pr_debug("governor switch\n");
1969 
1970 			/* end old governor */
1971 			if (policy->governor) {
1972 				__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
1973 				up_write(&policy->rwsem);
1974 				__cpufreq_governor(policy,
1975 						CPUFREQ_GOV_POLICY_EXIT);
1976 				down_write(&policy->rwsem);
1977 			}
1978 
1979 			/* start new governor */
1980 			policy->governor = new_policy->governor;
1981 			if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) {
1982 				if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) {
1983 					failed = 0;
1984 				} else {
1985 					up_write(&policy->rwsem);
1986 					__cpufreq_governor(policy,
1987 							CPUFREQ_GOV_POLICY_EXIT);
1988 					down_write(&policy->rwsem);
1989 				}
1990 			}
1991 
1992 			if (failed) {
1993 				/* new governor failed, so re-start old one */
1994 				pr_debug("starting governor %s failed\n",
1995 							policy->governor->name);
1996 				if (old_gov) {
1997 					policy->governor = old_gov;
1998 					__cpufreq_governor(policy,
1999 							CPUFREQ_GOV_POLICY_INIT);
2000 					__cpufreq_governor(policy,
2001 							   CPUFREQ_GOV_START);
2002 				}
2003 				ret = -EINVAL;
2004 				goto error_out;
2005 			}
2006 			/* might be a policy change, too, so fall through */
2007 		}
2008 		pr_debug("governor: change or update limits\n");
2009 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
2010 	}
2011 
2012 error_out:
2013 	return ret;
2014 }
2015 
2016 /**
2017  *	cpufreq_update_policy - re-evaluate an existing cpufreq policy
2018  *	@cpu: CPU which shall be re-evaluated
2019  *
2020  *	Useful for policy notifiers which have different necessities
2021  *	at different times.
2022  */
2023 int cpufreq_update_policy(unsigned int cpu)
2024 {
2025 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
2026 	struct cpufreq_policy new_policy;
2027 	int ret;
2028 
2029 	if (!policy) {
2030 		ret = -ENODEV;
2031 		goto no_policy;
2032 	}
2033 
2034 	down_write(&policy->rwsem);
2035 
2036 	pr_debug("updating policy for CPU %u\n", cpu);
2037 	memcpy(&new_policy, policy, sizeof(*policy));
2038 	new_policy.min = policy->user_policy.min;
2039 	new_policy.max = policy->user_policy.max;
2040 	new_policy.policy = policy->user_policy.policy;
2041 	new_policy.governor = policy->user_policy.governor;
2042 
2043 	/*
2044 	 * BIOS might change freq behind our back
2045 	 * -> ask driver for current freq and notify governors about a change
2046 	 */
2047 	if (cpufreq_driver->get) {
2048 		new_policy.cur = cpufreq_driver->get(cpu);
2049 		if (!policy->cur) {
2050 			pr_debug("Driver did not initialize current freq");
2051 			policy->cur = new_policy.cur;
2052 		} else {
2053 			if (policy->cur != new_policy.cur && has_target())
2054 				cpufreq_out_of_sync(cpu, policy->cur,
2055 								new_policy.cur);
2056 		}
2057 	}
2058 
2059 	ret = cpufreq_set_policy(policy, &new_policy);
2060 
2061 	up_write(&policy->rwsem);
2062 
2063 	cpufreq_cpu_put(policy);
2064 no_policy:
2065 	return ret;
2066 }
2067 EXPORT_SYMBOL(cpufreq_update_policy);
2068 
2069 static int cpufreq_cpu_callback(struct notifier_block *nfb,
2070 					unsigned long action, void *hcpu)
2071 {
2072 	unsigned int cpu = (unsigned long)hcpu;
2073 	struct device *dev;
2074 	bool frozen = false;
2075 
2076 	dev = get_cpu_device(cpu);
2077 	if (dev) {
2078 
2079 		if (action & CPU_TASKS_FROZEN)
2080 			frozen = true;
2081 
2082 		switch (action & ~CPU_TASKS_FROZEN) {
2083 		case CPU_ONLINE:
2084 			__cpufreq_add_dev(dev, NULL, frozen);
2085 			cpufreq_update_policy(cpu);
2086 			break;
2087 
2088 		case CPU_DOWN_PREPARE:
2089 			__cpufreq_remove_dev_prepare(dev, NULL, frozen);
2090 			break;
2091 
2092 		case CPU_POST_DEAD:
2093 			__cpufreq_remove_dev_finish(dev, NULL, frozen);
2094 			break;
2095 
2096 		case CPU_DOWN_FAILED:
2097 			__cpufreq_add_dev(dev, NULL, frozen);
2098 			break;
2099 		}
2100 	}
2101 	return NOTIFY_OK;
2102 }
2103 
/* Hotplug notifier block; registered by cpufreq_register_driver(). */
static struct notifier_block __refdata cpufreq_cpu_notifier = {
	.notifier_call = cpufreq_cpu_callback,
};
2107 
2108 /*********************************************************************
2109  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
2110  *********************************************************************/
2111 
2112 /**
2113  * cpufreq_register_driver - register a CPU Frequency driver
2114  * @driver_data: A struct cpufreq_driver containing the values#
2115  * submitted by the CPU Frequency driver.
2116  *
2117  * Registers a CPU Frequency driver to this core code. This code
2118  * returns zero on success, -EBUSY when another driver got here first
2119  * (and isn't unregistered in the meantime).
2120  *
2121  */
2122 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
2123 {
2124 	unsigned long flags;
2125 	int ret;
2126 
2127 	if (cpufreq_disabled())
2128 		return -ENODEV;
2129 
2130 	if (!driver_data || !driver_data->verify || !driver_data->init ||
2131 	    !(driver_data->setpolicy || driver_data->target_index ||
2132 		    driver_data->target))
2133 		return -EINVAL;
2134 
2135 	pr_debug("trying to register driver %s\n", driver_data->name);
2136 
2137 	if (driver_data->setpolicy)
2138 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
2139 
2140 	write_lock_irqsave(&cpufreq_driver_lock, flags);
2141 	if (cpufreq_driver) {
2142 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2143 		return -EEXIST;
2144 	}
2145 	cpufreq_driver = driver_data;
2146 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2147 
2148 	ret = subsys_interface_register(&cpufreq_interface);
2149 	if (ret)
2150 		goto err_null_driver;
2151 
2152 	if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
2153 		int i;
2154 		ret = -ENODEV;
2155 
2156 		/* check for at least one working CPU */
2157 		for (i = 0; i < nr_cpu_ids; i++)
2158 			if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
2159 				ret = 0;
2160 				break;
2161 			}
2162 
2163 		/* if all ->init() calls failed, unregister */
2164 		if (ret) {
2165 			pr_debug("no CPU initialized for driver %s\n",
2166 							driver_data->name);
2167 			goto err_if_unreg;
2168 		}
2169 	}
2170 
2171 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
2172 	pr_debug("driver %s up and running\n", driver_data->name);
2173 
2174 	return 0;
2175 err_if_unreg:
2176 	subsys_interface_unregister(&cpufreq_interface);
2177 err_null_driver:
2178 	write_lock_irqsave(&cpufreq_driver_lock, flags);
2179 	cpufreq_driver = NULL;
2180 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2181 	return ret;
2182 }
2183 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
2184 
2185 /**
2186  * cpufreq_unregister_driver - unregister the current CPUFreq driver
2187  *
2188  * Unregister the current CPUFreq driver. Only call this if you have
2189  * the right to do so, i.e. if you have succeeded in initialising before!
2190  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
2191  * currently not initialised.
2192  */
2193 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
2194 {
2195 	unsigned long flags;
2196 
2197 	if (!cpufreq_driver || (driver != cpufreq_driver))
2198 		return -EINVAL;
2199 
2200 	pr_debug("unregistering driver %s\n", driver->name);
2201 
2202 	subsys_interface_unregister(&cpufreq_interface);
2203 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
2204 
2205 	down_write(&cpufreq_rwsem);
2206 	write_lock_irqsave(&cpufreq_driver_lock, flags);
2207 
2208 	cpufreq_driver = NULL;
2209 
2210 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2211 	up_write(&cpufreq_rwsem);
2212 
2213 	return 0;
2214 }
2215 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
2216 
2217 static int __init cpufreq_core_init(void)
2218 {
2219 	if (cpufreq_disabled())
2220 		return -ENODEV;
2221 
2222 	cpufreq_global_kobject = kobject_create();
2223 	BUG_ON(!cpufreq_global_kobject);
2224 	register_syscore_ops(&cpufreq_syscore_ops);
2225 
2226 	return 0;
2227 }
2228 core_initcall(cpufreq_core_init);
2229