/*
 *  drivers/cpufreq/cpufreq_conservative.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *            (C)  2009 Alexander Clouter <alex@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names shorter and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL), this governor will not work.
 * All times here are in us.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			\
			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			\
			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
/* The MIN_SAMPLING_RATE above will vanish, together with its sysfs file, soon.
 * Define the minimum settable sampling rate as the greater of:
 *   - "HW transition latency" * 100 (same as the default sampling rate / 10)
 *   - MIN_STAT_SAMPLING_RATE
 * so that userspace cannot shoot itself in the foot.
 */
static unsigned int minimum_sampling_rate(void)
{
	return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE);
}
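/*
 * Illustrative arithmetic, assuming HZ=250 (so one tick is 4000 us) and a
 * hypothetical transition latency of 100 us:
 *   MIN_STAT_SAMPLING_RATE  = 2 * jiffies_to_usecs(10) = 2 * 40000 = 80000 us
 *   def_sampling_rate       = 1000 * 100               = 100000 us
 *   minimum_sampling_rate() = max(100000 / 10, 80000)  = 80000 us
 */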

/* This will also vanish soon with the removal of sampling_rate_max */
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define LATENCY_MULTIPLIER			(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);

struct cpu_dbs_info_s {
	struct cpufreq_policy *cur_policy;
	unsigned int prev_cpu_idle_up;
	unsigned int prev_cpu_idle_down;
	unsigned int enable;
	unsigned int down_skip;
	unsigned int requested_freq;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * DEADLOCK ALERT! There is an ordering requirement between the cpu_hotplug
 * lock and dbs_mutex. The cpu_hotplug lock should always be held before
 * dbs_mutex. If any function that can potentially take the cpu_hotplug lock
 * (like __cpufreq_driver_target()) is called with dbs_mutex taken, then
 * the cpu_hotplug lock must be taken before that. Note that the cpu_hotplug
 * lock is recursive for the same process. -Venki
 */
static DEFINE_MUTEX(dbs_mutex);
static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer);

struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int down_threshold;
	unsigned int ignore_nice;
	unsigned int freq_step;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice = 0,
	.freq_step = 5,
};
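/*
 * Illustrative example (hypothetical numbers): with the default
 * freq_step = 5 and a policy->max of 2000000 kHz, every up or down
 * adjustment made in dbs_check_cpu() moves requested_freq by
 * 5 * 2000000 / 100 = 100000 kHz.
 */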

static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	unsigned int add_nice = 0, ret;

	if (dbs_tuners_ins.ignore_nice)
		add_nice = kstat_cpu(cpu).cpustat.nice;

	ret = kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		add_nice;

	return ret;
}
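/*
 * Note: the cpustat counters summed above are maintained by the kernel tick,
 * so the result is effectively in tick-sized units; the comparisons in
 * dbs_check_cpu() therefore convert the us-based tunables with
 * usecs_to_jiffies() before comparing.
 */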

/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
		     void *data)
{
	struct cpufreq_freqs *freq = data;
	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info,
							freq->cpu);

	if (!this_dbs_info->enable)
		return 0;

	this_dbs_info->requested_freq = freq->new;

	return 0;
}

static struct notifier_block dbs_cpufreq_notifier_block = {
	.notifier_call = dbs_cpufreq_notifier
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	static int print_once;

	if (!print_once) {
		printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max "
		       "sysfs file is deprecated - used by: %s\n",
		       current->comm);
		print_once = 1;
	}
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	static int print_once;

	if (!print_once) {
		printk(KERN_INFO "CPUFREQ: conservative sampling_rate_min "
		       "sysfs file is deprecated - used by: %s\n",
		       current->comm);
		print_once = 1;
	}
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)				\
static struct freq_attr _name =				\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_down_factor = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}
	dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate());
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > 100 ||
	    input <= dbs_tuners_ins.down_threshold) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	mutex_lock(&dbs_mutex);
	if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) {
		mutex_unlock(&dbs_mutex);
		return -EINVAL;
	}

	dbs_tuners_ins.down_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_freq_step(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 100)
		input = 100;

	/* no need to test here if freq_step is zero as the user might actually
	 * want this, they would be crazy though :) */
	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.freq_step = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(freq_step);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice_load.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};
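/*
 * With the attribute group above, the tunables appear under
 * /sys/devices/system/cpu/cpuX/cpufreq/conservative/ once the governor is
 * active on a policy, e.g. (illustrative shell usage):
 *   echo 100000 > /sys/devices/system/cpu/cpu0/cpufreq/conservative/sampling_rate
 */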

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
	unsigned int tmp_idle_ticks, total_idle_ticks;
	unsigned int freq_target;
	unsigned int freq_down_sampling_rate;
	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	struct cpufreq_policy *policy;

	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;

	/*
	 * The default safe range is 20% to 80%.
	 * Every sampling_rate, we check
	 *	- If current idle time is less than 20%, then we try to
	 *	  increase frequency
	 * Every sampling_rate*sampling_down_factor, we check
	 *	- If current idle time is more than 80%, then we try to
	 *	  decrease frequency
	 *
	 * Frequency increases and decreases happen in steps of freq_step,
	 * which defaults to 5% of the maximum frequency.
	 */
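	/*
	 * Illustrative numbers (assuming HZ=250, so one tick = 4000 us):
	 * with sampling_rate = 80000 us and up_threshold = 80, the window is
	 * usecs_to_jiffies(80000) = 20 ticks, so up_idle_ticks =
	 * (100 - 80) * 20 = 400.  Because idle_ticks is scaled by 100 below,
	 * a frequency increase is requested when the CPU was idle for fewer
	 * than 4 of those 20 ticks, i.e. less than 20% idle.
	 */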

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;

	total_idle_ticks = get_cpu_idle_time(cpu);
	tmp_idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_up;
	this_dbs_info->prev_cpu_idle_up = total_idle_ticks;

	if (tmp_idle_ticks < idle_ticks)
		idle_ticks = tmp_idle_ticks;

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		this_dbs_info->down_skip = 0;
		this_dbs_info->prev_cpu_idle_down =
			this_dbs_info->prev_cpu_idle_up;

		/* if we are already at full speed then break out early */
		if (this_dbs_info->requested_freq == policy->max)
			return;

		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_target == 0))
			freq_target = 5;

		this_dbs_info->requested_freq += freq_target;
		if (this_dbs_info->requested_freq > policy->max)
			this_dbs_info->requested_freq = policy->max;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	this_dbs_info->down_skip++;
	if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor)
		return;

	total_idle_ticks = this_dbs_info->prev_cpu_idle_up;
	tmp_idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_down;
	this_dbs_info->prev_cpu_idle_down = total_idle_ticks;

	if (tmp_idle_ticks < idle_ticks)
		idle_ticks = tmp_idle_ticks;

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	this_dbs_info->down_skip = 0;

	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
		usecs_to_jiffies(freq_down_sampling_rate);
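	/*
	 * Illustrative numbers (same HZ=250 assumption as above): with
	 * down_threshold = 20, sampling_rate = 80000 us and
	 * sampling_down_factor = 1, down_idle_ticks = (100 - 20) * 20 = 1600,
	 * so a frequency decrease is requested only when the CPU was idle
	 * for more than 16 of the last 20 ticks, i.e. more than 80% idle.
	 */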

	if (idle_ticks > down_idle_ticks) {
		/*
		 * if we are already at the lowest speed then break out early
		 * or if we 'cannot' reduce the speed as the user might want
		 * freq_target to be zero
		 */
		if (this_dbs_info->requested_freq == policy->min
				|| dbs_tuners_ins.freq_step == 0)
			return;

		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_target == 0))
			freq_target = 5;

		this_dbs_info->requested_freq -= freq_target;
		if (this_dbs_info->requested_freq < policy->min)
			this_dbs_info->requested_freq = policy->min;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
				CPUFREQ_RELATION_H);
		return;
	}
}

static void do_dbs_timer(struct work_struct *work)
{
	int i;
	mutex_lock(&dbs_mutex);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	mutex_unlock(&dbs_mutex);
}

static inline void dbs_timer_init(void)
{
	init_timer_deferrable(&dbs_work.timer);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		if (this_dbs_info->enable) /* Already enabled */
			break;

		mutex_lock(&dbs_mutex);

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
		if (rc) {
			mutex_unlock(&dbs_mutex);
			return rc;
		}

		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			/* initialise each CPU's counters from its own idle time */
			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		this_dbs_info->down_skip = 0;
		this_dbs_info->requested_freq = policy->cur;

		dbs_enable++;
		/*
		 * Start the timer/schedule the work when this governor is
		 * used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate =
				max(10 * latency * LATENCY_MULTIPLIER,
				    MIN_STAT_SAMPLING_RATE);

			dbs_tuners_ins.sampling_rate = def_sampling_rate;

			dbs_timer_init();
			cpufreq_register_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		mutex_unlock(&dbs_mutex);
		break;

	case CPUFREQ_GOV_STOP:
		mutex_lock(&dbs_mutex);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer/schedule work when this governor is no
		 * longer in use on any CPU
		 */
		if (dbs_enable == 0) {
			dbs_timer_exit();
			cpufreq_unregister_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		mutex_unlock(&dbs_mutex);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&dbs_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&dbs_mutex);
		break;
	}
	return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
	.name			= "conservative",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};
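/*
 * Illustrative shell usage (not part of this driver): once the module is
 * loaded, the governor can be selected per CPU with, e.g.
 *   echo conservative > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */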

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_conservative);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_conservative);
}


MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);