/*
 *  drivers/cpufreq/cpufreq_conservative.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *            (C)  2004 Alexander Clouter <alex-kernel@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps keep the variable names short and simple.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(0)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
#define MIN_FREQUENCY_DOWN_THRESHOLD		(0)
#define MAX_FREQUENCY_DOWN_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling interval is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
static unsigned int				def_sampling_rate;
#define MIN_SAMPLING_RATE			(def_sampling_rate / 2)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(100000)
#define DEF_SAMPLING_DOWN_FACTOR		(5)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)
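/*
 * Illustration (assumed figure, not from the original source): if
 * def_sampling_rate works out to 100000 us, writes to the sampling_rate
 * tunable are accepted between 50000 us (MIN_SAMPLING_RATE) and
 * 50000000 us (MAX_SAMPLING_RATE).
 */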

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;
	unsigned int		prev_cpu_idle_down;
	unsigned int		enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/* dbs_sem serialises tuner updates, the sampling work and governor events */
static DECLARE_MUTEX(dbs_sem);
static DECLARE_WORK(dbs_work, do_dbs_timer, NULL);

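/*
 * Summary of the tunables, derived from the store_* handlers and
 * dbs_check_cpu() below (this overview is not in the original file):
 *   sampling_rate        - interval between load checks, in us
 *   sampling_down_factor - multiple of sampling_rate used for the
 *                          frequency-decrease check
 *   up_threshold         - step the frequency up when the busiest CPU of
 *                          the policy is idle less than (100 - up_threshold)%
 *   down_threshold       - step the frequency down when every CPU is idle
 *                          more than (100 - down_threshold)%
 *   ignore_nice          - treat time spent in niced tasks as idle
 *   freq_step            - size of each frequency step, in % of policy->max
 */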
struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		down_threshold;
	unsigned int		ignore_nice;
	unsigned int		freq_step;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold		= DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
};

/*
 * Idle time as seen by this governor: idle + iowait, plus time spent in
 * niced tasks when ignore_nice is set (niced load is then treated as idle).
 */
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return	kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		(dbs_tuners_ins.ignore_nice ?
		 kstat_cpu(cpu).cpustat.nice : 0);
}
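/*
 * Illustrative figures (assumed, not from the original source): with
 * ignore_nice set, a CPU that accumulated 100 jiffies idle, 20 in iowait
 * and 30 running niced tasks reports 150 jiffies of "idle" time here;
 * with ignore_nice clear it reports 120.
 */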

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)					\
static struct freq_attr _name =					\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD ||
			input <= dbs_tuners_ins.down_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD ||
			input < MIN_FREQUENCY_DOWN_THRESHOLD ||
			input >= dbs_tuners_ins.up_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.down_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	down(&dbs_sem);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		up(&dbs_sem);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	up(&dbs_sem);

	return count;
}

static ssize_t store_freq_step(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 100)
		input = 100;

	/* no need to reject a freq_step of zero here, as the user might
	 * actually want this; they would be crazy though :) */
	down(&dbs_sem);
	dbs_tuners_ins.freq_step = input;
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name)					\
static struct freq_attr _name =					\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(freq_step);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice_load.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};

/************************** sysfs end ************************/
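/*
 * Illustrative usage from userspace (assuming the usual cpufreq sysfs
 * layout of this era and a hypothetical policy on cpu0; the exact path
 * may vary by configuration):
 *
 *   echo 25 > /sys/devices/system/cpu/cpu0/cpufreq/conservative/freq_step
 *   echo 1  > /sys/devices/system/cpu/cpu0/cpufreq/conservative/ignore_nice_load
 *
 * Out-of-range writes are either rejected with -EINVAL (thresholds,
 * sampling_rate) or clamped (freq_step, ignore_nice_load) by the store_*
 * handlers above.
 */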

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
	unsigned int freq_step;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	static int requested_freq[NR_CPUS];
	static unsigned short init_flag = 0;
	struct cpu_dbs_info_s *this_dbs_info;
	struct cpu_dbs_info_s *dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;

	/* on the very first invocation, seed requested_freq with each online
	 * CPU's current frequency */
	if (init_flag == 0) {
		for_each_online_cpu(j) {
			dbs_info = &per_cpu(cpu_dbs_info, j);
			requested_freq[j] = dbs_info->cur_policy->cur;
		}
		init_flag = 1;
	}

	/*
	 * The default safe range is 20% to 80%.
	 * Every sampling_rate we check
	 *	- if current idle time is less than 20%, then we try to
	 *	  increase the frequency
	 * Every sampling_rate * sampling_down_factor we check
	 *	- if current idle time is more than 80%, then we try to
	 *	  decrease the frequency
	 *
	 * Frequency increases and decreases both happen in steps of
	 * freq_step (default 5%) of the maximum frequency.
	 */
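	/*
	 * Worked example with assumed figures (not from the original
	 * source): with up_threshold = 80 and a sampling window of
	 * 50 jiffies, up_idle_ticks = (100 - 80) * 50 = 1000, so a raw
	 * idle delta below 10 jiffies (scaled: 10 * 100 < 1000, i.e. less
	 * than 20% idle) triggers a step up.  With down_threshold = 20 and
	 * sampling_down_factor = 5 the down check spans 250 jiffies:
	 * down_idle_ticks = (100 - 20) * 250 = 20000, so more than
	 * 200 idle jiffies (over 80% idle) triggers a step down.
	 */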

	/* Check for frequency increase */

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency increase */
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
		usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
					j_dbs_info->prev_cpu_idle_up;
		}
		/* if we are already at full speed then break out early */
		if (requested_freq[cpu] == policy->max)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		requested_freq[cpu] += freq_step;
		if (requested_freq[cpu] > policy->max)
			requested_freq[cpu] = policy->max;

		__cpufreq_driver_target(policy, requested_freq[cpu],
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	down_skip[cpu] = 0;

	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
			usecs_to_jiffies(freq_down_sampling_rate);

	if (idle_ticks > down_idle_ticks) {
		/* if we are already at the lowest speed then break out early
		 * or if we 'cannot' reduce the speed as the user might want
		 * freq_step to be zero */
		if (requested_freq[cpu] == policy->min
				|| dbs_tuners_ins.freq_step == 0)
			return;

		freq_step = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_step == 0))
			freq_step = 5;

		requested_freq[cpu] -= freq_step;
		if (requested_freq[cpu] < policy->min)
			requested_freq[cpu] = policy->min;

		__cpufreq_driver_target(policy, requested_freq[cpu],
			CPUFREQ_RELATION_H);
		return;
	}
}

static void do_dbs_timer(void *data)
{
	int i;
	down(&dbs_sem);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the sampling (delayed work) timer when this governor
		 * is used for the first time.
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency;
			if (latency < 1000)
				latency = 1000;

			def_sampling_rate = (latency / 1000) *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_tuners_ins.ignore_nice = 0;
			dbs_tuners_ins.freq_step = 5;

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the sampling work when this governor is no longer
		 * in use on any CPU.
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "conservative",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}

MODULE_AUTHOR("Alexander Clouter <alex-kernel@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);