/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(0)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)
#define MIN_FREQUENCY_DOWN_THRESHOLD		(0)
#define MAX_FREQUENCY_DOWN_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 mS, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 mS (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in uS.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE			(def_sampling_rate / 2)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define DEF_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)
#define sampling_rate_in_HZ(x)	((((x) * HZ) < (1000 * 1000)) ? 1 : (((x) * HZ) / (1000 * 1000)))

static void do_dbs_timer(void *data);

struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;
	unsigned int		prev_cpu_idle_down;
	unsigned int		enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DECLARE_MUTEX(dbs_sem);
static DECLARE_WORK(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		down_threshold;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold		= DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
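/*
 * Illustrative example (assumed numbers, not part of the original logic):
 * if the governor computes def_sampling_rate = 100,000 uS at start time,
 * sampling_rate_min reports 50,000 uS (def_sampling_rate / 2) and
 * sampling_rate_max reports 50,000,000 uS (500 * def_sampling_rate), and
 * store_sampling_rate() below rejects any value outside that range.
 */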
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD ||
			input <= dbs_tuners_ins.down_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_DOWN_THRESHOLD ||
			input < MIN_FREQUENCY_DOWN_THRESHOLD ||
			input >= dbs_tuners_ins.up_threshold) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.down_threshold = input;
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
	unsigned int total_idle_ticks;
	unsigned int freq_down_step;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	/*
	 * The default safe range is 20% to 80%.
	 * Every sampling_rate, we check
	 *	- If current idle time is less than 20%, then we try to
	 *	  increase frequency
	 * Every sampling_rate*sampling_down_factor, we check
	 *	- If current idle time is more than 80%, then we try to
	 *	  decrease frequency
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% of max_frequency
	 */
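	/*
	 * Worked example of the increase check below (assumed values, for
	 * illustration only): with HZ = 1000, sampling_rate = 100,000 uS
	 * and up_threshold = 80, one sample spans
	 * sampling_rate_in_HZ(100000) = 100 ticks, so
	 * up_idle_ticks = (100 - 80) * 100 = 2000. Since idle_ticks is
	 * scaled by 100, the CPU must have been idle for fewer than 20 of
	 * the last 100 ticks (under 20% idle) for the frequency to be
	 * raised to policy->max.
	 */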
	/* Check for frequency increase */
	total_idle_ticks = kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait;
	idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_up;
	this_dbs_info->prev_cpu_idle_up = total_idle_ticks;

	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		if (j == cpu)
			continue;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency increase */
		total_idle_ticks = kstat_cpu(j).cpustat.idle +
			kstat_cpu(j).cpustat.iowait;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
		sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		__cpufreq_driver_target(policy, policy->max,
			CPUFREQ_RELATION_H);
		down_skip[cpu] = 0;
		this_dbs_info->prev_cpu_idle_down = total_idle_ticks;
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	total_idle_ticks = kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait;
	idle_ticks = total_idle_ticks -
		this_dbs_info->prev_cpu_idle_down;
	this_dbs_info->prev_cpu_idle_down = total_idle_ticks;

	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		if (j == cpu)
			continue;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = kstat_cpu(j).cpustat.idle +
			kstat_cpu(j).cpustat.iowait;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	down_skip[cpu] = 0;

	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
		sampling_rate_in_HZ(freq_down_sampling_rate);

	if (idle_ticks > down_idle_ticks) {
		freq_down_step = (5 * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_down_step == 0))
			freq_down_step = 5;

		__cpufreq_driver_target(policy,
			policy->cur - freq_down_step,
			CPUFREQ_RELATION_H);
		return;
	}
}
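/*
 * Illustrative numbers for the frequency-decrease path above (assumed
 * values, for illustration only): with HZ = 1000, sampling_rate =
 * 100,000 uS and sampling_down_factor = 10, the decrease check runs only
 * every tenth sample, over a window of
 * sampling_rate_in_HZ(1,000,000 uS) = 1000 ticks, so
 * down_idle_ticks = (100 - 20) * 1000 = 80,000. Even the least idle CPU
 * in the policy must therefore have been idle for more than 800 of those
 * 1000 ticks (over 80% idle) before the target is stepped down by 5% of
 * policy->max.
 */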
static void do_dbs_timer(void *data)
{
	int i;
	down(&dbs_sem);
	for (i = 0; i < NR_CPUS; i++)
		if (cpu_online(i))
			dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			sampling_rate_in_HZ(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up =
				kstat_cpu(j).cpustat.idle +
				kstat_cpu(j).cpustat.iowait;
			j_dbs_info->prev_cpu_idle_down =
				kstat_cpu(j).cpustat.idle +
				kstat_cpu(j).cpustat.iowait;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time.
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */

			latency = policy->cpuinfo.transition_latency;
			if (latency < 1000)
				latency = 1000;

			def_sampling_rate = (latency / 1000) *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
			dbs_tuners_ins.sampling_rate = def_sampling_rate;
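			/*
			 * Illustrative example (assumed numbers): a driver
			 * reporting a transition latency of 100,000 nS
			 * (100 uS) yields def_sampling_rate =
			 * 100 * DEF_SAMPLING_RATE_LATENCY_MULTIPLIER =
			 * 100,000 uS, so dbs_check_cpu() runs roughly
			 * every 100 mS by default.
			 */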

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer schedule work when this governor
		 * is used for the last time.
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);

		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "ondemand",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};
EXPORT_SYMBOL(cpufreq_gov_dbs);

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);