1 /* 2 * drivers/cpufreq/cpufreq_conservative.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/init.h> 17 #include <linux/cpufreq.h> 18 #include <linux/cpu.h> 19 #include <linux/jiffies.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/mutex.h> 22 #include <linux/hrtimer.h> 23 #include <linux/tick.h> 24 #include <linux/ktime.h> 25 #include <linux/sched.h> 26 27 /* 28 * dbs is used in this file as a shortform for demandbased switching 29 * It helps to keep variable names smaller, simpler 30 */ 31 32 #define DEF_FREQUENCY_UP_THRESHOLD (80) 33 #define DEF_FREQUENCY_DOWN_THRESHOLD (20) 34 35 /* 36 * The polling frequency of this governor depends on the capability of 37 * the processor. Default polling frequency is 1000 times the transition 38 * latency of the processor. The governor will work on any processor with 39 * transition latency <= 10mS, using appropriate sampling 40 * rate. 41 * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) 42 * this governor will not work. 43 * All times here are in uS. 44 */ 45 #define MIN_SAMPLING_RATE_RATIO (2) 46 47 static unsigned int min_sampling_rate; 48 49 #define LATENCY_MULTIPLIER (1000) 50 #define MIN_LATENCY_MULTIPLIER (100) 51 #define DEF_SAMPLING_DOWN_FACTOR (1) 52 #define MAX_SAMPLING_DOWN_FACTOR (10) 53 #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) 54 55 static void do_dbs_timer(struct work_struct *work); 56 57 struct cpu_dbs_info_s { 58 cputime64_t prev_cpu_idle; 59 cputime64_t prev_cpu_wall; 60 cputime64_t prev_cpu_nice; 61 struct cpufreq_policy *cur_policy; 62 struct delayed_work work; 63 unsigned int down_skip; 64 unsigned int requested_freq; 65 int cpu; 66 unsigned int enable:1; 67 /* 68 * percpu mutex that serializes governor limit change with 69 * do_dbs_timer invocation. We do not want do_dbs_timer to run 70 * when user is changing the governor or limits. 71 */ 72 struct mutex timer_mutex; 73 }; 74 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info); 75 76 static unsigned int dbs_enable; /* number of CPUs using this policy */ 77 78 /* 79 * dbs_mutex protects dbs_enable in governor start/stop. 80 */ 81 static DEFINE_MUTEX(dbs_mutex); 82 83 static struct dbs_tuners { 84 unsigned int sampling_rate; 85 unsigned int sampling_down_factor; 86 unsigned int up_threshold; 87 unsigned int down_threshold; 88 unsigned int ignore_nice; 89 unsigned int freq_step; 90 } dbs_tuners_ins = { 91 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 92 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, 93 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 94 .ignore_nice = 0, 95 .freq_step = 5, 96 }; 97 98 /* keep track of frequency transitions */ 99 static int 100 dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 101 void *data) 102 { 103 struct cpufreq_freqs *freq = data; 104 struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info, 105 freq->cpu); 106 107 struct cpufreq_policy *policy; 108 109 if (!this_dbs_info->enable) 110 return 0; 111 112 policy = this_dbs_info->cur_policy; 113 114 /* 115 * we only care if our internally tracked freq moves outside 116 * the 'valid' ranges of freqency available to us otherwise 117 * we do not change it 118 */ 119 if (this_dbs_info->requested_freq > policy->max 120 || this_dbs_info->requested_freq < policy->min) 121 this_dbs_info->requested_freq = freq->new; 122 123 return 0; 124 } 125 126 static struct notifier_block dbs_cpufreq_notifier_block = { 127 .notifier_call = dbs_cpufreq_notifier 128 }; 129 130 /************************** sysfs interface ************************/ 131 static ssize_t show_sampling_rate_min(struct kobject *kobj, 132 struct attribute *attr, char *buf) 133 { 134 return sprintf(buf, "%u\n", min_sampling_rate); 135 } 136 137 define_one_global_ro(sampling_rate_min); 138 139 /* cpufreq_conservative Governor Tunables */ 140 #define show_one(file_name, object) \ 141 static ssize_t show_##file_name \ 142 (struct kobject *kobj, struct attribute *attr, char *buf) \ 143 { \ 144 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 145 } 146 show_one(sampling_rate, sampling_rate); 147 show_one(sampling_down_factor, sampling_down_factor); 148 show_one(up_threshold, up_threshold); 149 show_one(down_threshold, down_threshold); 150 show_one(ignore_nice_load, ignore_nice); 151 show_one(freq_step, freq_step); 152 153 static ssize_t store_sampling_down_factor(struct kobject *a, 154 struct attribute *b, 155 const char *buf, size_t count) 156 { 157 unsigned int input; 158 int ret; 159 ret = sscanf(buf, "%u", &input); 160 161 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 162 return -EINVAL; 163 164 dbs_tuners_ins.sampling_down_factor = input; 165 return count; 166 } 167 168 static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, 169 const char *buf, size_t count) 170 { 171 unsigned int input; 172 int ret; 173 ret = sscanf(buf, "%u", &input); 174 175 if (ret != 1) 176 return -EINVAL; 177 178 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); 179 return count; 180 } 181 182 static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, 183 const char *buf, size_t count) 184 { 185 unsigned int input; 186 int ret; 187 ret = sscanf(buf, "%u", &input); 188 189 if (ret != 1 || input > 100 || 190 input <= dbs_tuners_ins.down_threshold) 191 return -EINVAL; 192 193 dbs_tuners_ins.up_threshold = input; 194 return count; 195 } 196 197 static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, 198 const char *buf, size_t count) 199 { 200 unsigned int input; 201 int ret; 202 ret = sscanf(buf, "%u", &input); 203 204 /* cannot be lower than 11 otherwise freq will not fall */ 205 if (ret != 1 || input < 11 || input > 100 || 206 input >= dbs_tuners_ins.up_threshold) 207 return -EINVAL; 208 209 dbs_tuners_ins.down_threshold = input; 210 return count; 211 } 212 213 static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, 214 const char *buf, size_t count) 215 { 216 unsigned int input; 217 int ret; 218 219 unsigned int j; 220 221 ret = sscanf(buf, "%u", &input); 222 if (ret != 1) 223 return -EINVAL; 224 225 if (input > 1) 226 input = 1; 227 228 if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */ 229 return count; 230 231 dbs_tuners_ins.ignore_nice = input; 232 233 /* we need to re-evaluate prev_cpu_idle */ 234 for_each_online_cpu(j) { 235 struct cpu_dbs_info_s *dbs_info; 236 dbs_info = &per_cpu(cs_cpu_dbs_info, j); 237 dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 238 &dbs_info->prev_cpu_wall); 239 if (dbs_tuners_ins.ignore_nice) 240 dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 241 } 242 return count; 243 } 244 245 static ssize_t store_freq_step(struct kobject *a, struct attribute *b, 246 const char *buf, size_t count) 247 { 248 unsigned int input; 249 int ret; 250 ret = sscanf(buf, "%u", &input); 251 252 if (ret != 1) 253 return -EINVAL; 254 255 if (input > 100) 256 input = 100; 257 258 /* no need to test here if freq_step is zero as the user might actually 259 * want this, they would be crazy though :) */ 260 dbs_tuners_ins.freq_step = input; 261 return count; 262 } 263 264 define_one_global_rw(sampling_rate); 265 define_one_global_rw(sampling_down_factor); 266 define_one_global_rw(up_threshold); 267 define_one_global_rw(down_threshold); 268 define_one_global_rw(ignore_nice_load); 269 define_one_global_rw(freq_step); 270 271 static struct attribute *dbs_attributes[] = { 272 &sampling_rate_min.attr, 273 &sampling_rate.attr, 274 &sampling_down_factor.attr, 275 &up_threshold.attr, 276 &down_threshold.attr, 277 &ignore_nice_load.attr, 278 &freq_step.attr, 279 NULL 280 }; 281 282 static struct attribute_group dbs_attr_group = { 283 .attrs = dbs_attributes, 284 .name = "conservative", 285 }; 286 287 /************************** sysfs end ************************/ 288 289 static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 290 { 291 unsigned int load = 0; 292 unsigned int max_load = 0; 293 unsigned int freq_target; 294 295 struct cpufreq_policy *policy; 296 unsigned int j; 297 298 policy = this_dbs_info->cur_policy; 299 300 /* 301 * Every sampling_rate, we check, if current idle time is less 302 * than 20% (default), then we try to increase frequency 303 * Every sampling_rate*sampling_down_factor, we check, if current 304 * idle time is more than 80%, then we try to decrease frequency 305 * 306 * Any frequency increase takes it to the maximum frequency. 307 * Frequency reduction happens at minimum steps of 308 * 5% (default) of maximum frequency 309 */ 310 311 /* Get Absolute Load */ 312 for_each_cpu(j, policy->cpus) { 313 struct cpu_dbs_info_s *j_dbs_info; 314 cputime64_t cur_wall_time, cur_idle_time; 315 unsigned int idle_time, wall_time; 316 317 j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); 318 319 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); 320 321 wall_time = (unsigned int) 322 (cur_wall_time - j_dbs_info->prev_cpu_wall); 323 j_dbs_info->prev_cpu_wall = cur_wall_time; 324 325 idle_time = (unsigned int) 326 (cur_idle_time - j_dbs_info->prev_cpu_idle); 327 j_dbs_info->prev_cpu_idle = cur_idle_time; 328 329 if (dbs_tuners_ins.ignore_nice) { 330 u64 cur_nice; 331 unsigned long cur_nice_jiffies; 332 333 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 334 j_dbs_info->prev_cpu_nice; 335 /* 336 * Assumption: nice time between sampling periods will 337 * be less than 2^32 jiffies for 32 bit sys 338 */ 339 cur_nice_jiffies = (unsigned long) 340 cputime64_to_jiffies64(cur_nice); 341 342 j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 343 idle_time += jiffies_to_usecs(cur_nice_jiffies); 344 } 345 346 if (unlikely(!wall_time || wall_time < idle_time)) 347 continue; 348 349 load = 100 * (wall_time - idle_time) / wall_time; 350 351 if (load > max_load) 352 max_load = load; 353 } 354 355 /* 356 * break out if we 'cannot' reduce the speed as the user might 357 * want freq_step to be zero 358 */ 359 if (dbs_tuners_ins.freq_step == 0) 360 return; 361 362 /* Check for frequency increase */ 363 if (max_load > dbs_tuners_ins.up_threshold) { 364 this_dbs_info->down_skip = 0; 365 366 /* if we are already at full speed then break out early */ 367 if (this_dbs_info->requested_freq == policy->max) 368 return; 369 370 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 371 372 /* max freq cannot be less than 100. But who knows.... */ 373 if (unlikely(freq_target == 0)) 374 freq_target = 5; 375 376 this_dbs_info->requested_freq += freq_target; 377 if (this_dbs_info->requested_freq > policy->max) 378 this_dbs_info->requested_freq = policy->max; 379 380 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 381 CPUFREQ_RELATION_H); 382 return; 383 } 384 385 /* 386 * The optimal frequency is the frequency that is the lowest that 387 * can support the current CPU usage without triggering the up 388 * policy. To be safe, we focus 10 points under the threshold. 389 */ 390 if (max_load < (dbs_tuners_ins.down_threshold - 10)) { 391 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 392 393 this_dbs_info->requested_freq -= freq_target; 394 if (this_dbs_info->requested_freq < policy->min) 395 this_dbs_info->requested_freq = policy->min; 396 397 /* 398 * if we cannot reduce the frequency anymore, break out early 399 */ 400 if (policy->cur == policy->min) 401 return; 402 403 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 404 CPUFREQ_RELATION_H); 405 return; 406 } 407 } 408 409 static void do_dbs_timer(struct work_struct *work) 410 { 411 struct cpu_dbs_info_s *dbs_info = 412 container_of(work, struct cpu_dbs_info_s, work.work); 413 unsigned int cpu = dbs_info->cpu; 414 415 /* We want all CPUs to do sampling nearly on same jiffy */ 416 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 417 418 delay -= jiffies % delay; 419 420 mutex_lock(&dbs_info->timer_mutex); 421 422 dbs_check_cpu(dbs_info); 423 424 schedule_delayed_work_on(cpu, &dbs_info->work, delay); 425 mutex_unlock(&dbs_info->timer_mutex); 426 } 427 428 static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 429 { 430 /* We want all CPUs to do sampling nearly on same jiffy */ 431 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 432 delay -= jiffies % delay; 433 434 dbs_info->enable = 1; 435 INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); 436 schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); 437 } 438 439 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 440 { 441 dbs_info->enable = 0; 442 cancel_delayed_work_sync(&dbs_info->work); 443 } 444 445 static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 446 unsigned int event) 447 { 448 unsigned int cpu = policy->cpu; 449 struct cpu_dbs_info_s *this_dbs_info; 450 unsigned int j; 451 int rc; 452 453 this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); 454 455 switch (event) { 456 case CPUFREQ_GOV_START: 457 if ((!cpu_online(cpu)) || (!policy->cur)) 458 return -EINVAL; 459 460 mutex_lock(&dbs_mutex); 461 462 for_each_cpu(j, policy->cpus) { 463 struct cpu_dbs_info_s *j_dbs_info; 464 j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); 465 j_dbs_info->cur_policy = policy; 466 467 j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, 468 &j_dbs_info->prev_cpu_wall); 469 if (dbs_tuners_ins.ignore_nice) 470 j_dbs_info->prev_cpu_nice = 471 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 472 } 473 this_dbs_info->cpu = cpu; 474 this_dbs_info->down_skip = 0; 475 this_dbs_info->requested_freq = policy->cur; 476 477 mutex_init(&this_dbs_info->timer_mutex); 478 dbs_enable++; 479 /* 480 * Start the timerschedule work, when this governor 481 * is used for first time 482 */ 483 if (dbs_enable == 1) { 484 unsigned int latency; 485 /* policy latency is in nS. Convert it to uS first */ 486 latency = policy->cpuinfo.transition_latency / 1000; 487 if (latency == 0) 488 latency = 1; 489 490 rc = sysfs_create_group(cpufreq_global_kobject, 491 &dbs_attr_group); 492 if (rc) { 493 mutex_unlock(&dbs_mutex); 494 return rc; 495 } 496 497 /* 498 * conservative does not implement micro like ondemand 499 * governor, thus we are bound to jiffes/HZ 500 */ 501 min_sampling_rate = 502 MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); 503 /* Bring kernel and HW constraints together */ 504 min_sampling_rate = max(min_sampling_rate, 505 MIN_LATENCY_MULTIPLIER * latency); 506 dbs_tuners_ins.sampling_rate = 507 max(min_sampling_rate, 508 latency * LATENCY_MULTIPLIER); 509 510 cpufreq_register_notifier( 511 &dbs_cpufreq_notifier_block, 512 CPUFREQ_TRANSITION_NOTIFIER); 513 } 514 mutex_unlock(&dbs_mutex); 515 516 dbs_timer_init(this_dbs_info); 517 518 break; 519 520 case CPUFREQ_GOV_STOP: 521 dbs_timer_exit(this_dbs_info); 522 523 mutex_lock(&dbs_mutex); 524 dbs_enable--; 525 mutex_destroy(&this_dbs_info->timer_mutex); 526 527 /* 528 * Stop the timerschedule work, when this governor 529 * is used for first time 530 */ 531 if (dbs_enable == 0) 532 cpufreq_unregister_notifier( 533 &dbs_cpufreq_notifier_block, 534 CPUFREQ_TRANSITION_NOTIFIER); 535 536 mutex_unlock(&dbs_mutex); 537 if (!dbs_enable) 538 sysfs_remove_group(cpufreq_global_kobject, 539 &dbs_attr_group); 540 541 break; 542 543 case CPUFREQ_GOV_LIMITS: 544 mutex_lock(&this_dbs_info->timer_mutex); 545 if (policy->max < this_dbs_info->cur_policy->cur) 546 __cpufreq_driver_target( 547 this_dbs_info->cur_policy, 548 policy->max, CPUFREQ_RELATION_H); 549 else if (policy->min > this_dbs_info->cur_policy->cur) 550 __cpufreq_driver_target( 551 this_dbs_info->cur_policy, 552 policy->min, CPUFREQ_RELATION_L); 553 dbs_check_cpu(this_dbs_info); 554 mutex_unlock(&this_dbs_info->timer_mutex); 555 556 break; 557 } 558 return 0; 559 } 560 561 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 562 static 563 #endif 564 struct cpufreq_governor cpufreq_gov_conservative = { 565 .name = "conservative", 566 .governor = cpufreq_governor_dbs, 567 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 568 .owner = THIS_MODULE, 569 }; 570 571 static int __init cpufreq_gov_dbs_init(void) 572 { 573 return cpufreq_register_governor(&cpufreq_gov_conservative); 574 } 575 576 static void __exit cpufreq_gov_dbs_exit(void) 577 { 578 cpufreq_unregister_governor(&cpufreq_gov_conservative); 579 } 580 581 582 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); 583 MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " 584 "Low Latency Frequency Transition capable processors " 585 "optimised for use in a battery environment"); 586 MODULE_LICENSE("GPL"); 587 588 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 589 fs_initcall(cpufreq_gov_dbs_init); 590 #else 591 module_init(cpufreq_gov_dbs_init); 592 #endif 593 module_exit(cpufreq_gov_dbs_exit); 594