1 /* 2 * drivers/cpufreq/cpufreq_conservative.c 3 * 4 * Copyright (C) 2001 Russell King 5 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 6 * Jun Nakajima <jun.nakajima@intel.com> 7 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/smp.h> 17 #include <linux/init.h> 18 #include <linux/interrupt.h> 19 #include <linux/ctype.h> 20 #include <linux/cpufreq.h> 21 #include <linux/sysctl.h> 22 #include <linux/types.h> 23 #include <linux/fs.h> 24 #include <linux/sysfs.h> 25 #include <linux/cpu.h> 26 #include <linux/kmod.h> 27 #include <linux/workqueue.h> 28 #include <linux/jiffies.h> 29 #include <linux/kernel_stat.h> 30 #include <linux/percpu.h> 31 #include <linux/mutex.h> 32 /* 33 * dbs is used in this file as a shortform for demandbased switching 34 * It helps to keep variable names smaller, simpler 35 */ 36 37 #define DEF_FREQUENCY_UP_THRESHOLD (80) 38 #define DEF_FREQUENCY_DOWN_THRESHOLD (20) 39 40 /* 41 * The polling frequency of this governor depends on the capability of 42 * the processor. Default polling frequency is 1000 times the transition 43 * latency of the processor. The governor will work on any processor with 44 * transition latency <= 10mS, using appropriate sampling 45 * rate. 46 * For CPUs with transition latency > 10mS (mostly drivers 47 * with CPUFREQ_ETERNAL), this governor will not work. 48 * All times here are in uS. 49 */ 50 static unsigned int def_sampling_rate; 51 #define MIN_SAMPLING_RATE_RATIO (2) 52 /* for correct statistics, we need at least 10 ticks between each measure */ 53 #define MIN_STAT_SAMPLING_RATE \ 54 (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) 55 #define MIN_SAMPLING_RATE \ 56 (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) 57 /* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon 58 * Define the minimal settable sampling rate to the greater of: 59 * - "HW transition latency" * 100 (same as default sampling / 10) 60 * - MIN_STAT_SAMPLING_RATE 61 * To avoid that userspace shoots itself. 62 */ 63 static unsigned int minimum_sampling_rate(void) 64 { 65 return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE); 66 } 67 68 /* This will also vanish soon with removing sampling_rate_max */ 69 #define MAX_SAMPLING_RATE (500 * def_sampling_rate) 70 #define LATENCY_MULTIPLIER (1000) 71 #define DEF_SAMPLING_DOWN_FACTOR (1) 72 #define MAX_SAMPLING_DOWN_FACTOR (10) 73 #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) 74 75 static void do_dbs_timer(struct work_struct *work); 76 77 struct cpu_dbs_info_s { 78 struct cpufreq_policy *cur_policy; 79 unsigned int prev_cpu_idle_up; 80 unsigned int prev_cpu_idle_down; 81 unsigned int enable; 82 unsigned int down_skip; 83 unsigned int requested_freq; 84 }; 85 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 86 87 static unsigned int dbs_enable; /* number of CPUs using this policy */ 88 89 /* 90 * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug 91 * lock and dbs_mutex. cpu_hotplug lock should always be held before 92 * dbs_mutex. If any function that can potentially take cpu_hotplug lock 93 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then 94 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock 95 * is recursive for the same process. -Venki 96 */ 97 static DEFINE_MUTEX(dbs_mutex); 98 static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); 99 100 struct dbs_tuners { 101 unsigned int sampling_rate; 102 unsigned int sampling_down_factor; 103 unsigned int up_threshold; 104 unsigned int down_threshold; 105 unsigned int ignore_nice; 106 unsigned int freq_step; 107 }; 108 109 static struct dbs_tuners dbs_tuners_ins = { 110 .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 111 .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, 112 .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, 113 .ignore_nice = 0, 114 .freq_step = 5, 115 }; 116 117 static inline unsigned int get_cpu_idle_time(unsigned int cpu) 118 { 119 unsigned int add_nice = 0, ret; 120 121 if (dbs_tuners_ins.ignore_nice) 122 add_nice = kstat_cpu(cpu).cpustat.nice; 123 124 ret = kstat_cpu(cpu).cpustat.idle + 125 kstat_cpu(cpu).cpustat.iowait + 126 add_nice; 127 128 return ret; 129 } 130 131 /* keep track of frequency transitions */ 132 static int 133 dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 134 void *data) 135 { 136 struct cpufreq_freqs *freq = data; 137 struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, 138 freq->cpu); 139 140 if (!this_dbs_info->enable) 141 return 0; 142 143 this_dbs_info->requested_freq = freq->new; 144 145 return 0; 146 } 147 148 static struct notifier_block dbs_cpufreq_notifier_block = { 149 .notifier_call = dbs_cpufreq_notifier 150 }; 151 152 /************************** sysfs interface ************************/ 153 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) 154 { 155 static int print_once; 156 157 if (!print_once) { 158 printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " 159 "sysfs file is deprecated - used by: %s\n", 160 current->comm); 161 print_once = 1; 162 } 163 return sprintf(buf, "%u\n", MAX_SAMPLING_RATE); 164 } 165 166 static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) 167 { 168 static int print_once; 169 170 if (!print_once) { 171 printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max " 172 "sysfs file is deprecated - used by: %s\n", current->comm); 173 print_once = 1; 174 } 175 return sprintf(buf, "%u\n", MIN_SAMPLING_RATE); 176 } 177 178 #define define_one_ro(_name) \ 179 static struct freq_attr _name = \ 180 __ATTR(_name, 0444, show_##_name, NULL) 181 182 define_one_ro(sampling_rate_max); 183 define_one_ro(sampling_rate_min); 184 185 /* cpufreq_conservative Governor Tunables */ 186 #define show_one(file_name, object) \ 187 static ssize_t show_##file_name \ 188 (struct cpufreq_policy *unused, char *buf) \ 189 { \ 190 return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ 191 } 192 show_one(sampling_rate, sampling_rate); 193 show_one(sampling_down_factor, sampling_down_factor); 194 show_one(up_threshold, up_threshold); 195 show_one(down_threshold, down_threshold); 196 show_one(ignore_nice_load, ignore_nice); 197 show_one(freq_step, freq_step); 198 199 static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, 200 const char *buf, size_t count) 201 { 202 unsigned int input; 203 int ret; 204 ret = sscanf(buf, "%u", &input); 205 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) 206 return -EINVAL; 207 208 mutex_lock(&dbs_mutex); 209 dbs_tuners_ins.sampling_down_factor = input; 210 mutex_unlock(&dbs_mutex); 211 212 return count; 213 } 214 215 static ssize_t store_sampling_rate(struct cpufreq_policy *unused, 216 const char *buf, size_t count) 217 { 218 unsigned int input; 219 int ret; 220 ret = sscanf(buf, "%u", &input); 221 222 mutex_lock(&dbs_mutex); 223 if (ret != 1) { 224 mutex_unlock(&dbs_mutex); 225 return -EINVAL; 226 } 227 dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate()); 228 mutex_unlock(&dbs_mutex); 229 230 return count; 231 } 232 233 static ssize_t store_up_threshold(struct cpufreq_policy *unused, 234 const char *buf, size_t count) 235 { 236 unsigned int input; 237 int ret; 238 ret = sscanf(buf, "%u", &input); 239 240 mutex_lock(&dbs_mutex); 241 if (ret != 1 || input > 100 || 242 input <= dbs_tuners_ins.down_threshold) { 243 mutex_unlock(&dbs_mutex); 244 return -EINVAL; 245 } 246 247 dbs_tuners_ins.up_threshold = input; 248 mutex_unlock(&dbs_mutex); 249 250 return count; 251 } 252 253 static ssize_t store_down_threshold(struct cpufreq_policy *unused, 254 const char *buf, size_t count) 255 { 256 unsigned int input; 257 int ret; 258 ret = sscanf(buf, "%u", &input); 259 260 mutex_lock(&dbs_mutex); 261 if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) { 262 mutex_unlock(&dbs_mutex); 263 return -EINVAL; 264 } 265 266 dbs_tuners_ins.down_threshold = input; 267 mutex_unlock(&dbs_mutex); 268 269 return count; 270 } 271 272 static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, 273 const char *buf, size_t count) 274 { 275 unsigned int input; 276 int ret; 277 278 unsigned int j; 279 280 ret = sscanf(buf, "%u", &input); 281 if (ret != 1) 282 return -EINVAL; 283 284 if (input > 1) 285 input = 1; 286 287 mutex_lock(&dbs_mutex); 288 if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ 289 mutex_unlock(&dbs_mutex); 290 return count; 291 } 292 dbs_tuners_ins.ignore_nice = input; 293 294 /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ 295 for_each_online_cpu(j) { 296 struct cpu_dbs_info_s *j_dbs_info; 297 j_dbs_info = &per_cpu(cpu_dbs_info, j); 298 j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); 299 j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; 300 } 301 mutex_unlock(&dbs_mutex); 302 303 return count; 304 } 305 306 static ssize_t store_freq_step(struct cpufreq_policy *policy, 307 const char *buf, size_t count) 308 { 309 unsigned int input; 310 int ret; 311 312 ret = sscanf(buf, "%u", &input); 313 314 if (ret != 1) 315 return -EINVAL; 316 317 if (input > 100) 318 input = 100; 319 320 /* no need to test here if freq_step is zero as the user might actually 321 * want this, they would be crazy though :) */ 322 mutex_lock(&dbs_mutex); 323 dbs_tuners_ins.freq_step = input; 324 mutex_unlock(&dbs_mutex); 325 326 return count; 327 } 328 329 #define define_one_rw(_name) \ 330 static struct freq_attr _name = \ 331 __ATTR(_name, 0644, show_##_name, store_##_name) 332 333 define_one_rw(sampling_rate); 334 define_one_rw(sampling_down_factor); 335 define_one_rw(up_threshold); 336 define_one_rw(down_threshold); 337 define_one_rw(ignore_nice_load); 338 define_one_rw(freq_step); 339 340 static struct attribute *dbs_attributes[] = { 341 &sampling_rate_max.attr, 342 &sampling_rate_min.attr, 343 &sampling_rate.attr, 344 &sampling_down_factor.attr, 345 &up_threshold.attr, 346 &down_threshold.attr, 347 &ignore_nice_load.attr, 348 &freq_step.attr, 349 NULL 350 }; 351 352 static struct attribute_group dbs_attr_group = { 353 .attrs = dbs_attributes, 354 .name = "conservative", 355 }; 356 357 /************************** sysfs end ************************/ 358 359 static void dbs_check_cpu(int cpu) 360 { 361 unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; 362 unsigned int tmp_idle_ticks, total_idle_ticks; 363 unsigned int freq_target; 364 unsigned int freq_down_sampling_rate; 365 struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 366 struct cpufreq_policy *policy; 367 368 if (!this_dbs_info->enable) 369 return; 370 371 policy = this_dbs_info->cur_policy; 372 373 /* 374 * The default safe range is 20% to 80% 375 * Every sampling_rate, we check 376 * - If current idle time is less than 20%, then we try to 377 * increase frequency 378 * Every sampling_rate*sampling_down_factor, we check 379 * - If current idle time is more than 80%, then we try to 380 * decrease frequency 381 * 382 * Any frequency increase takes it to the maximum frequency. 383 * Frequency reduction happens at minimum steps of 384 * 5% (default) of max_frequency 385 */ 386 387 /* Check for frequency increase */ 388 idle_ticks = UINT_MAX; 389 390 /* Check for frequency increase */ 391 total_idle_ticks = get_cpu_idle_time(cpu); 392 tmp_idle_ticks = total_idle_ticks - 393 this_dbs_info->prev_cpu_idle_up; 394 this_dbs_info->prev_cpu_idle_up = total_idle_ticks; 395 396 if (tmp_idle_ticks < idle_ticks) 397 idle_ticks = tmp_idle_ticks; 398 399 /* Scale idle ticks by 100 and compare with up and down ticks */ 400 idle_ticks *= 100; 401 up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * 402 usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 403 404 if (idle_ticks < up_idle_ticks) { 405 this_dbs_info->down_skip = 0; 406 this_dbs_info->prev_cpu_idle_down = 407 this_dbs_info->prev_cpu_idle_up; 408 409 /* if we are already at full speed then break out early */ 410 if (this_dbs_info->requested_freq == policy->max) 411 return; 412 413 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 414 415 /* max freq cannot be less than 100. But who knows.... */ 416 if (unlikely(freq_target == 0)) 417 freq_target = 5; 418 419 this_dbs_info->requested_freq += freq_target; 420 if (this_dbs_info->requested_freq > policy->max) 421 this_dbs_info->requested_freq = policy->max; 422 423 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 424 CPUFREQ_RELATION_H); 425 return; 426 } 427 428 /* Check for frequency decrease */ 429 this_dbs_info->down_skip++; 430 if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor) 431 return; 432 433 /* Check for frequency decrease */ 434 total_idle_ticks = this_dbs_info->prev_cpu_idle_up; 435 tmp_idle_ticks = total_idle_ticks - 436 this_dbs_info->prev_cpu_idle_down; 437 this_dbs_info->prev_cpu_idle_down = total_idle_ticks; 438 439 if (tmp_idle_ticks < idle_ticks) 440 idle_ticks = tmp_idle_ticks; 441 442 /* Scale idle ticks by 100 and compare with up and down ticks */ 443 idle_ticks *= 100; 444 this_dbs_info->down_skip = 0; 445 446 freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * 447 dbs_tuners_ins.sampling_down_factor; 448 down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * 449 usecs_to_jiffies(freq_down_sampling_rate); 450 451 if (idle_ticks > down_idle_ticks) { 452 /* 453 * if we are already at the lowest speed then break out early 454 * or if we 'cannot' reduce the speed as the user might want 455 * freq_target to be zero 456 */ 457 if (this_dbs_info->requested_freq == policy->min 458 || dbs_tuners_ins.freq_step == 0) 459 return; 460 461 freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; 462 463 /* max freq cannot be less than 100. But who knows.... */ 464 if (unlikely(freq_target == 0)) 465 freq_target = 5; 466 467 this_dbs_info->requested_freq -= freq_target; 468 if (this_dbs_info->requested_freq < policy->min) 469 this_dbs_info->requested_freq = policy->min; 470 471 __cpufreq_driver_target(policy, this_dbs_info->requested_freq, 472 CPUFREQ_RELATION_H); 473 return; 474 } 475 } 476 477 static void do_dbs_timer(struct work_struct *work) 478 { 479 int i; 480 mutex_lock(&dbs_mutex); 481 for_each_online_cpu(i) 482 dbs_check_cpu(i); 483 schedule_delayed_work(&dbs_work, 484 usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); 485 mutex_unlock(&dbs_mutex); 486 } 487 488 static inline void dbs_timer_init(void) 489 { 490 init_timer_deferrable(&dbs_work.timer); 491 schedule_delayed_work(&dbs_work, 492 usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); 493 return; 494 } 495 496 static inline void dbs_timer_exit(void) 497 { 498 cancel_delayed_work(&dbs_work); 499 return; 500 } 501 502 static int cpufreq_governor_dbs(struct cpufreq_policy *policy, 503 unsigned int event) 504 { 505 unsigned int cpu = policy->cpu; 506 struct cpu_dbs_info_s *this_dbs_info; 507 unsigned int j; 508 int rc; 509 510 this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 511 512 switch (event) { 513 case CPUFREQ_GOV_START: 514 if ((!cpu_online(cpu)) || (!policy->cur)) 515 return -EINVAL; 516 517 if (this_dbs_info->enable) /* Already enabled */ 518 break; 519 520 mutex_lock(&dbs_mutex); 521 522 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); 523 if (rc) { 524 mutex_unlock(&dbs_mutex); 525 return rc; 526 } 527 528 for_each_cpu(j, policy->cpus) { 529 struct cpu_dbs_info_s *j_dbs_info; 530 j_dbs_info = &per_cpu(cpu_dbs_info, j); 531 j_dbs_info->cur_policy = policy; 532 533 j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); 534 j_dbs_info->prev_cpu_idle_down 535 = j_dbs_info->prev_cpu_idle_up; 536 } 537 this_dbs_info->enable = 1; 538 this_dbs_info->down_skip = 0; 539 this_dbs_info->requested_freq = policy->cur; 540 541 dbs_enable++; 542 /* 543 * Start the timerschedule work, when this governor 544 * is used for first time 545 */ 546 if (dbs_enable == 1) { 547 unsigned int latency; 548 /* policy latency is in nS. Convert it to uS first */ 549 latency = policy->cpuinfo.transition_latency / 1000; 550 if (latency == 0) 551 latency = 1; 552 553 def_sampling_rate = 554 max(10 * latency * LATENCY_MULTIPLIER, 555 MIN_STAT_SAMPLING_RATE); 556 557 dbs_tuners_ins.sampling_rate = def_sampling_rate; 558 559 dbs_timer_init(); 560 cpufreq_register_notifier( 561 &dbs_cpufreq_notifier_block, 562 CPUFREQ_TRANSITION_NOTIFIER); 563 } 564 565 mutex_unlock(&dbs_mutex); 566 break; 567 568 case CPUFREQ_GOV_STOP: 569 mutex_lock(&dbs_mutex); 570 this_dbs_info->enable = 0; 571 sysfs_remove_group(&policy->kobj, &dbs_attr_group); 572 dbs_enable--; 573 /* 574 * Stop the timerschedule work, when this governor 575 * is used for first time 576 */ 577 if (dbs_enable == 0) { 578 dbs_timer_exit(); 579 cpufreq_unregister_notifier( 580 &dbs_cpufreq_notifier_block, 581 CPUFREQ_TRANSITION_NOTIFIER); 582 } 583 584 mutex_unlock(&dbs_mutex); 585 586 break; 587 588 case CPUFREQ_GOV_LIMITS: 589 mutex_lock(&dbs_mutex); 590 if (policy->max < this_dbs_info->cur_policy->cur) 591 __cpufreq_driver_target( 592 this_dbs_info->cur_policy, 593 policy->max, CPUFREQ_RELATION_H); 594 else if (policy->min > this_dbs_info->cur_policy->cur) 595 __cpufreq_driver_target( 596 this_dbs_info->cur_policy, 597 policy->min, CPUFREQ_RELATION_L); 598 mutex_unlock(&dbs_mutex); 599 break; 600 } 601 return 0; 602 } 603 604 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 605 static 606 #endif 607 struct cpufreq_governor cpufreq_gov_conservative = { 608 .name = "conservative", 609 .governor = cpufreq_governor_dbs, 610 .max_transition_latency = TRANSITION_LATENCY_LIMIT, 611 .owner = THIS_MODULE, 612 }; 613 614 static int __init cpufreq_gov_dbs_init(void) 615 { 616 return cpufreq_register_governor(&cpufreq_gov_conservative); 617 } 618 619 static void __exit cpufreq_gov_dbs_exit(void) 620 { 621 /* Make sure that the scheduled work is indeed not running */ 622 flush_scheduled_work(); 623 624 cpufreq_unregister_governor(&cpufreq_gov_conservative); 625 } 626 627 628 MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>"); 629 MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " 630 "Low Latency Frequency Transition capable processors " 631 "optimised for use in a battery environment"); 632 MODULE_LICENSE("GPL"); 633 634 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE 635 fs_initcall(cpufreq_gov_dbs_init); 636 #else 637 module_init(cpufreq_gov_dbs_init); 638 #endif 639 module_exit(cpufreq_gov_dbs_exit); 640