/*
 * drivers/cpufreq/cpufreq_conservative.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *           (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_FREQUENCY_DOWN_THRESHOLD		(20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. Default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us (microseconds).
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
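/*
 * A rough worked example of how the default sampling rate is derived from
 * the constants above in cpufreq_governor_dbs() below, under the assumed
 * (illustrative) numbers HZ=1000 and a driver-reported transition latency
 * of 10,000 ns:
 *
 *   latency           = 10,000 ns / 1000 = 10 us
 *   min_sampling_rate = max(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10),
 *                           MIN_LATENCY_MULTIPLIER * latency)
 *                     = max(2 * 10,000 us, 100 * 10 us)  = 20,000 us
 *   sampling_rate     = max(min_sampling_rate, latency * LATENCY_MULTIPLIER)
 *                     = max(20,000 us, 10,000 us)        = 20,000 us (20 ms)
 *
 * The actual numbers depend on HZ and on the hardware's transition latency.
 */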
static void do_dbs_timer(struct work_struct *work);

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	unsigned int down_skip;
	unsigned int requested_freq;
	int cpu;
	unsigned int enable:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int sampling_down_factor;
	unsigned int up_threshold;
	unsigned int down_threshold;
	unsigned int ignore_nice;
	unsigned int freq_step;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.ignore_nice = 0,
	.freq_step = 5,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
						  cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);
	else
		idle_time += get_cpu_iowait_time_us(cpu, wall);

	return idle_time;
}
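/*
 * Note: get_cpu_idle_time_us() returns -1ULL when fine-grained NO_HZ idle
 * accounting is not available, in which case the governor falls back to the
 * jiffy-based statistics above. Iowait time is counted as idle time here.
 */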
/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
		     void *data)
{
	struct cpufreq_freqs *freq = data;
	struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info,
							freq->cpu);

	struct cpufreq_policy *policy;

	if (!this_dbs_info->enable)
		return 0;

	policy = this_dbs_info->cur_policy;

	/*
	 * we only care if our internally tracked freq moves outside the
	 * 'valid' range of frequencies available to us, otherwise we do
	 * not change it
	 */
	if (this_dbs_info->requested_freq > policy->max
			|| this_dbs_info->requested_freq < policy->min)
		this_dbs_info->requested_freq = freq->new;

	return 0;
}

static struct notifier_block dbs_cpufreq_notifier_block = {
	.notifier_call = dbs_cpufreq_notifier
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct kobject *a,
					  struct attribute *b,
					  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	dbs_tuners_ins.sampling_down_factor = input;
	return count;
}

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > 100 ||
	    input <= dbs_tuners_ins.down_threshold)
		return -EINVAL;

	dbs_tuners_ins.up_threshold = input;
	return count;
}

static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	/* cannot be lower than 11 otherwise freq will not fall */
	if (ret != 1 || input < 11 || input > 100 ||
	    input >= dbs_tuners_ins.up_threshold)
		return -EINVAL;

	dbs_tuners_ins.down_threshold = input;
	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */
		return count;

	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
					&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
	}
	return count;
}

static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
			       const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 100)
		input = 100;

	/*
	 * no need to test here if freq_step is zero as the user might
	 * actually want this, they would be crazy though :)
	 */
	dbs_tuners_ins.freq_step = input;
	return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(up_threshold);
define_one_global_rw(down_threshold);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(freq_step);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&down_threshold.attr,
	&ignore_nice_load.attr,
	&freq_step.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "conservative",
};

/************************** sysfs end ************************/
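/*
 * The tunables above are typically exposed under
 * /sys/devices/system/cpu/cpufreq/conservative/ (the attribute group is
 * registered on cpufreq_global_kobject in cpufreq_governor_dbs() below).
 * A hypothetical shell session might look like:
 *
 *   # cat /sys/devices/system/cpu/cpufreq/conservative/sampling_rate_min
 *   # echo 40000 > /sys/devices/system/cpu/cpufreq/conservative/sampling_rate
 *   # echo 10 > /sys/devices/system/cpu/cpufreq/conservative/freq_step
 *
 * Writes are validated by the store_* handlers above (e.g. sampling_rate is
 * clamped to at least sampling_rate_min).
 */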
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int load = 0;
	unsigned int max_load = 0;
	unsigned int freq_target;

	struct cpufreq_policy *policy;
	unsigned int j;

	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate we check the load of the CPUs in this policy;
	 * if it is above up_threshold (default 80%), we step the frequency
	 * up by freq_step percent (default 5%) of the maximum frequency.
	 * If the load drops more than 10 points below down_threshold
	 * (default 20%), we step the frequency down by the same amount,
	 * never going below policy->min.
	 */

	/* Get Absolute Load */
	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;

		j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		if (load > max_load)
			max_load = load;
	}

	/*
	 * break out if we 'cannot' reduce the speed as the user might
	 * want freq_step to be zero
	 */
	if (dbs_tuners_ins.freq_step == 0)
		return;

	/* Check for frequency increase */
	if (max_load > dbs_tuners_ins.up_threshold) {
		this_dbs_info->down_skip = 0;

		/* if we are already at full speed then break out early */
		if (this_dbs_info->requested_freq == policy->max)
			return;

		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		/* max freq cannot be less than 100. But who knows.... */
		if (unlikely(freq_target == 0))
			freq_target = 5;

		this_dbs_info->requested_freq += freq_target;
		if (this_dbs_info->requested_freq > policy->max)
			this_dbs_info->requested_freq = policy->max;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
			CPUFREQ_RELATION_H);
		return;
	}

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we keep a margin of 10 points under the threshold.
	 */
	if (max_load < (dbs_tuners_ins.down_threshold - 10)) {
		freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

		this_dbs_info->requested_freq -= freq_target;
		if (this_dbs_info->requested_freq < policy->min)
			this_dbs_info->requested_freq = policy->min;

		/*
		 * if we cannot reduce the frequency anymore, break out early
		 */
		if (policy->cur == policy->min)
			return;

		__cpufreq_driver_target(policy, this_dbs_info->requested_freq,
				CPUFREQ_RELATION_H);
		return;
	}
}
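/*
 * Rough worked example of the arithmetic in dbs_check_cpu() above, under
 * assumed (illustrative) numbers: for a 20,000 us sampling period in which
 * a CPU was idle for 4,000 us, load = 100 * (20000 - 4000) / 20000 = 80.
 * With policy->max = 2,000,000 kHz and the default freq_step of 5,
 * freq_target = (5 * 2,000,000) / 100 = 100,000 kHz, so requested_freq
 * moves in 100 MHz steps per sampling period.
 */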
static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;

	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	delay -= jiffies % delay;

	mutex_lock(&dbs_info->timer_mutex);

	dbs_check_cpu(dbs_info);

	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;

	dbs_info->enable = 1;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	dbs_info->enable = 0;
	cancel_delayed_work_sync(&dbs_info->work);
}
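/*
 * A short illustration of the delay alignment above, with assumed numbers:
 * with HZ=1000 and sampling_rate = 20,000 us, usecs_to_jiffies() gives a
 * delay of 20 jiffies. If the current jiffies value is, say, 7 past a
 * 20-jiffy boundary, delay becomes 20 - 7 = 13, so the work fires on the
 * next multiple of 20 jiffies and all CPUs end up sampling on roughly the
 * same jiffy.
 */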
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cs_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->down_skip = 0;
		this_dbs_info->requested_freq = policy->cur;

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_enable++;
		/*
		 * Start the timer/schedule work only when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/*
			 * conservative does not implement micro-accounting
			 * like the ondemand governor, thus we are bound to
			 * jiffies/HZ
			 */
			min_sampling_rate =
				MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);

			cpufreq_register_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}
		mutex_unlock(&dbs_mutex);

		dbs_timer_init(this_dbs_info);

		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		dbs_enable--;
		mutex_destroy(&this_dbs_info->timer_mutex);

		/*
		 * Unregister the transition notifier when the last CPU
		 * using this governor is stopped
		 */
		if (dbs_enable == 0)
			cpufreq_unregister_notifier(
					&dbs_cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);

		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);

		break;
	}
	return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
	.name			= "conservative",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_conservative);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_conservative);
}


MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors "
		"optimised for use in a battery environment");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);