/*
 * drivers/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a short form for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(100000)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
	.name			= "ondemand",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_iowait;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
	unsigned int rate_mult;
	int cpu;
	unsigned int sample_type:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

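/*
 * Quick reference for the tunables below, exposed through the "ondemand"
 * sysfs group registered on cpufreq_global_kobject (the store_* handlers
 * further down are authoritative):
 *
 *   sampling_rate         - interval between load evaluations, in us;
 *                           clamped to at least min_sampling_rate
 *   up_threshold          - busy percentage above which we jump to the
 *                           maximum frequency (default 80, or 95 when idle
 *                           micro-accounting is available)
 *   down_differential     - hysteresis subtracted from up_threshold when
 *                           picking a lower frequency (default 10, or 3)
 *   ignore_nice_load      - if 1, time spent in niced tasks counts as idle
 *   sampling_down_factor  - multiplier applied to the sampling interval
 *                           while running at the maximum frequency
 *   powersave_bias        - 0..1000; biases the requested frequency down by
 *                           powersave_bias/1000 of its value
 *   io_is_busy            - if 1, iowait time is treated as busy time
 */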
static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int sampling_down_factor;
	unsigned int powersave_bias;
	unsigned int io_is_busy;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
	.powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
						  cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);
	else
		idle_time += get_cpu_iowait_time_us(cpu, wall);

	return idle_time;
}

static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
{
	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

	if (iowait_time == -1ULL)
		return 0;

	return iowait_time;
}

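/*
 * Illustrative example of the powersave_bias averaging done below, using a
 * hypothetical frequency table (not taken from real hardware): with
 * powersave_bias = 200 and a requested 2000000 kHz, freq_reduc is
 * 2000000 * 200 / 1000 = 400000, so freq_avg = 1600000 kHz. If the table
 * only offers 1500000 and 1800000, then freq_lo = 1500000 and
 * freq_hi = 1800000, and the sampling window is split so that
 * jiffies_hi / jiffies_total = (1600000 - 1500000) / (1800000 - 1500000)
 * = 1/3, i.e. one third of the window at freq_hi and two thirds at
 * freq_lo, averaging out to freq_avg.
 */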
/*
 * Find right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
						   policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
	dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
	int i;
	for_each_online_cpu(i) {
		ondemand_powersave_bias_init_cpu(i);
	}
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(io_is_busy, io_is_busy);
show_one(up_threshold, up_threshold);
show_one(sampling_down_factor, sampling_down_factor);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	return count;
}

static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
				const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;
	dbs_tuners_ins.io_is_busy = !!input;
	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}
	dbs_tuners_ins.up_threshold = input;
	return count;
}

static ssize_t store_sampling_down_factor(struct kobject *a,
			struct attribute *b, const char *buf, size_t count)
{
	unsigned int input, j;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;
	dbs_tuners_ins.sampling_down_factor = input;

	/* Reset down sampling multiplier in case it was active */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->rate_mult = 1;
	}
	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

	}
	return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 1000)
		input = 1000;

	dbs_tuners_ins.powersave_bias = input;
	ondemand_powersave_bias_init();
	return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(io_is_busy);
define_one_global_rw(up_threshold);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&sampling_down_factor.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	&io_is_busy.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/************************** sysfs end ************************/

static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
{
	if (dbs_tuners_ins.powersave_bias)
		freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
	else if (p->cur == p->max)
		return;

	__cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
}

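/*
 * Rough sketch of the decision made in dbs_check_cpu() below (the code is
 * authoritative): for each CPU sharing the policy we compute
 *
 *	load      = 100 * (wall_time - idle_time) / wall_time
 *	load_freq = load * freq_avg
 *
 * and keep the maximum across CPUs. If max_load_freq exceeds
 * up_threshold * policy->cur, we jump straight to policy->max; otherwise,
 * if it falls below (up_threshold - down_differential) * policy->cur, we
 * pick the lowest frequency that can still carry the load,
 * max_load_freq / (up_threshold - down_differential), clamped to
 * policy->min.
 */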
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int max_load_freq;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info->freq_lo = 0;
	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate, we check if the current idle time is less
	 * than 20% (default); if it is, we try to increase the frequency.
	 * Every sampling_rate, we also look for the lowest frequency which
	 * can sustain the load while keeping idle time over 30%. If such a
	 * frequency exists, we try to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of the current frequency.
	 */

	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
		unsigned int idle_time, wall_time, iowait_time;
		unsigned int load, load_freq;
		int freq_avg;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
				j_dbs_info->prev_cpu_iowait);
		j_dbs_info->prev_cpu_iowait = cur_iowait_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies for 32 bit sys
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		/*
		 * For the purpose of ondemand, waiting for disk IO is an
		 * indication that you're performance critical, and not that
		 * the system is actually idle. So subtract the iowait time
		 * from the cpu idle time.
		 */
		if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
			idle_time -= iowait_time;

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
	}

	/* Check for frequency increase */
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
		/* If switching to max speed, apply sampling_down_factor */
		if (policy->cur < policy->max)
			this_dbs_info->rate_mult =
				dbs_tuners_ins.sampling_down_factor;
		dbs_freq_increase(policy, policy->max);
		return;
	}

	/* Check for frequency decrease */
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we stay 10 points under the threshold.
	 */
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);

		/* No longer fully busy, reset rate_mult */
		this_dbs_info->rate_mult = 1;

		if (freq_next < policy->min)
			freq_next = policy->min;

		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
					CPUFREQ_RELATION_L);
		}
	}
}

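/*
 * Sampling is driven by a per-CPU deferrable delayed work item
 * (do_dbs_timer below). A NORMAL_SAMPLE invocation runs dbs_check_cpu()
 * and normally re-arms itself after sampling_rate * rate_mult, aligned so
 * that all CPUs sample on roughly the same jiffy. When powersave_bias has
 * picked a freq_lo, the work instead alternates: freq_hi for
 * freq_hi_jiffies (NORMAL_SAMPLE), then freq_lo for freq_lo_jiffies
 * (SUB_SAMPLE), which approximates the biased average frequency over the
 * sampling window.
 */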
static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

	int delay;

	mutex_lock(&dbs_info->timer_mutex);

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	if (!dbs_tuners_ins.powersave_bias ||
	    sample_type == DBS_NORMAL_SAMPLE) {
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
			dbs_info->sample_type = DBS_SUB_SAMPLE;
			delay = dbs_info->freq_hi_jiffies;
		} else {
			/* We want all CPUs to do sampling nearly on
			 * same jiffy
			 */
			delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
				* dbs_info->rate_mult);

			if (num_online_cpus() > 1)
				delay -= jiffies % delay;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
			dbs_info->freq_lo, CPUFREQ_RELATION_H);
		delay = dbs_info->freq_lo_jiffies;
	}
	schedule_delayed_work_on(cpu, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (num_online_cpus() > 1)
		delay -= jiffies % delay;

	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	cancel_delayed_work_sync(&dbs_info->work);
}

/*
 * Not all CPUs want IO time to be accounted as busy; this depends on how
 * efficient idling at a higher frequency/voltage is.
 * Pavel Machek says this is not so for various generations of AMD and old
 * Intel systems.
 * Mike Chan (androidlcom) claims this is also not true for ARM.
 * Because of this, whitelist specific known (series) of CPUs by default, and
 * leave all others up to the user.
 */
static int should_io_be_busy(void)
{
#if defined(CONFIG_X86)
	/*
	 * For Intel, Core 2 (model 15) and later have an efficient idle.
	 */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model >= 15)
		return 1;
#endif
	return 0;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		dbs_enable++;
		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->cpu = cpu;
		this_dbs_info->rate_mult = 1;
		ondemand_powersave_bias_init_cpu(cpu);
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/* policy latency is in ns. Convert it to us first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);
			dbs_tuners_ins.io_is_busy = should_io_be_busy();
		}
		mutex_unlock(&dbs_mutex);

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_timer_init(this_dbs_info);
		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		mutex_destroy(&this_dbs_info->timer_mutex);
		dbs_enable--;
		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);
		break;
	}
	return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
	cputime64_t wall;
	u64 idle_time;
	int cpu = get_cpu();

	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
		/*
		 * In the nohz/micro-accounting case we set the minimum
		 * sampling rate to a fixed (very low) value that does not
		 * depend on HZ. The deferrable timer may skip some samples
		 * while the CPU is idle/sleeping, as needed.
		 */
		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		/* For correct statistics, we need 10 ticks for each measure */
		min_sampling_rate =
			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
	}

	return cpufreq_register_governor(&cpufreq_gov_ondemand);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);