/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL		(10)
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL	(3)
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in uS.
 */
#define MIN_SAMPLING_RATE_RATIO			(2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER			(1000)
#define MIN_LATENCY_MULTIPLIER			(100)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
	.name			= "ondemand",
	.governor		= cpufreq_governor_dbs,
	.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
	.owner			= THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
	cputime64_t prev_cpu_idle;
	cputime64_t prev_cpu_wall;
	cputime64_t prev_cpu_nice;
	struct cpufreq_policy *cur_policy;
	struct delayed_work work;
	struct cpufreq_frequency_table *freq_table;
	unsigned int freq_lo;
	unsigned int freq_lo_jiffies;
	unsigned int freq_hi_jiffies;
	int cpu;
	unsigned int sample_type:1;
	/*
	 * percpu mutex that serializes governor limit change with
	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
	 * when user is changing the governor or limits.
	 */
	struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct	*kondemand_wq;

static struct dbs_tuners {
	unsigned int sampling_rate;
	unsigned int up_threshold;
	unsigned int down_differential;
	unsigned int ignore_nice;
	unsigned int powersave_bias;
} dbs_tuners_ins = {
	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
	.ignore_nice = 0,
	.powersave_bias = 0,
};

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
						  cputime64_t *wall)
{
	cputime64_t idle_time;
	cputime64_t cur_wall_time;
	cputime64_t busy_time;

	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
			kstat_cpu(cpu).cpustat.system);

	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

	idle_time = cputime64_sub(cur_wall_time, busy_time);
	if (wall)
		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

	return (cputime64_t)jiffies_to_usecs(idle_time);
}

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
	u64 idle_time = get_cpu_idle_time_us(cpu, wall);

	if (idle_time == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	return idle_time;
}

/*
 * Find the right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
 */
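/*
 * Illustrative example (hypothetical numbers, not taken from any real
 * frequency table): with powersave_bias = 100 (i.e. 10%) and a requested
 * frequency of 2000 MHz, freq_avg becomes 1800 MHz.  If the table only
 * offers 2000 MHz and 1600 MHz around that value, the sampling period is
 * split so that jiffies_hi / jiffies_total = (1800 - 1600) / (2000 - 1600)
 * = 1/2, i.e. half the period is spent at 2000 MHz and half at 1600 MHz,
 * averaging out to the desired 1800 MHz.
 */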
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
					  unsigned int freq_next,
					  unsigned int relation)
{
	unsigned int freq_req, freq_reduc, freq_avg;
	unsigned int freq_hi, freq_lo;
	unsigned int index = 0;
	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
						   policy->cpu);

	if (!dbs_info->freq_table) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_next;
	}

	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
			relation, &index);
	freq_req = dbs_info->freq_table[index].frequency;
	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
	freq_avg = freq_req - freq_reduc;

	/* Find freq bounds for freq_avg in freq_table */
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_H, &index);
	freq_lo = dbs_info->freq_table[index].frequency;
	index = 0;
	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
			CPUFREQ_RELATION_L, &index);
	freq_hi = dbs_info->freq_table[index].frequency;

	/* Find out how long we have to be in hi and lo freqs */
	if (freq_hi == freq_lo) {
		dbs_info->freq_lo = 0;
		dbs_info->freq_lo_jiffies = 0;
		return freq_lo;
	}
	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
	jiffies_hi += ((freq_hi - freq_lo) / 2);
	jiffies_hi /= (freq_hi - freq_lo);
	jiffies_lo = jiffies_total - jiffies_hi;
	dbs_info->freq_lo = freq_lo;
	dbs_info->freq_lo_jiffies = jiffies_lo;
	dbs_info->freq_hi_jiffies = jiffies_hi;
	return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
	struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
	dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
	dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
	int i;
	for_each_online_cpu(i) {
		ondemand_powersave_bias_init_cpu(i);
	}
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
		    "sysfs file is deprecated - used by: %s\n", current->comm);
	return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", min_sampling_rate);
}

#define define_one_ro(_name)		\
static struct global_attr _name =	\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)					\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");

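/*
 * The *_old show/store variants below re-export the same tunables under
 * each policy's kobject for backward compatibility.  They print a one-time
 * deprecation notice and then forward to the global handlers above; the
 * per-policy group is added and removed in CPUFREQ_GOV_START/STOP.
 */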
#define show_one_old(file_name)						\
static ssize_t show_##file_name##_old					\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return show_##file_name(NULL, NULL, buf);			\
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

#define define_one_ro_old(object, _name)	\
static struct freq_attr object =		\
__ATTR(_name, 0444, show_##_name##_old, NULL)

define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
define_one_ro_old(sampling_rate_max_old, sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
				  const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		return -EINVAL;
	}

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.up_threshold = input;
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
				      const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf(buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	mutex_lock(&dbs_mutex);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		mutex_unlock(&dbs_mutex);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *dbs_info;
		dbs_info = &per_cpu(od_cpu_dbs_info, j);
		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&dbs_info->prev_cpu_wall);
		if (dbs_tuners_ins.ignore_nice)
			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

	}
	mutex_unlock(&dbs_mutex);

	return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
				    const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf(buf, "%u", &input);

	if (ret != 1)
		return -EINVAL;

	if (input > 1000)
		input = 1000;

	mutex_lock(&dbs_mutex);
	dbs_tuners_ins.powersave_bias = input;
	ondemand_powersave_bias_init();
	mutex_unlock(&dbs_mutex);

	return count;
}

#define define_one_rw(_name) \
static struct global_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	&powersave_bias.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)					\
static ssize_t store_##file_name##_old					\
(struct cpufreq_policy *unused, const char *buf, size_t count)		\
{									\
	printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "	\
		    "interface is deprecated - " #file_name "\n");	\
	return store_##file_name(NULL, NULL, buf, count);		\
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

#define define_one_rw_old(object, _name)	\
static struct freq_attr object =		\
__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)

define_one_rw_old(sampling_rate_old, sampling_rate);
define_one_rw_old(up_threshold_old, up_threshold);
define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
define_one_rw_old(powersave_bias_old, powersave_bias);

static struct attribute *dbs_attributes_old[] = {
	&sampling_rate_max_old.attr,
	&sampling_rate_min_old.attr,
	&sampling_rate_old.attr,
	&up_threshold_old.attr,
	&ignore_nice_load_old.attr,
	&powersave_bias_old.attr,
	NULL
};

static struct attribute_group dbs_attr_group_old = {
	.attrs = dbs_attributes_old,
	.name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
	unsigned int max_load_freq;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info->freq_lo = 0;
	policy = this_dbs_info->cur_policy;

	/*
	 * Every sampling_rate, we check if the current idle time is less
	 * than 20% (default); if it is, we try to increase the frequency.
	 * Every sampling_rate, we also look for the lowest frequency which
	 * can sustain the load while keeping idle time over 30%. If such a
	 * frequency exists, we try to decrease to this frequency.
	 *
	 * Any frequency increase takes it to the maximum frequency.
	 * Frequency reduction happens at minimum steps of
	 * 5% (default) of the current frequency.
	 */
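	/*
	 * Illustrative example (hypothetical numbers): with
	 * up_threshold = 80 and the CPU currently at 1000 MHz, a measured
	 * load of 90% gives max_load_freq = 90 * 1000, which exceeds
	 * 80 * 1000, so the governor ramps straight to policy->max (or to
	 * the powersave_bias target when that tunable is non-zero).
	 */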

	/* Get Absolute Load - in terms of freq */
	max_load_freq = 0;

	for_each_cpu(j, policy->cpus) {
		struct cpu_dbs_info_s *j_dbs_info;
		cputime64_t cur_wall_time, cur_idle_time;
		unsigned int idle_time, wall_time;
		unsigned int load, load_freq;
		int freq_avg;

		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
				j_dbs_info->prev_cpu_wall);
		j_dbs_info->prev_cpu_wall = cur_wall_time;

		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
				j_dbs_info->prev_cpu_idle);
		j_dbs_info->prev_cpu_idle = cur_idle_time;

		if (dbs_tuners_ins.ignore_nice) {
			cputime64_t cur_nice;
			unsigned long cur_nice_jiffies;

			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
					j_dbs_info->prev_cpu_nice);
			/*
			 * Assumption: nice time between sampling periods will
			 * be less than 2^32 jiffies on 32-bit systems
			 */
			cur_nice_jiffies = (unsigned long)
					cputime64_to_jiffies64(cur_nice);

			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
			idle_time += jiffies_to_usecs(cur_nice_jiffies);
		}

		if (unlikely(!wall_time || wall_time < idle_time))
			continue;

		load = 100 * (wall_time - idle_time) / wall_time;

		freq_avg = __cpufreq_driver_getavg(policy, j);
		if (freq_avg <= 0)
			freq_avg = policy->cur;

		load_freq = load * freq_avg;
		if (load_freq > max_load_freq)
			max_load_freq = load_freq;
	}

	/* Check for frequency increase */
	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
		/* if we are already at full speed then break out early */
		if (!dbs_tuners_ins.powersave_bias) {
			if (policy->cur == policy->max)
				return;

			__cpufreq_driver_target(policy, policy->max,
				CPUFREQ_RELATION_H);
		} else {
			int freq = powersave_bias_target(policy, policy->max,
					CPUFREQ_RELATION_H);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
		return;
	}

	/* Check for frequency decrease */
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy. To be
	 * safe, we stay 10 points under the threshold.
	 */
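	/*
	 * Illustrative example (hypothetical numbers): with
	 * up_threshold = 80, down_differential = 10 and the CPU at
	 * 1000 MHz, a load of 50% gives max_load_freq = 50 * 1000 = 50000,
	 * which is below (80 - 10) * 1000 = 70000, so
	 * freq_next = 50000 / 70, roughly 714 MHz, and CPUFREQ_RELATION_L
	 * then selects the lowest table frequency at or above that value.
	 */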
	if (max_load_freq <
	    (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
	     policy->cur) {
		unsigned int freq_next;
		freq_next = max_load_freq /
				(dbs_tuners_ins.up_threshold -
				 dbs_tuners_ins.down_differential);

		if (!dbs_tuners_ins.powersave_bias) {
			__cpufreq_driver_target(policy, freq_next,
					CPUFREQ_RELATION_L);
		} else {
			int freq = powersave_bias_target(policy, freq_next,
					CPUFREQ_RELATION_L);
			__cpufreq_driver_target(policy, freq,
				CPUFREQ_RELATION_L);
		}
	}
}

static void do_dbs_timer(struct work_struct *work)
{
	struct cpu_dbs_info_s *dbs_info =
		container_of(work, struct cpu_dbs_info_s, work.work);
	unsigned int cpu = dbs_info->cpu;
	int sample_type = dbs_info->sample_type;

	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	delay -= jiffies % delay;
	mutex_lock(&dbs_info->timer_mutex);

	/* Common NORMAL_SAMPLE setup */
	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	if (!dbs_tuners_ins.powersave_bias ||
	    sample_type == DBS_NORMAL_SAMPLE) {
		dbs_check_cpu(dbs_info);
		if (dbs_info->freq_lo) {
			/* Setup timer for SUB_SAMPLE */
			dbs_info->sample_type = DBS_SUB_SAMPLE;
			delay = dbs_info->freq_hi_jiffies;
		}
	} else {
		__cpufreq_driver_target(dbs_info->cur_policy,
			dbs_info->freq_lo, CPUFREQ_RELATION_H);
	}
	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
	mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
	/* We want all CPUs to do sampling nearly on the same jiffy */
	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
	delay -= jiffies % delay;

	dbs_info->sample_type = DBS_NORMAL_SAMPLE;
	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
	queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
		delay);
}

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
	cancel_delayed_work_sync(&dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;
	int rc;

	this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) || (!policy->cur))
			return -EINVAL;

		mutex_lock(&dbs_mutex);

		rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
		if (rc) {
			mutex_unlock(&dbs_mutex);
			return rc;
		}

		dbs_enable++;
		for_each_cpu(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
						&j_dbs_info->prev_cpu_wall);
			if (dbs_tuners_ins.ignore_nice) {
				j_dbs_info->prev_cpu_nice =
						kstat_cpu(j).cpustat.nice;
			}
		}
		this_dbs_info->cpu = cpu;
		ondemand_powersave_bias_init_cpu(cpu);
		/*
		 * Start the timer schedule work when this governor
		 * is used for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;

			rc = sysfs_create_group(cpufreq_global_kobject,
						&dbs_attr_group);
			if (rc) {
				mutex_unlock(&dbs_mutex);
				return rc;
			}

			/* policy latency is in nS. Convert it to uS first */
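			/*
			 * Example (illustrative): a driver reporting a
			 * transition latency of 10000 nS gives
			 * latency = 10 uS, so the default sampling_rate
			 * below becomes max(min_sampling_rate,
			 * 10 * LATENCY_MULTIPLIER), i.e. at least 10000 uS,
			 * meaning the load is re-evaluated roughly every
			 * 10 ms.
			 */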
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;
			/* Bring kernel and HW constraints together */
			min_sampling_rate = max(min_sampling_rate,
					MIN_LATENCY_MULTIPLIER * latency);
			dbs_tuners_ins.sampling_rate =
				max(min_sampling_rate,
				    latency * LATENCY_MULTIPLIER);
		}
		mutex_unlock(&dbs_mutex);

		mutex_init(&this_dbs_info->timer_mutex);
		dbs_timer_init(this_dbs_info);
		break;

	case CPUFREQ_GOV_STOP:
		dbs_timer_exit(this_dbs_info);

		mutex_lock(&dbs_mutex);
		sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
		mutex_destroy(&this_dbs_info->timer_mutex);
		dbs_enable--;
		mutex_unlock(&dbs_mutex);
		if (!dbs_enable)
			sysfs_remove_group(cpufreq_global_kobject,
					   &dbs_attr_group);

		break;

	case CPUFREQ_GOV_LIMITS:
		mutex_lock(&this_dbs_info->timer_mutex);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(this_dbs_info->cur_policy,
				policy->min, CPUFREQ_RELATION_L);
		mutex_unlock(&this_dbs_info->timer_mutex);
		break;
	}
	return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
	int err;
	cputime64_t wall;
	u64 idle_time;
	int cpu = get_cpu();

	idle_time = get_cpu_idle_time_us(cpu, &wall);
	put_cpu();
	if (idle_time != -1ULL) {
		/* Idle micro accounting is supported. Use finer thresholds */
		dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
		dbs_tuners_ins.down_differential =
					MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
		/*
		 * In the nohz/micro-accounting case we set the minimum
		 * sampling rate to a fixed (very low) value that does not
		 * depend on HZ. The deferrable timer may skip some samples
		 * while the CPU is idle/sleeping, as intended.
		 */
		min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
	} else {
		/* For correct statistics, we need 10 ticks for each measure */
		min_sampling_rate =
			MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
	}

	kondemand_wq = create_workqueue("kondemand");
	if (!kondemand_wq) {
		printk(KERN_ERR "Creation of kondemand failed\n");
		return -EFAULT;
	}
	err = cpufreq_register_governor(&cpufreq_gov_ondemand);
	if (err)
		destroy_workqueue(kondemand_wq);

	return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
	destroy_workqueue(kondemand_wq);
}


MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
	"Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);