1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/export.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/slab.h> 22 23 #include "cpufreq_governor.h" 24 25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26 { 27 if (have_governor_per_policy()) 28 return dbs_data->cdata->attr_group_gov_pol; 29 else 30 return dbs_data->cdata->attr_group_gov_sys; 31 } 32 33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 34 { 35 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 struct cpufreq_policy *policy = cdbs->shared->policy; 39 unsigned int sampling_rate; 40 unsigned int max_load = 0; 41 unsigned int ignore_nice; 42 unsigned int j; 43 44 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 45 struct od_cpu_dbs_info_s *od_dbs_info = 46 dbs_data->cdata->get_cpu_dbs_info_s(cpu); 47 48 /* 49 * Sometimes, the ondemand governor uses an additional 50 * multiplier to give long delays. So apply this multiplier to 51 * the 'sampling_rate', so as to keep the wake-up-from-idle 52 * detection logic a bit conservative. 53 */ 54 sampling_rate = od_tuners->sampling_rate; 55 sampling_rate *= od_dbs_info->rate_mult; 56 57 ignore_nice = od_tuners->ignore_nice_load; 58 } else { 59 sampling_rate = cs_tuners->sampling_rate; 60 ignore_nice = cs_tuners->ignore_nice_load; 61 } 62 63 /* Get Absolute Load */ 64 for_each_cpu(j, policy->cpus) { 65 struct cpu_dbs_info *j_cdbs; 66 u64 cur_wall_time, cur_idle_time; 67 unsigned int idle_time, wall_time; 68 unsigned int load; 69 int io_busy = 0; 70 71 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 72 73 /* 74 * For the purpose of ondemand, waiting for disk IO is 75 * an indication that you're performance critical, and 76 * not that the system is actually idle. So do not add 77 * the iowait time to the cpu idle time. 78 */ 79 if (dbs_data->cdata->governor == GOV_ONDEMAND) 80 io_busy = od_tuners->io_is_busy; 81 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 82 83 wall_time = (unsigned int) 84 (cur_wall_time - j_cdbs->prev_cpu_wall); 85 j_cdbs->prev_cpu_wall = cur_wall_time; 86 87 idle_time = (unsigned int) 88 (cur_idle_time - j_cdbs->prev_cpu_idle); 89 j_cdbs->prev_cpu_idle = cur_idle_time; 90 91 if (ignore_nice) { 92 u64 cur_nice; 93 unsigned long cur_nice_jiffies; 94 95 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 96 cdbs->prev_cpu_nice; 97 /* 98 * Assumption: nice time between sampling periods will 99 * be less than 2^32 jiffies for 32 bit sys 100 */ 101 cur_nice_jiffies = (unsigned long) 102 cputime64_to_jiffies64(cur_nice); 103 104 cdbs->prev_cpu_nice = 105 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 106 idle_time += jiffies_to_usecs(cur_nice_jiffies); 107 } 108 109 if (unlikely(!wall_time || wall_time < idle_time)) 110 continue; 111 112 /* 113 * If the CPU had gone completely idle, and a task just woke up 114 * on this CPU now, it would be unfair to calculate 'load' the 115 * usual way for this elapsed time-window, because it will show 116 * near-zero load, irrespective of how CPU intensive that task 117 * actually is. This is undesirable for latency-sensitive bursty 118 * workloads. 119 * 120 * To avoid this, we reuse the 'load' from the previous 121 * time-window and give this task a chance to start with a 122 * reasonably high CPU frequency. (However, we shouldn't over-do 123 * this copy, lest we get stuck at a high load (high frequency) 124 * for too long, even when the current system load has actually 125 * dropped down. So we perform the copy only once, upon the 126 * first wake-up from idle.) 127 * 128 * Detecting this situation is easy: the governor's deferrable 129 * timer would not have fired during CPU-idle periods. Hence 130 * an unusually large 'wall_time' (as compared to the sampling 131 * rate) indicates this scenario. 132 * 133 * prev_load can be zero in two cases and we must recalculate it 134 * for both cases: 135 * - during long idle intervals 136 * - explicitly set to zero 137 */ 138 if (unlikely(wall_time > (2 * sampling_rate) && 139 j_cdbs->prev_load)) { 140 load = j_cdbs->prev_load; 141 142 /* 143 * Perform a destructive copy, to ensure that we copy 144 * the previous load only once, upon the first wake-up 145 * from idle. 146 */ 147 j_cdbs->prev_load = 0; 148 } else { 149 load = 100 * (wall_time - idle_time) / wall_time; 150 j_cdbs->prev_load = load; 151 } 152 153 if (load > max_load) 154 max_load = load; 155 } 156 157 dbs_data->cdata->gov_check_cpu(cpu, max_load); 158 } 159 EXPORT_SYMBOL_GPL(dbs_check_cpu); 160 161 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 162 unsigned int delay) 163 { 164 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 165 166 mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); 167 } 168 169 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 170 unsigned int delay, bool all_cpus) 171 { 172 int i; 173 174 mutex_lock(&cpufreq_governor_lock); 175 if (!policy->governor_enabled) 176 goto out_unlock; 177 178 if (!all_cpus) { 179 /* 180 * Use raw_smp_processor_id() to avoid preemptible warnings. 181 * We know that this is only called with all_cpus == false from 182 * works that have been queued with *_work_on() functions and 183 * those works are canceled during CPU_DOWN_PREPARE so they 184 * can't possibly run on any other CPU. 185 */ 186 __gov_queue_work(raw_smp_processor_id(), dbs_data, delay); 187 } else { 188 for_each_cpu(i, policy->cpus) 189 __gov_queue_work(i, dbs_data, delay); 190 } 191 192 out_unlock: 193 mutex_unlock(&cpufreq_governor_lock); 194 } 195 EXPORT_SYMBOL_GPL(gov_queue_work); 196 197 static inline void gov_cancel_work(struct dbs_data *dbs_data, 198 struct cpufreq_policy *policy) 199 { 200 struct cpu_dbs_info *cdbs; 201 int i; 202 203 for_each_cpu(i, policy->cpus) { 204 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 205 cancel_delayed_work_sync(&cdbs->dwork); 206 } 207 } 208 209 /* Will return if we need to evaluate cpu load again or not */ 210 static bool need_load_eval(struct cpu_common_dbs_info *shared, 211 unsigned int sampling_rate) 212 { 213 if (policy_is_shared(shared->policy)) { 214 ktime_t time_now = ktime_get(); 215 s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); 216 217 /* Do nothing if we recently have sampled */ 218 if (delta_us < (s64)(sampling_rate / 2)) 219 return false; 220 else 221 shared->time_stamp = time_now; 222 } 223 224 return true; 225 } 226 227 static void dbs_timer(struct work_struct *work) 228 { 229 struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, 230 dwork.work); 231 struct cpu_common_dbs_info *shared = cdbs->shared; 232 struct cpufreq_policy *policy = shared->policy; 233 struct dbs_data *dbs_data = policy->governor_data; 234 unsigned int sampling_rate, delay; 235 bool modify_all = true; 236 237 mutex_lock(&shared->timer_mutex); 238 239 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 240 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 241 242 sampling_rate = cs_tuners->sampling_rate; 243 } else { 244 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 245 246 sampling_rate = od_tuners->sampling_rate; 247 } 248 249 if (!need_load_eval(cdbs->shared, sampling_rate)) 250 modify_all = false; 251 252 delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); 253 gov_queue_work(dbs_data, policy, delay, modify_all); 254 255 mutex_unlock(&shared->timer_mutex); 256 } 257 258 static void set_sampling_rate(struct dbs_data *dbs_data, 259 unsigned int sampling_rate) 260 { 261 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 262 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 263 cs_tuners->sampling_rate = sampling_rate; 264 } else { 265 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 266 od_tuners->sampling_rate = sampling_rate; 267 } 268 } 269 270 static int alloc_common_dbs_info(struct cpufreq_policy *policy, 271 struct common_dbs_data *cdata) 272 { 273 struct cpu_common_dbs_info *shared; 274 int j; 275 276 /* Allocate memory for the common information for policy->cpus */ 277 shared = kzalloc(sizeof(*shared), GFP_KERNEL); 278 if (!shared) 279 return -ENOMEM; 280 281 /* Set shared for all CPUs, online+offline */ 282 for_each_cpu(j, policy->related_cpus) 283 cdata->get_cpu_cdbs(j)->shared = shared; 284 285 return 0; 286 } 287 288 static void free_common_dbs_info(struct cpufreq_policy *policy, 289 struct common_dbs_data *cdata) 290 { 291 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 292 struct cpu_common_dbs_info *shared = cdbs->shared; 293 int j; 294 295 for_each_cpu(j, policy->cpus) 296 cdata->get_cpu_cdbs(j)->shared = NULL; 297 298 kfree(shared); 299 } 300 301 static int cpufreq_governor_init(struct cpufreq_policy *policy, 302 struct dbs_data *dbs_data, 303 struct common_dbs_data *cdata) 304 { 305 unsigned int latency; 306 int ret; 307 308 /* State should be equivalent to EXIT */ 309 if (policy->governor_data) 310 return -EBUSY; 311 312 if (dbs_data) { 313 if (WARN_ON(have_governor_per_policy())) 314 return -EINVAL; 315 316 ret = alloc_common_dbs_info(policy, cdata); 317 if (ret) 318 return ret; 319 320 dbs_data->usage_count++; 321 policy->governor_data = dbs_data; 322 return 0; 323 } 324 325 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 326 if (!dbs_data) 327 return -ENOMEM; 328 329 ret = alloc_common_dbs_info(policy, cdata); 330 if (ret) 331 goto free_dbs_data; 332 333 dbs_data->cdata = cdata; 334 dbs_data->usage_count = 1; 335 336 ret = cdata->init(dbs_data, !policy->governor->initialized); 337 if (ret) 338 goto free_common_dbs_info; 339 340 /* policy latency is in ns. Convert it to us first */ 341 latency = policy->cpuinfo.transition_latency / 1000; 342 if (latency == 0) 343 latency = 1; 344 345 /* Bring kernel and HW constraints together */ 346 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 347 MIN_LATENCY_MULTIPLIER * latency); 348 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 349 latency * LATENCY_MULTIPLIER)); 350 351 if (!have_governor_per_policy()) { 352 if (WARN_ON(cpufreq_get_global_kobject())) { 353 ret = -EINVAL; 354 goto cdata_exit; 355 } 356 cdata->gdbs_data = dbs_data; 357 } 358 359 ret = sysfs_create_group(get_governor_parent_kobj(policy), 360 get_sysfs_attr(dbs_data)); 361 if (ret) 362 goto put_kobj; 363 364 policy->governor_data = dbs_data; 365 366 return 0; 367 368 put_kobj: 369 if (!have_governor_per_policy()) { 370 cdata->gdbs_data = NULL; 371 cpufreq_put_global_kobject(); 372 } 373 cdata_exit: 374 cdata->exit(dbs_data, !policy->governor->initialized); 375 free_common_dbs_info: 376 free_common_dbs_info(policy, cdata); 377 free_dbs_data: 378 kfree(dbs_data); 379 return ret; 380 } 381 382 static int cpufreq_governor_exit(struct cpufreq_policy *policy, 383 struct dbs_data *dbs_data) 384 { 385 struct common_dbs_data *cdata = dbs_data->cdata; 386 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 387 388 /* State should be equivalent to INIT */ 389 if (!cdbs->shared || cdbs->shared->policy) 390 return -EBUSY; 391 392 policy->governor_data = NULL; 393 if (!--dbs_data->usage_count) { 394 sysfs_remove_group(get_governor_parent_kobj(policy), 395 get_sysfs_attr(dbs_data)); 396 397 if (!have_governor_per_policy()) { 398 cdata->gdbs_data = NULL; 399 cpufreq_put_global_kobject(); 400 } 401 402 cdata->exit(dbs_data, policy->governor->initialized == 1); 403 kfree(dbs_data); 404 } 405 406 free_common_dbs_info(policy, cdata); 407 return 0; 408 } 409 410 static int cpufreq_governor_start(struct cpufreq_policy *policy, 411 struct dbs_data *dbs_data) 412 { 413 struct common_dbs_data *cdata = dbs_data->cdata; 414 unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; 415 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 416 struct cpu_common_dbs_info *shared = cdbs->shared; 417 int io_busy = 0; 418 419 if (!policy->cur) 420 return -EINVAL; 421 422 /* State should be equivalent to INIT */ 423 if (!shared || shared->policy) 424 return -EBUSY; 425 426 if (cdata->governor == GOV_CONSERVATIVE) { 427 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 428 429 sampling_rate = cs_tuners->sampling_rate; 430 ignore_nice = cs_tuners->ignore_nice_load; 431 } else { 432 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 433 434 sampling_rate = od_tuners->sampling_rate; 435 ignore_nice = od_tuners->ignore_nice_load; 436 io_busy = od_tuners->io_is_busy; 437 } 438 439 shared->policy = policy; 440 shared->time_stamp = ktime_get(); 441 mutex_init(&shared->timer_mutex); 442 443 for_each_cpu(j, policy->cpus) { 444 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); 445 unsigned int prev_load; 446 447 j_cdbs->prev_cpu_idle = 448 get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 449 450 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - 451 j_cdbs->prev_cpu_idle); 452 j_cdbs->prev_load = 100 * prev_load / 453 (unsigned int)j_cdbs->prev_cpu_wall; 454 455 if (ignore_nice) 456 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 457 458 INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); 459 } 460 461 if (cdata->governor == GOV_CONSERVATIVE) { 462 struct cs_cpu_dbs_info_s *cs_dbs_info = 463 cdata->get_cpu_dbs_info_s(cpu); 464 465 cs_dbs_info->down_skip = 0; 466 cs_dbs_info->enable = 1; 467 cs_dbs_info->requested_freq = policy->cur; 468 } else { 469 struct od_ops *od_ops = cdata->gov_ops; 470 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); 471 472 od_dbs_info->rate_mult = 1; 473 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 474 od_ops->powersave_bias_init_cpu(cpu); 475 } 476 477 gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), 478 true); 479 return 0; 480 } 481 482 static int cpufreq_governor_stop(struct cpufreq_policy *policy, 483 struct dbs_data *dbs_data) 484 { 485 struct common_dbs_data *cdata = dbs_data->cdata; 486 unsigned int cpu = policy->cpu; 487 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 488 struct cpu_common_dbs_info *shared = cdbs->shared; 489 490 /* State should be equivalent to START */ 491 if (!shared || !shared->policy) 492 return -EBUSY; 493 494 gov_cancel_work(dbs_data, policy); 495 496 if (cdata->governor == GOV_CONSERVATIVE) { 497 struct cs_cpu_dbs_info_s *cs_dbs_info = 498 cdata->get_cpu_dbs_info_s(cpu); 499 500 cs_dbs_info->enable = 0; 501 } 502 503 shared->policy = NULL; 504 mutex_destroy(&shared->timer_mutex); 505 return 0; 506 } 507 508 static int cpufreq_governor_limits(struct cpufreq_policy *policy, 509 struct dbs_data *dbs_data) 510 { 511 struct common_dbs_data *cdata = dbs_data->cdata; 512 unsigned int cpu = policy->cpu; 513 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 514 515 /* State should be equivalent to START */ 516 if (!cdbs->shared || !cdbs->shared->policy) 517 return -EBUSY; 518 519 mutex_lock(&cdbs->shared->timer_mutex); 520 if (policy->max < cdbs->shared->policy->cur) 521 __cpufreq_driver_target(cdbs->shared->policy, policy->max, 522 CPUFREQ_RELATION_H); 523 else if (policy->min > cdbs->shared->policy->cur) 524 __cpufreq_driver_target(cdbs->shared->policy, policy->min, 525 CPUFREQ_RELATION_L); 526 dbs_check_cpu(dbs_data, cpu); 527 mutex_unlock(&cdbs->shared->timer_mutex); 528 529 return 0; 530 } 531 532 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 533 struct common_dbs_data *cdata, unsigned int event) 534 { 535 struct dbs_data *dbs_data; 536 int ret; 537 538 /* Lock governor to block concurrent initialization of governor */ 539 mutex_lock(&cdata->mutex); 540 541 if (have_governor_per_policy()) 542 dbs_data = policy->governor_data; 543 else 544 dbs_data = cdata->gdbs_data; 545 546 if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { 547 ret = -EINVAL; 548 goto unlock; 549 } 550 551 switch (event) { 552 case CPUFREQ_GOV_POLICY_INIT: 553 ret = cpufreq_governor_init(policy, dbs_data, cdata); 554 break; 555 case CPUFREQ_GOV_POLICY_EXIT: 556 ret = cpufreq_governor_exit(policy, dbs_data); 557 break; 558 case CPUFREQ_GOV_START: 559 ret = cpufreq_governor_start(policy, dbs_data); 560 break; 561 case CPUFREQ_GOV_STOP: 562 ret = cpufreq_governor_stop(policy, dbs_data); 563 break; 564 case CPUFREQ_GOV_LIMITS: 565 ret = cpufreq_governor_limits(policy, dbs_data); 566 break; 567 default: 568 ret = -EINVAL; 569 } 570 571 unlock: 572 mutex_unlock(&cdata->mutex); 573 574 return ret; 575 } 576 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 577