1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/export.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/slab.h> 22 23 #include "cpufreq_governor.h" 24 25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26 { 27 if (have_governor_per_policy()) 28 return dbs_data->cdata->attr_group_gov_pol; 29 else 30 return dbs_data->cdata->attr_group_gov_sys; 31 } 32 33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 34 { 35 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 struct cpufreq_policy *policy = cdbs->shared->policy; 39 unsigned int sampling_rate; 40 unsigned int max_load = 0; 41 unsigned int ignore_nice; 42 unsigned int j; 43 44 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 45 struct od_cpu_dbs_info_s *od_dbs_info = 46 dbs_data->cdata->get_cpu_dbs_info_s(cpu); 47 48 /* 49 * Sometimes, the ondemand governor uses an additional 50 * multiplier to give long delays. So apply this multiplier to 51 * the 'sampling_rate', so as to keep the wake-up-from-idle 52 * detection logic a bit conservative. 53 */ 54 sampling_rate = od_tuners->sampling_rate; 55 sampling_rate *= od_dbs_info->rate_mult; 56 57 ignore_nice = od_tuners->ignore_nice_load; 58 } else { 59 sampling_rate = cs_tuners->sampling_rate; 60 ignore_nice = cs_tuners->ignore_nice_load; 61 } 62 63 /* Get Absolute Load */ 64 for_each_cpu(j, policy->cpus) { 65 struct cpu_dbs_info *j_cdbs; 66 u64 cur_wall_time, cur_idle_time; 67 unsigned int idle_time, wall_time; 68 unsigned int load; 69 int io_busy = 0; 70 71 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 72 73 /* 74 * For the purpose of ondemand, waiting for disk IO is 75 * an indication that you're performance critical, and 76 * not that the system is actually idle. So do not add 77 * the iowait time to the cpu idle time. 78 */ 79 if (dbs_data->cdata->governor == GOV_ONDEMAND) 80 io_busy = od_tuners->io_is_busy; 81 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 82 83 wall_time = (unsigned int) 84 (cur_wall_time - j_cdbs->prev_cpu_wall); 85 j_cdbs->prev_cpu_wall = cur_wall_time; 86 87 idle_time = (unsigned int) 88 (cur_idle_time - j_cdbs->prev_cpu_idle); 89 j_cdbs->prev_cpu_idle = cur_idle_time; 90 91 if (ignore_nice) { 92 u64 cur_nice; 93 unsigned long cur_nice_jiffies; 94 95 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 96 cdbs->prev_cpu_nice; 97 /* 98 * Assumption: nice time between sampling periods will 99 * be less than 2^32 jiffies for 32 bit sys 100 */ 101 cur_nice_jiffies = (unsigned long) 102 cputime64_to_jiffies64(cur_nice); 103 104 cdbs->prev_cpu_nice = 105 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 106 idle_time += jiffies_to_usecs(cur_nice_jiffies); 107 } 108 109 if (unlikely(!wall_time || wall_time < idle_time)) 110 continue; 111 112 /* 113 * If the CPU had gone completely idle, and a task just woke up 114 * on this CPU now, it would be unfair to calculate 'load' the 115 * usual way for this elapsed time-window, because it will show 116 * near-zero load, irrespective of how CPU intensive that task 117 * actually is. This is undesirable for latency-sensitive bursty 118 * workloads. 119 * 120 * To avoid this, we reuse the 'load' from the previous 121 * time-window and give this task a chance to start with a 122 * reasonably high CPU frequency. (However, we shouldn't over-do 123 * this copy, lest we get stuck at a high load (high frequency) 124 * for too long, even when the current system load has actually 125 * dropped down. So we perform the copy only once, upon the 126 * first wake-up from idle.) 127 * 128 * Detecting this situation is easy: the governor's deferrable 129 * timer would not have fired during CPU-idle periods. Hence 130 * an unusually large 'wall_time' (as compared to the sampling 131 * rate) indicates this scenario. 132 * 133 * prev_load can be zero in two cases and we must recalculate it 134 * for both cases: 135 * - during long idle intervals 136 * - explicitly set to zero 137 */ 138 if (unlikely(wall_time > (2 * sampling_rate) && 139 j_cdbs->prev_load)) { 140 load = j_cdbs->prev_load; 141 142 /* 143 * Perform a destructive copy, to ensure that we copy 144 * the previous load only once, upon the first wake-up 145 * from idle. 146 */ 147 j_cdbs->prev_load = 0; 148 } else { 149 load = 100 * (wall_time - idle_time) / wall_time; 150 j_cdbs->prev_load = load; 151 } 152 153 if (load > max_load) 154 max_load = load; 155 } 156 157 dbs_data->cdata->gov_check_cpu(cpu, max_load); 158 } 159 EXPORT_SYMBOL_GPL(dbs_check_cpu); 160 161 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 162 unsigned int delay) 163 { 164 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 165 166 mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); 167 } 168 169 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 170 unsigned int delay, bool all_cpus) 171 { 172 int i; 173 174 if (!all_cpus) { 175 /* 176 * Use raw_smp_processor_id() to avoid preemptible warnings. 177 * We know that this is only called with all_cpus == false from 178 * works that have been queued with *_work_on() functions and 179 * those works are canceled during CPU_DOWN_PREPARE so they 180 * can't possibly run on any other CPU. 181 */ 182 __gov_queue_work(raw_smp_processor_id(), dbs_data, delay); 183 } else { 184 for_each_cpu(i, policy->cpus) 185 __gov_queue_work(i, dbs_data, delay); 186 } 187 } 188 EXPORT_SYMBOL_GPL(gov_queue_work); 189 190 static inline void gov_cancel_work(struct dbs_data *dbs_data, 191 struct cpufreq_policy *policy) 192 { 193 struct cpu_dbs_info *cdbs; 194 int i; 195 196 for_each_cpu(i, policy->cpus) { 197 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 198 cancel_delayed_work_sync(&cdbs->dwork); 199 } 200 } 201 202 /* Will return if we need to evaluate cpu load again or not */ 203 static bool need_load_eval(struct cpu_common_dbs_info *shared, 204 unsigned int sampling_rate) 205 { 206 if (policy_is_shared(shared->policy)) { 207 ktime_t time_now = ktime_get(); 208 s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); 209 210 /* Do nothing if we recently have sampled */ 211 if (delta_us < (s64)(sampling_rate / 2)) 212 return false; 213 else 214 shared->time_stamp = time_now; 215 } 216 217 return true; 218 } 219 220 static void dbs_timer(struct work_struct *work) 221 { 222 struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, 223 dwork.work); 224 struct cpu_common_dbs_info *shared = cdbs->shared; 225 struct cpufreq_policy *policy; 226 struct dbs_data *dbs_data; 227 unsigned int sampling_rate, delay; 228 bool modify_all = true; 229 230 mutex_lock(&shared->timer_mutex); 231 232 policy = shared->policy; 233 234 /* 235 * Governor might already be disabled and there is no point continuing 236 * with the work-handler. 237 */ 238 if (!policy) 239 goto unlock; 240 241 dbs_data = policy->governor_data; 242 243 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 244 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 245 246 sampling_rate = cs_tuners->sampling_rate; 247 } else { 248 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 249 250 sampling_rate = od_tuners->sampling_rate; 251 } 252 253 if (!need_load_eval(cdbs->shared, sampling_rate)) 254 modify_all = false; 255 256 delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); 257 gov_queue_work(dbs_data, policy, delay, modify_all); 258 259 unlock: 260 mutex_unlock(&shared->timer_mutex); 261 } 262 263 static void set_sampling_rate(struct dbs_data *dbs_data, 264 unsigned int sampling_rate) 265 { 266 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 267 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 268 cs_tuners->sampling_rate = sampling_rate; 269 } else { 270 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 271 od_tuners->sampling_rate = sampling_rate; 272 } 273 } 274 275 static int alloc_common_dbs_info(struct cpufreq_policy *policy, 276 struct common_dbs_data *cdata) 277 { 278 struct cpu_common_dbs_info *shared; 279 int j; 280 281 /* Allocate memory for the common information for policy->cpus */ 282 shared = kzalloc(sizeof(*shared), GFP_KERNEL); 283 if (!shared) 284 return -ENOMEM; 285 286 /* Set shared for all CPUs, online+offline */ 287 for_each_cpu(j, policy->related_cpus) 288 cdata->get_cpu_cdbs(j)->shared = shared; 289 290 return 0; 291 } 292 293 static void free_common_dbs_info(struct cpufreq_policy *policy, 294 struct common_dbs_data *cdata) 295 { 296 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 297 struct cpu_common_dbs_info *shared = cdbs->shared; 298 int j; 299 300 for_each_cpu(j, policy->cpus) 301 cdata->get_cpu_cdbs(j)->shared = NULL; 302 303 kfree(shared); 304 } 305 306 static int cpufreq_governor_init(struct cpufreq_policy *policy, 307 struct dbs_data *dbs_data, 308 struct common_dbs_data *cdata) 309 { 310 unsigned int latency; 311 int ret; 312 313 /* State should be equivalent to EXIT */ 314 if (policy->governor_data) 315 return -EBUSY; 316 317 if (dbs_data) { 318 if (WARN_ON(have_governor_per_policy())) 319 return -EINVAL; 320 321 ret = alloc_common_dbs_info(policy, cdata); 322 if (ret) 323 return ret; 324 325 dbs_data->usage_count++; 326 policy->governor_data = dbs_data; 327 return 0; 328 } 329 330 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 331 if (!dbs_data) 332 return -ENOMEM; 333 334 ret = alloc_common_dbs_info(policy, cdata); 335 if (ret) 336 goto free_dbs_data; 337 338 dbs_data->cdata = cdata; 339 dbs_data->usage_count = 1; 340 341 ret = cdata->init(dbs_data, !policy->governor->initialized); 342 if (ret) 343 goto free_common_dbs_info; 344 345 /* policy latency is in ns. Convert it to us first */ 346 latency = policy->cpuinfo.transition_latency / 1000; 347 if (latency == 0) 348 latency = 1; 349 350 /* Bring kernel and HW constraints together */ 351 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 352 MIN_LATENCY_MULTIPLIER * latency); 353 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 354 latency * LATENCY_MULTIPLIER)); 355 356 if (!have_governor_per_policy()) 357 cdata->gdbs_data = dbs_data; 358 359 ret = sysfs_create_group(get_governor_parent_kobj(policy), 360 get_sysfs_attr(dbs_data)); 361 if (ret) 362 goto reset_gdbs_data; 363 364 policy->governor_data = dbs_data; 365 366 return 0; 367 368 reset_gdbs_data: 369 if (!have_governor_per_policy()) 370 cdata->gdbs_data = NULL; 371 cdata->exit(dbs_data, !policy->governor->initialized); 372 free_common_dbs_info: 373 free_common_dbs_info(policy, cdata); 374 free_dbs_data: 375 kfree(dbs_data); 376 return ret; 377 } 378 379 static int cpufreq_governor_exit(struct cpufreq_policy *policy, 380 struct dbs_data *dbs_data) 381 { 382 struct common_dbs_data *cdata = dbs_data->cdata; 383 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 384 385 /* State should be equivalent to INIT */ 386 if (!cdbs->shared || cdbs->shared->policy) 387 return -EBUSY; 388 389 policy->governor_data = NULL; 390 if (!--dbs_data->usage_count) { 391 sysfs_remove_group(get_governor_parent_kobj(policy), 392 get_sysfs_attr(dbs_data)); 393 394 if (!have_governor_per_policy()) 395 cdata->gdbs_data = NULL; 396 397 cdata->exit(dbs_data, policy->governor->initialized == 1); 398 kfree(dbs_data); 399 } 400 401 free_common_dbs_info(policy, cdata); 402 return 0; 403 } 404 405 static int cpufreq_governor_start(struct cpufreq_policy *policy, 406 struct dbs_data *dbs_data) 407 { 408 struct common_dbs_data *cdata = dbs_data->cdata; 409 unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; 410 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 411 struct cpu_common_dbs_info *shared = cdbs->shared; 412 int io_busy = 0; 413 414 if (!policy->cur) 415 return -EINVAL; 416 417 /* State should be equivalent to INIT */ 418 if (!shared || shared->policy) 419 return -EBUSY; 420 421 if (cdata->governor == GOV_CONSERVATIVE) { 422 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 423 424 sampling_rate = cs_tuners->sampling_rate; 425 ignore_nice = cs_tuners->ignore_nice_load; 426 } else { 427 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 428 429 sampling_rate = od_tuners->sampling_rate; 430 ignore_nice = od_tuners->ignore_nice_load; 431 io_busy = od_tuners->io_is_busy; 432 } 433 434 shared->policy = policy; 435 shared->time_stamp = ktime_get(); 436 mutex_init(&shared->timer_mutex); 437 438 for_each_cpu(j, policy->cpus) { 439 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); 440 unsigned int prev_load; 441 442 j_cdbs->prev_cpu_idle = 443 get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 444 445 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - 446 j_cdbs->prev_cpu_idle); 447 j_cdbs->prev_load = 100 * prev_load / 448 (unsigned int)j_cdbs->prev_cpu_wall; 449 450 if (ignore_nice) 451 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 452 453 INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); 454 } 455 456 if (cdata->governor == GOV_CONSERVATIVE) { 457 struct cs_cpu_dbs_info_s *cs_dbs_info = 458 cdata->get_cpu_dbs_info_s(cpu); 459 460 cs_dbs_info->down_skip = 0; 461 cs_dbs_info->requested_freq = policy->cur; 462 } else { 463 struct od_ops *od_ops = cdata->gov_ops; 464 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); 465 466 od_dbs_info->rate_mult = 1; 467 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 468 od_ops->powersave_bias_init_cpu(cpu); 469 } 470 471 gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), 472 true); 473 return 0; 474 } 475 476 static int cpufreq_governor_stop(struct cpufreq_policy *policy, 477 struct dbs_data *dbs_data) 478 { 479 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); 480 struct cpu_common_dbs_info *shared = cdbs->shared; 481 482 /* State should be equivalent to START */ 483 if (!shared || !shared->policy) 484 return -EBUSY; 485 486 /* 487 * Work-handler must see this updated, as it should not proceed any 488 * further after governor is disabled. And so timer_mutex is taken while 489 * updating this value. 490 */ 491 mutex_lock(&shared->timer_mutex); 492 shared->policy = NULL; 493 mutex_unlock(&shared->timer_mutex); 494 495 gov_cancel_work(dbs_data, policy); 496 497 mutex_destroy(&shared->timer_mutex); 498 return 0; 499 } 500 501 static int cpufreq_governor_limits(struct cpufreq_policy *policy, 502 struct dbs_data *dbs_data) 503 { 504 struct common_dbs_data *cdata = dbs_data->cdata; 505 unsigned int cpu = policy->cpu; 506 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 507 508 /* State should be equivalent to START */ 509 if (!cdbs->shared || !cdbs->shared->policy) 510 return -EBUSY; 511 512 mutex_lock(&cdbs->shared->timer_mutex); 513 if (policy->max < cdbs->shared->policy->cur) 514 __cpufreq_driver_target(cdbs->shared->policy, policy->max, 515 CPUFREQ_RELATION_H); 516 else if (policy->min > cdbs->shared->policy->cur) 517 __cpufreq_driver_target(cdbs->shared->policy, policy->min, 518 CPUFREQ_RELATION_L); 519 dbs_check_cpu(dbs_data, cpu); 520 mutex_unlock(&cdbs->shared->timer_mutex); 521 522 return 0; 523 } 524 525 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 526 struct common_dbs_data *cdata, unsigned int event) 527 { 528 struct dbs_data *dbs_data; 529 int ret; 530 531 /* Lock governor to block concurrent initialization of governor */ 532 mutex_lock(&cdata->mutex); 533 534 if (have_governor_per_policy()) 535 dbs_data = policy->governor_data; 536 else 537 dbs_data = cdata->gdbs_data; 538 539 if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { 540 ret = -EINVAL; 541 goto unlock; 542 } 543 544 switch (event) { 545 case CPUFREQ_GOV_POLICY_INIT: 546 ret = cpufreq_governor_init(policy, dbs_data, cdata); 547 break; 548 case CPUFREQ_GOV_POLICY_EXIT: 549 ret = cpufreq_governor_exit(policy, dbs_data); 550 break; 551 case CPUFREQ_GOV_START: 552 ret = cpufreq_governor_start(policy, dbs_data); 553 break; 554 case CPUFREQ_GOV_STOP: 555 ret = cpufreq_governor_stop(policy, dbs_data); 556 break; 557 case CPUFREQ_GOV_LIMITS: 558 ret = cpufreq_governor_limits(policy, dbs_data); 559 break; 560 default: 561 ret = -EINVAL; 562 } 563 564 unlock: 565 mutex_unlock(&cdata->mutex); 566 567 return ret; 568 } 569 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 570