1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/export.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/slab.h> 22 23 #include "cpufreq_governor.h" 24 25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26 { 27 if (have_governor_per_policy()) 28 return dbs_data->cdata->attr_group_gov_pol; 29 else 30 return dbs_data->cdata->attr_group_gov_sys; 31 } 32 33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 34 { 35 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 struct cpufreq_policy *policy = cdbs->shared->policy; 39 unsigned int sampling_rate; 40 unsigned int max_load = 0; 41 unsigned int ignore_nice; 42 unsigned int j; 43 44 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 45 struct od_cpu_dbs_info_s *od_dbs_info = 46 dbs_data->cdata->get_cpu_dbs_info_s(cpu); 47 48 /* 49 * Sometimes, the ondemand governor uses an additional 50 * multiplier to give long delays. So apply this multiplier to 51 * the 'sampling_rate', so as to keep the wake-up-from-idle 52 * detection logic a bit conservative. 53 */ 54 sampling_rate = od_tuners->sampling_rate; 55 sampling_rate *= od_dbs_info->rate_mult; 56 57 ignore_nice = od_tuners->ignore_nice_load; 58 } else { 59 sampling_rate = cs_tuners->sampling_rate; 60 ignore_nice = cs_tuners->ignore_nice_load; 61 } 62 63 /* Get Absolute Load */ 64 for_each_cpu(j, policy->cpus) { 65 struct cpu_dbs_info *j_cdbs; 66 u64 cur_wall_time, cur_idle_time; 67 unsigned int idle_time, wall_time; 68 unsigned int load; 69 int io_busy = 0; 70 71 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 72 73 /* 74 * For the purpose of ondemand, waiting for disk IO is 75 * an indication that you're performance critical, and 76 * not that the system is actually idle. So do not add 77 * the iowait time to the cpu idle time. 78 */ 79 if (dbs_data->cdata->governor == GOV_ONDEMAND) 80 io_busy = od_tuners->io_is_busy; 81 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 82 83 wall_time = (unsigned int) 84 (cur_wall_time - j_cdbs->prev_cpu_wall); 85 j_cdbs->prev_cpu_wall = cur_wall_time; 86 87 if (cur_idle_time < j_cdbs->prev_cpu_idle) 88 cur_idle_time = j_cdbs->prev_cpu_idle; 89 90 idle_time = (unsigned int) 91 (cur_idle_time - j_cdbs->prev_cpu_idle); 92 j_cdbs->prev_cpu_idle = cur_idle_time; 93 94 if (ignore_nice) { 95 u64 cur_nice; 96 unsigned long cur_nice_jiffies; 97 98 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 99 cdbs->prev_cpu_nice; 100 /* 101 * Assumption: nice time between sampling periods will 102 * be less than 2^32 jiffies for 32 bit sys 103 */ 104 cur_nice_jiffies = (unsigned long) 105 cputime64_to_jiffies64(cur_nice); 106 107 cdbs->prev_cpu_nice = 108 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 109 idle_time += jiffies_to_usecs(cur_nice_jiffies); 110 } 111 112 if (unlikely(!wall_time || wall_time < idle_time)) 113 continue; 114 115 /* 116 * If the CPU had gone completely idle, and a task just woke up 117 * on this CPU now, it would be unfair to calculate 'load' the 118 * usual way for this elapsed time-window, because it will show 119 * near-zero load, irrespective of how CPU intensive that task 120 * actually is. This is undesirable for latency-sensitive bursty 121 * workloads. 122 * 123 * To avoid this, we reuse the 'load' from the previous 124 * time-window and give this task a chance to start with a 125 * reasonably high CPU frequency. (However, we shouldn't over-do 126 * this copy, lest we get stuck at a high load (high frequency) 127 * for too long, even when the current system load has actually 128 * dropped down. So we perform the copy only once, upon the 129 * first wake-up from idle.) 130 * 131 * Detecting this situation is easy: the governor's deferrable 132 * timer would not have fired during CPU-idle periods. Hence 133 * an unusually large 'wall_time' (as compared to the sampling 134 * rate) indicates this scenario. 135 * 136 * prev_load can be zero in two cases and we must recalculate it 137 * for both cases: 138 * - during long idle intervals 139 * - explicitly set to zero 140 */ 141 if (unlikely(wall_time > (2 * sampling_rate) && 142 j_cdbs->prev_load)) { 143 load = j_cdbs->prev_load; 144 145 /* 146 * Perform a destructive copy, to ensure that we copy 147 * the previous load only once, upon the first wake-up 148 * from idle. 149 */ 150 j_cdbs->prev_load = 0; 151 } else { 152 load = 100 * (wall_time - idle_time) / wall_time; 153 j_cdbs->prev_load = load; 154 } 155 156 if (load > max_load) 157 max_load = load; 158 } 159 160 dbs_data->cdata->gov_check_cpu(cpu, max_load); 161 } 162 EXPORT_SYMBOL_GPL(dbs_check_cpu); 163 164 void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) 165 { 166 struct dbs_data *dbs_data = policy->governor_data; 167 struct cpu_dbs_info *cdbs; 168 int cpu; 169 170 for_each_cpu(cpu, policy->cpus) { 171 cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 172 cdbs->timer.expires = jiffies + delay; 173 add_timer_on(&cdbs->timer, cpu); 174 } 175 } 176 EXPORT_SYMBOL_GPL(gov_add_timers); 177 178 static inline void gov_cancel_timers(struct cpufreq_policy *policy) 179 { 180 struct dbs_data *dbs_data = policy->governor_data; 181 struct cpu_dbs_info *cdbs; 182 int i; 183 184 for_each_cpu(i, policy->cpus) { 185 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 186 del_timer_sync(&cdbs->timer); 187 } 188 } 189 190 void gov_cancel_work(struct cpu_common_dbs_info *shared) 191 { 192 /* Tell dbs_timer_handler() to skip queuing up work items. */ 193 atomic_inc(&shared->skip_work); 194 /* 195 * If dbs_timer_handler() is already running, it may not notice the 196 * incremented skip_work, so wait for it to complete to prevent its work 197 * item from being queued up after the cancel_work_sync() below. 198 */ 199 gov_cancel_timers(shared->policy); 200 /* 201 * In case dbs_timer_handler() managed to run and spawn a work item 202 * before the timers have been canceled, wait for that work item to 203 * complete and then cancel all of the timers set up by it. If 204 * dbs_timer_handler() runs again at that point, it will see the 205 * positive value of skip_work and won't spawn any more work items. 206 */ 207 cancel_work_sync(&shared->work); 208 gov_cancel_timers(shared->policy); 209 atomic_set(&shared->skip_work, 0); 210 } 211 EXPORT_SYMBOL_GPL(gov_cancel_work); 212 213 /* Will return if we need to evaluate cpu load again or not */ 214 static bool need_load_eval(struct cpu_common_dbs_info *shared, 215 unsigned int sampling_rate) 216 { 217 if (policy_is_shared(shared->policy)) { 218 ktime_t time_now = ktime_get(); 219 s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); 220 221 /* Do nothing if we recently have sampled */ 222 if (delta_us < (s64)(sampling_rate / 2)) 223 return false; 224 else 225 shared->time_stamp = time_now; 226 } 227 228 return true; 229 } 230 231 static void dbs_work_handler(struct work_struct *work) 232 { 233 struct cpu_common_dbs_info *shared = container_of(work, struct 234 cpu_common_dbs_info, work); 235 struct cpufreq_policy *policy; 236 struct dbs_data *dbs_data; 237 unsigned int sampling_rate, delay; 238 bool eval_load; 239 240 policy = shared->policy; 241 dbs_data = policy->governor_data; 242 243 /* Kill all timers */ 244 gov_cancel_timers(policy); 245 246 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 247 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 248 249 sampling_rate = cs_tuners->sampling_rate; 250 } else { 251 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 252 253 sampling_rate = od_tuners->sampling_rate; 254 } 255 256 eval_load = need_load_eval(shared, sampling_rate); 257 258 /* 259 * Make sure cpufreq_governor_limits() isn't evaluating load in 260 * parallel. 261 */ 262 mutex_lock(&shared->timer_mutex); 263 delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); 264 mutex_unlock(&shared->timer_mutex); 265 266 atomic_dec(&shared->skip_work); 267 268 gov_add_timers(policy, delay); 269 } 270 271 static void dbs_timer_handler(unsigned long data) 272 { 273 struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; 274 struct cpu_common_dbs_info *shared = cdbs->shared; 275 276 /* 277 * Timer handler may not be allowed to queue the work at the moment, 278 * because: 279 * - Another timer handler has done that 280 * - We are stopping the governor 281 * - Or we are updating the sampling rate of the ondemand governor 282 */ 283 if (atomic_inc_return(&shared->skip_work) > 1) 284 atomic_dec(&shared->skip_work); 285 else 286 queue_work(system_wq, &shared->work); 287 } 288 289 static void set_sampling_rate(struct dbs_data *dbs_data, 290 unsigned int sampling_rate) 291 { 292 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 293 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 294 cs_tuners->sampling_rate = sampling_rate; 295 } else { 296 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 297 od_tuners->sampling_rate = sampling_rate; 298 } 299 } 300 301 static int alloc_common_dbs_info(struct cpufreq_policy *policy, 302 struct common_dbs_data *cdata) 303 { 304 struct cpu_common_dbs_info *shared; 305 int j; 306 307 /* Allocate memory for the common information for policy->cpus */ 308 shared = kzalloc(sizeof(*shared), GFP_KERNEL); 309 if (!shared) 310 return -ENOMEM; 311 312 /* Set shared for all CPUs, online+offline */ 313 for_each_cpu(j, policy->related_cpus) 314 cdata->get_cpu_cdbs(j)->shared = shared; 315 316 mutex_init(&shared->timer_mutex); 317 atomic_set(&shared->skip_work, 0); 318 INIT_WORK(&shared->work, dbs_work_handler); 319 return 0; 320 } 321 322 static void free_common_dbs_info(struct cpufreq_policy *policy, 323 struct common_dbs_data *cdata) 324 { 325 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 326 struct cpu_common_dbs_info *shared = cdbs->shared; 327 int j; 328 329 mutex_destroy(&shared->timer_mutex); 330 331 for_each_cpu(j, policy->cpus) 332 cdata->get_cpu_cdbs(j)->shared = NULL; 333 334 kfree(shared); 335 } 336 337 static int cpufreq_governor_init(struct cpufreq_policy *policy, 338 struct dbs_data *dbs_data, 339 struct common_dbs_data *cdata) 340 { 341 unsigned int latency; 342 int ret; 343 344 /* State should be equivalent to EXIT */ 345 if (policy->governor_data) 346 return -EBUSY; 347 348 if (dbs_data) { 349 if (WARN_ON(have_governor_per_policy())) 350 return -EINVAL; 351 352 ret = alloc_common_dbs_info(policy, cdata); 353 if (ret) 354 return ret; 355 356 dbs_data->usage_count++; 357 policy->governor_data = dbs_data; 358 return 0; 359 } 360 361 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 362 if (!dbs_data) 363 return -ENOMEM; 364 365 ret = alloc_common_dbs_info(policy, cdata); 366 if (ret) 367 goto free_dbs_data; 368 369 dbs_data->cdata = cdata; 370 dbs_data->usage_count = 1; 371 372 ret = cdata->init(dbs_data, !policy->governor->initialized); 373 if (ret) 374 goto free_common_dbs_info; 375 376 /* policy latency is in ns. Convert it to us first */ 377 latency = policy->cpuinfo.transition_latency / 1000; 378 if (latency == 0) 379 latency = 1; 380 381 /* Bring kernel and HW constraints together */ 382 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 383 MIN_LATENCY_MULTIPLIER * latency); 384 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 385 latency * LATENCY_MULTIPLIER)); 386 387 if (!have_governor_per_policy()) 388 cdata->gdbs_data = dbs_data; 389 390 policy->governor_data = dbs_data; 391 392 ret = sysfs_create_group(get_governor_parent_kobj(policy), 393 get_sysfs_attr(dbs_data)); 394 if (ret) 395 goto reset_gdbs_data; 396 397 return 0; 398 399 reset_gdbs_data: 400 policy->governor_data = NULL; 401 402 if (!have_governor_per_policy()) 403 cdata->gdbs_data = NULL; 404 cdata->exit(dbs_data, !policy->governor->initialized); 405 free_common_dbs_info: 406 free_common_dbs_info(policy, cdata); 407 free_dbs_data: 408 kfree(dbs_data); 409 return ret; 410 } 411 412 static int cpufreq_governor_exit(struct cpufreq_policy *policy, 413 struct dbs_data *dbs_data) 414 { 415 struct common_dbs_data *cdata = dbs_data->cdata; 416 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); 417 418 /* State should be equivalent to INIT */ 419 if (!cdbs->shared || cdbs->shared->policy) 420 return -EBUSY; 421 422 if (!--dbs_data->usage_count) { 423 sysfs_remove_group(get_governor_parent_kobj(policy), 424 get_sysfs_attr(dbs_data)); 425 426 policy->governor_data = NULL; 427 428 if (!have_governor_per_policy()) 429 cdata->gdbs_data = NULL; 430 431 cdata->exit(dbs_data, policy->governor->initialized == 1); 432 kfree(dbs_data); 433 } else { 434 policy->governor_data = NULL; 435 } 436 437 free_common_dbs_info(policy, cdata); 438 return 0; 439 } 440 441 static int cpufreq_governor_start(struct cpufreq_policy *policy, 442 struct dbs_data *dbs_data) 443 { 444 struct common_dbs_data *cdata = dbs_data->cdata; 445 unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; 446 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 447 struct cpu_common_dbs_info *shared = cdbs->shared; 448 int io_busy = 0; 449 450 if (!policy->cur) 451 return -EINVAL; 452 453 /* State should be equivalent to INIT */ 454 if (!shared || shared->policy) 455 return -EBUSY; 456 457 if (cdata->governor == GOV_CONSERVATIVE) { 458 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 459 460 sampling_rate = cs_tuners->sampling_rate; 461 ignore_nice = cs_tuners->ignore_nice_load; 462 } else { 463 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 464 465 sampling_rate = od_tuners->sampling_rate; 466 ignore_nice = od_tuners->ignore_nice_load; 467 io_busy = od_tuners->io_is_busy; 468 } 469 470 shared->policy = policy; 471 shared->time_stamp = ktime_get(); 472 473 for_each_cpu(j, policy->cpus) { 474 struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); 475 unsigned int prev_load; 476 477 j_cdbs->prev_cpu_idle = 478 get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); 479 480 prev_load = (unsigned int)(j_cdbs->prev_cpu_wall - 481 j_cdbs->prev_cpu_idle); 482 j_cdbs->prev_load = 100 * prev_load / 483 (unsigned int)j_cdbs->prev_cpu_wall; 484 485 if (ignore_nice) 486 j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 487 488 __setup_timer(&j_cdbs->timer, dbs_timer_handler, 489 (unsigned long)j_cdbs, 490 TIMER_DEFERRABLE | TIMER_IRQSAFE); 491 } 492 493 if (cdata->governor == GOV_CONSERVATIVE) { 494 struct cs_cpu_dbs_info_s *cs_dbs_info = 495 cdata->get_cpu_dbs_info_s(cpu); 496 497 cs_dbs_info->down_skip = 0; 498 cs_dbs_info->requested_freq = policy->cur; 499 } else { 500 struct od_ops *od_ops = cdata->gov_ops; 501 struct od_cpu_dbs_info_s *od_dbs_info = cdata->get_cpu_dbs_info_s(cpu); 502 503 od_dbs_info->rate_mult = 1; 504 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 505 od_ops->powersave_bias_init_cpu(cpu); 506 } 507 508 gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); 509 return 0; 510 } 511 512 static int cpufreq_governor_stop(struct cpufreq_policy *policy, 513 struct dbs_data *dbs_data) 514 { 515 struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); 516 struct cpu_common_dbs_info *shared = cdbs->shared; 517 518 /* State should be equivalent to START */ 519 if (!shared || !shared->policy) 520 return -EBUSY; 521 522 gov_cancel_work(shared); 523 shared->policy = NULL; 524 525 return 0; 526 } 527 528 static int cpufreq_governor_limits(struct cpufreq_policy *policy, 529 struct dbs_data *dbs_data) 530 { 531 struct common_dbs_data *cdata = dbs_data->cdata; 532 unsigned int cpu = policy->cpu; 533 struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); 534 535 /* State should be equivalent to START */ 536 if (!cdbs->shared || !cdbs->shared->policy) 537 return -EBUSY; 538 539 mutex_lock(&cdbs->shared->timer_mutex); 540 if (policy->max < cdbs->shared->policy->cur) 541 __cpufreq_driver_target(cdbs->shared->policy, policy->max, 542 CPUFREQ_RELATION_H); 543 else if (policy->min > cdbs->shared->policy->cur) 544 __cpufreq_driver_target(cdbs->shared->policy, policy->min, 545 CPUFREQ_RELATION_L); 546 dbs_check_cpu(dbs_data, cpu); 547 mutex_unlock(&cdbs->shared->timer_mutex); 548 549 return 0; 550 } 551 552 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 553 struct common_dbs_data *cdata, unsigned int event) 554 { 555 struct dbs_data *dbs_data; 556 int ret; 557 558 /* Lock governor to block concurrent initialization of governor */ 559 mutex_lock(&cdata->mutex); 560 561 if (have_governor_per_policy()) 562 dbs_data = policy->governor_data; 563 else 564 dbs_data = cdata->gdbs_data; 565 566 if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { 567 ret = -EINVAL; 568 goto unlock; 569 } 570 571 switch (event) { 572 case CPUFREQ_GOV_POLICY_INIT: 573 ret = cpufreq_governor_init(policy, dbs_data, cdata); 574 break; 575 case CPUFREQ_GOV_POLICY_EXIT: 576 ret = cpufreq_governor_exit(policy, dbs_data); 577 break; 578 case CPUFREQ_GOV_START: 579 ret = cpufreq_governor_start(policy, dbs_data); 580 break; 581 case CPUFREQ_GOV_STOP: 582 ret = cpufreq_governor_stop(policy, dbs_data); 583 break; 584 case CPUFREQ_GOV_LIMITS: 585 ret = cpufreq_governor_limits(policy, dbs_data); 586 break; 587 default: 588 ret = -EINVAL; 589 } 590 591 unlock: 592 mutex_unlock(&cdata->mutex); 593 594 return ret; 595 } 596 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 597