1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/export.h> 20 #include <linux/kernel_stat.h> 21 #include <linux/slab.h> 22 23 #include "cpufreq_governor.h" 24 25 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 26 { 27 if (have_governor_per_policy()) 28 return dbs_data->cdata->attr_group_gov_pol; 29 else 30 return dbs_data->cdata->attr_group_gov_sys; 31 } 32 33 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 34 { 35 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 36 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 37 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 38 struct cpufreq_policy *policy; 39 unsigned int max_load = 0; 40 unsigned int ignore_nice; 41 unsigned int j; 42 43 if (dbs_data->cdata->governor == GOV_ONDEMAND) 44 ignore_nice = od_tuners->ignore_nice_load; 45 else 46 ignore_nice = cs_tuners->ignore_nice_load; 47 48 policy = cdbs->cur_policy; 49 50 /* Get Absolute Load */ 51 for_each_cpu(j, policy->cpus) { 52 struct cpu_dbs_common_info *j_cdbs; 53 u64 cur_wall_time, cur_idle_time; 54 unsigned int idle_time, wall_time; 55 unsigned int load; 56 int io_busy = 0; 57 58 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 59 60 /* 61 * For the purpose of ondemand, waiting for disk IO is 62 * an indication that you're performance critical, and 63 * not that the system is actually idle. So do not add 64 * the iowait time to the cpu idle time. 65 */ 66 if (dbs_data->cdata->governor == GOV_ONDEMAND) 67 io_busy = od_tuners->io_is_busy; 68 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 69 70 wall_time = (unsigned int) 71 (cur_wall_time - j_cdbs->prev_cpu_wall); 72 j_cdbs->prev_cpu_wall = cur_wall_time; 73 74 idle_time = (unsigned int) 75 (cur_idle_time - j_cdbs->prev_cpu_idle); 76 j_cdbs->prev_cpu_idle = cur_idle_time; 77 78 if (ignore_nice) { 79 u64 cur_nice; 80 unsigned long cur_nice_jiffies; 81 82 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 83 cdbs->prev_cpu_nice; 84 /* 85 * Assumption: nice time between sampling periods will 86 * be less than 2^32 jiffies for 32 bit sys 87 */ 88 cur_nice_jiffies = (unsigned long) 89 cputime64_to_jiffies64(cur_nice); 90 91 cdbs->prev_cpu_nice = 92 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 93 idle_time += jiffies_to_usecs(cur_nice_jiffies); 94 } 95 96 if (unlikely(!wall_time || wall_time < idle_time)) 97 continue; 98 99 load = 100 * (wall_time - idle_time) / wall_time; 100 101 if (load > max_load) 102 max_load = load; 103 } 104 105 dbs_data->cdata->gov_check_cpu(cpu, max_load); 106 } 107 EXPORT_SYMBOL_GPL(dbs_check_cpu); 108 109 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 110 unsigned int delay) 111 { 112 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 113 114 mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); 115 } 116 117 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 118 unsigned int delay, bool all_cpus) 119 { 120 int i; 121 122 if (!policy->governor_enabled) 123 return; 124 125 if (!all_cpus) { 126 /* 127 * Use raw_smp_processor_id() to avoid preemptible warnings. 128 * We know that this is only called with all_cpus == false from 129 * works that have been queued with *_work_on() functions and 130 * those works are canceled during CPU_DOWN_PREPARE so they 131 * can't possibly run on any other CPU. 132 */ 133 __gov_queue_work(raw_smp_processor_id(), dbs_data, delay); 134 } else { 135 for_each_cpu(i, policy->cpus) 136 __gov_queue_work(i, dbs_data, delay); 137 } 138 } 139 EXPORT_SYMBOL_GPL(gov_queue_work); 140 141 static inline void gov_cancel_work(struct dbs_data *dbs_data, 142 struct cpufreq_policy *policy) 143 { 144 struct cpu_dbs_common_info *cdbs; 145 int i; 146 147 for_each_cpu(i, policy->cpus) { 148 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 149 cancel_delayed_work_sync(&cdbs->work); 150 } 151 } 152 153 /* Will return if we need to evaluate cpu load again or not */ 154 bool need_load_eval(struct cpu_dbs_common_info *cdbs, 155 unsigned int sampling_rate) 156 { 157 if (policy_is_shared(cdbs->cur_policy)) { 158 ktime_t time_now = ktime_get(); 159 s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); 160 161 /* Do nothing if we recently have sampled */ 162 if (delta_us < (s64)(sampling_rate / 2)) 163 return false; 164 else 165 cdbs->time_stamp = time_now; 166 } 167 168 return true; 169 } 170 EXPORT_SYMBOL_GPL(need_load_eval); 171 172 static void set_sampling_rate(struct dbs_data *dbs_data, 173 unsigned int sampling_rate) 174 { 175 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 176 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 177 cs_tuners->sampling_rate = sampling_rate; 178 } else { 179 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 180 od_tuners->sampling_rate = sampling_rate; 181 } 182 } 183 184 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 185 struct common_dbs_data *cdata, unsigned int event) 186 { 187 struct dbs_data *dbs_data; 188 struct od_cpu_dbs_info_s *od_dbs_info = NULL; 189 struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; 190 struct od_ops *od_ops = NULL; 191 struct od_dbs_tuners *od_tuners = NULL; 192 struct cs_dbs_tuners *cs_tuners = NULL; 193 struct cpu_dbs_common_info *cpu_cdbs; 194 unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; 195 int io_busy = 0; 196 int rc; 197 198 if (have_governor_per_policy()) 199 dbs_data = policy->governor_data; 200 else 201 dbs_data = cdata->gdbs_data; 202 203 WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); 204 205 switch (event) { 206 case CPUFREQ_GOV_POLICY_INIT: 207 if (have_governor_per_policy()) { 208 WARN_ON(dbs_data); 209 } else if (dbs_data) { 210 dbs_data->usage_count++; 211 policy->governor_data = dbs_data; 212 return 0; 213 } 214 215 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 216 if (!dbs_data) { 217 pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); 218 return -ENOMEM; 219 } 220 221 dbs_data->cdata = cdata; 222 dbs_data->usage_count = 1; 223 rc = cdata->init(dbs_data); 224 if (rc) { 225 pr_err("%s: POLICY_INIT: init() failed\n", __func__); 226 kfree(dbs_data); 227 return rc; 228 } 229 230 if (!have_governor_per_policy()) 231 WARN_ON(cpufreq_get_global_kobject()); 232 233 rc = sysfs_create_group(get_governor_parent_kobj(policy), 234 get_sysfs_attr(dbs_data)); 235 if (rc) { 236 cdata->exit(dbs_data); 237 kfree(dbs_data); 238 return rc; 239 } 240 241 policy->governor_data = dbs_data; 242 243 /* policy latency is in ns. Convert it to us first */ 244 latency = policy->cpuinfo.transition_latency / 1000; 245 if (latency == 0) 246 latency = 1; 247 248 /* Bring kernel and HW constraints together */ 249 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 250 MIN_LATENCY_MULTIPLIER * latency); 251 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 252 latency * LATENCY_MULTIPLIER)); 253 254 if ((cdata->governor == GOV_CONSERVATIVE) && 255 (!policy->governor->initialized)) { 256 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 257 258 cpufreq_register_notifier(cs_ops->notifier_block, 259 CPUFREQ_TRANSITION_NOTIFIER); 260 } 261 262 if (!have_governor_per_policy()) 263 cdata->gdbs_data = dbs_data; 264 265 return 0; 266 case CPUFREQ_GOV_POLICY_EXIT: 267 if (!--dbs_data->usage_count) { 268 sysfs_remove_group(get_governor_parent_kobj(policy), 269 get_sysfs_attr(dbs_data)); 270 271 if (!have_governor_per_policy()) 272 cpufreq_put_global_kobject(); 273 274 if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) && 275 (policy->governor->initialized == 1)) { 276 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 277 278 cpufreq_unregister_notifier(cs_ops->notifier_block, 279 CPUFREQ_TRANSITION_NOTIFIER); 280 } 281 282 cdata->exit(dbs_data); 283 kfree(dbs_data); 284 cdata->gdbs_data = NULL; 285 } 286 287 policy->governor_data = NULL; 288 return 0; 289 } 290 291 cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 292 293 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 294 cs_tuners = dbs_data->tuners; 295 cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 296 sampling_rate = cs_tuners->sampling_rate; 297 ignore_nice = cs_tuners->ignore_nice_load; 298 } else { 299 od_tuners = dbs_data->tuners; 300 od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 301 sampling_rate = od_tuners->sampling_rate; 302 ignore_nice = od_tuners->ignore_nice_load; 303 od_ops = dbs_data->cdata->gov_ops; 304 io_busy = od_tuners->io_is_busy; 305 } 306 307 switch (event) { 308 case CPUFREQ_GOV_START: 309 if (!policy->cur) 310 return -EINVAL; 311 312 mutex_lock(&dbs_data->mutex); 313 314 for_each_cpu(j, policy->cpus) { 315 struct cpu_dbs_common_info *j_cdbs = 316 dbs_data->cdata->get_cpu_cdbs(j); 317 318 j_cdbs->cpu = j; 319 j_cdbs->cur_policy = policy; 320 j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, 321 &j_cdbs->prev_cpu_wall, io_busy); 322 if (ignore_nice) 323 j_cdbs->prev_cpu_nice = 324 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 325 326 mutex_init(&j_cdbs->timer_mutex); 327 INIT_DEFERRABLE_WORK(&j_cdbs->work, 328 dbs_data->cdata->gov_dbs_timer); 329 } 330 331 /* 332 * conservative does not implement micro like ondemand 333 * governor, thus we are bound to jiffes/HZ 334 */ 335 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 336 cs_dbs_info->down_skip = 0; 337 cs_dbs_info->enable = 1; 338 cs_dbs_info->requested_freq = policy->cur; 339 } else { 340 od_dbs_info->rate_mult = 1; 341 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 342 od_ops->powersave_bias_init_cpu(cpu); 343 } 344 345 mutex_unlock(&dbs_data->mutex); 346 347 /* Initiate timer time stamp */ 348 cpu_cdbs->time_stamp = ktime_get(); 349 350 gov_queue_work(dbs_data, policy, 351 delay_for_sampling_rate(sampling_rate), true); 352 break; 353 354 case CPUFREQ_GOV_STOP: 355 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) 356 cs_dbs_info->enable = 0; 357 358 gov_cancel_work(dbs_data, policy); 359 360 mutex_lock(&dbs_data->mutex); 361 mutex_destroy(&cpu_cdbs->timer_mutex); 362 cpu_cdbs->cur_policy = NULL; 363 364 mutex_unlock(&dbs_data->mutex); 365 366 break; 367 368 case CPUFREQ_GOV_LIMITS: 369 mutex_lock(&cpu_cdbs->timer_mutex); 370 if (policy->max < cpu_cdbs->cur_policy->cur) 371 __cpufreq_driver_target(cpu_cdbs->cur_policy, 372 policy->max, CPUFREQ_RELATION_H); 373 else if (policy->min > cpu_cdbs->cur_policy->cur) 374 __cpufreq_driver_target(cpu_cdbs->cur_policy, 375 policy->min, CPUFREQ_RELATION_L); 376 dbs_check_cpu(dbs_data, cpu); 377 mutex_unlock(&cpu_cdbs->timer_mutex); 378 break; 379 } 380 return 0; 381 } 382 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 383