1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <asm/cputime.h> 20 #include <linux/cpufreq.h> 21 #include <linux/cpumask.h> 22 #include <linux/export.h> 23 #include <linux/kernel_stat.h> 24 #include <linux/mutex.h> 25 #include <linux/slab.h> 26 #include <linux/types.h> 27 #include <linux/workqueue.h> 28 29 #include "cpufreq_governor.h" 30 31 static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) 32 { 33 if (have_governor_per_policy()) 34 return dbs_data->cdata->attr_group_gov_pol; 35 else 36 return dbs_data->cdata->attr_group_gov_sys; 37 } 38 39 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 40 { 41 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 42 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 43 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 44 struct cpufreq_policy *policy; 45 unsigned int max_load = 0; 46 unsigned int ignore_nice; 47 unsigned int j; 48 49 if (dbs_data->cdata->governor == GOV_ONDEMAND) 50 ignore_nice = od_tuners->ignore_nice_load; 51 else 52 ignore_nice = cs_tuners->ignore_nice_load; 53 54 policy = cdbs->cur_policy; 55 56 /* Get Absolute Load (in terms of freq for ondemand gov) */ 57 for_each_cpu(j, policy->cpus) { 58 struct cpu_dbs_common_info *j_cdbs; 59 u64 cur_wall_time, cur_idle_time; 60 unsigned int idle_time, wall_time; 61 unsigned int load; 62 int io_busy = 0; 63 64 j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); 65 66 /* 67 * For the purpose of ondemand, waiting for disk IO is 68 * an indication that you're performance critical, and 69 * not that the system is actually idle. So do not add 70 * the iowait time to the cpu idle time. 71 */ 72 if (dbs_data->cdata->governor == GOV_ONDEMAND) 73 io_busy = od_tuners->io_is_busy; 74 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy); 75 76 wall_time = (unsigned int) 77 (cur_wall_time - j_cdbs->prev_cpu_wall); 78 j_cdbs->prev_cpu_wall = cur_wall_time; 79 80 idle_time = (unsigned int) 81 (cur_idle_time - j_cdbs->prev_cpu_idle); 82 j_cdbs->prev_cpu_idle = cur_idle_time; 83 84 if (ignore_nice) { 85 u64 cur_nice; 86 unsigned long cur_nice_jiffies; 87 88 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 89 cdbs->prev_cpu_nice; 90 /* 91 * Assumption: nice time between sampling periods will 92 * be less than 2^32 jiffies for 32 bit sys 93 */ 94 cur_nice_jiffies = (unsigned long) 95 cputime64_to_jiffies64(cur_nice); 96 97 cdbs->prev_cpu_nice = 98 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 99 idle_time += jiffies_to_usecs(cur_nice_jiffies); 100 } 101 102 if (unlikely(!wall_time || wall_time < idle_time)) 103 continue; 104 105 load = 100 * (wall_time - idle_time) / wall_time; 106 107 if (dbs_data->cdata->governor == GOV_ONDEMAND) { 108 int freq_avg = __cpufreq_driver_getavg(policy, j); 109 if (freq_avg <= 0) 110 freq_avg = policy->cur; 111 112 load *= freq_avg; 113 } 114 115 if (load > max_load) 116 max_load = load; 117 } 118 119 dbs_data->cdata->gov_check_cpu(cpu, max_load); 120 } 121 EXPORT_SYMBOL_GPL(dbs_check_cpu); 122 123 static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, 124 unsigned int delay) 125 { 126 struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 127 128 mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); 129 } 130 131 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, 132 unsigned int delay, bool all_cpus) 133 { 134 int i; 135 136 if (!all_cpus) { 137 __gov_queue_work(smp_processor_id(), dbs_data, delay); 138 } else { 139 for_each_cpu(i, policy->cpus) 140 __gov_queue_work(i, dbs_data, delay); 141 } 142 } 143 EXPORT_SYMBOL_GPL(gov_queue_work); 144 145 static inline void gov_cancel_work(struct dbs_data *dbs_data, 146 struct cpufreq_policy *policy) 147 { 148 struct cpu_dbs_common_info *cdbs; 149 int i; 150 151 for_each_cpu(i, policy->cpus) { 152 cdbs = dbs_data->cdata->get_cpu_cdbs(i); 153 cancel_delayed_work_sync(&cdbs->work); 154 } 155 } 156 157 /* Will return if we need to evaluate cpu load again or not */ 158 bool need_load_eval(struct cpu_dbs_common_info *cdbs, 159 unsigned int sampling_rate) 160 { 161 if (policy_is_shared(cdbs->cur_policy)) { 162 ktime_t time_now = ktime_get(); 163 s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); 164 165 /* Do nothing if we recently have sampled */ 166 if (delta_us < (s64)(sampling_rate / 2)) 167 return false; 168 else 169 cdbs->time_stamp = time_now; 170 } 171 172 return true; 173 } 174 EXPORT_SYMBOL_GPL(need_load_eval); 175 176 static void set_sampling_rate(struct dbs_data *dbs_data, 177 unsigned int sampling_rate) 178 { 179 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 180 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 181 cs_tuners->sampling_rate = sampling_rate; 182 } else { 183 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 184 od_tuners->sampling_rate = sampling_rate; 185 } 186 } 187 188 int cpufreq_governor_dbs(struct cpufreq_policy *policy, 189 struct common_dbs_data *cdata, unsigned int event) 190 { 191 struct dbs_data *dbs_data; 192 struct od_cpu_dbs_info_s *od_dbs_info = NULL; 193 struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; 194 struct od_ops *od_ops = NULL; 195 struct od_dbs_tuners *od_tuners = NULL; 196 struct cs_dbs_tuners *cs_tuners = NULL; 197 struct cpu_dbs_common_info *cpu_cdbs; 198 unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; 199 int io_busy = 0; 200 int rc; 201 202 if (have_governor_per_policy()) 203 dbs_data = policy->governor_data; 204 else 205 dbs_data = cdata->gdbs_data; 206 207 WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)); 208 209 switch (event) { 210 case CPUFREQ_GOV_POLICY_INIT: 211 if (have_governor_per_policy()) { 212 WARN_ON(dbs_data); 213 } else if (dbs_data) { 214 dbs_data->usage_count++; 215 policy->governor_data = dbs_data; 216 return 0; 217 } 218 219 dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL); 220 if (!dbs_data) { 221 pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__); 222 return -ENOMEM; 223 } 224 225 dbs_data->cdata = cdata; 226 dbs_data->usage_count = 1; 227 rc = cdata->init(dbs_data); 228 if (rc) { 229 pr_err("%s: POLICY_INIT: init() failed\n", __func__); 230 kfree(dbs_data); 231 return rc; 232 } 233 234 if (!have_governor_per_policy()) 235 WARN_ON(cpufreq_get_global_kobject()); 236 237 rc = sysfs_create_group(get_governor_parent_kobj(policy), 238 get_sysfs_attr(dbs_data)); 239 if (rc) { 240 cdata->exit(dbs_data); 241 kfree(dbs_data); 242 return rc; 243 } 244 245 policy->governor_data = dbs_data; 246 247 /* policy latency is in nS. Convert it to uS first */ 248 latency = policy->cpuinfo.transition_latency / 1000; 249 if (latency == 0) 250 latency = 1; 251 252 /* Bring kernel and HW constraints together */ 253 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 254 MIN_LATENCY_MULTIPLIER * latency); 255 set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate, 256 latency * LATENCY_MULTIPLIER)); 257 258 if ((cdata->governor == GOV_CONSERVATIVE) && 259 (!policy->governor->initialized)) { 260 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 261 262 cpufreq_register_notifier(cs_ops->notifier_block, 263 CPUFREQ_TRANSITION_NOTIFIER); 264 } 265 266 if (!have_governor_per_policy()) 267 cdata->gdbs_data = dbs_data; 268 269 return 0; 270 case CPUFREQ_GOV_POLICY_EXIT: 271 if (!--dbs_data->usage_count) { 272 sysfs_remove_group(get_governor_parent_kobj(policy), 273 get_sysfs_attr(dbs_data)); 274 275 if (!have_governor_per_policy()) 276 cpufreq_put_global_kobject(); 277 278 if ((dbs_data->cdata->governor == GOV_CONSERVATIVE) && 279 (policy->governor->initialized == 1)) { 280 struct cs_ops *cs_ops = dbs_data->cdata->gov_ops; 281 282 cpufreq_unregister_notifier(cs_ops->notifier_block, 283 CPUFREQ_TRANSITION_NOTIFIER); 284 } 285 286 cdata->exit(dbs_data); 287 kfree(dbs_data); 288 cdata->gdbs_data = NULL; 289 } 290 291 policy->governor_data = NULL; 292 return 0; 293 } 294 295 cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); 296 297 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 298 cs_tuners = dbs_data->tuners; 299 cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 300 sampling_rate = cs_tuners->sampling_rate; 301 ignore_nice = cs_tuners->ignore_nice_load; 302 } else { 303 od_tuners = dbs_data->tuners; 304 od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu); 305 sampling_rate = od_tuners->sampling_rate; 306 ignore_nice = od_tuners->ignore_nice_load; 307 od_ops = dbs_data->cdata->gov_ops; 308 io_busy = od_tuners->io_is_busy; 309 } 310 311 switch (event) { 312 case CPUFREQ_GOV_START: 313 if (!policy->cur) 314 return -EINVAL; 315 316 mutex_lock(&dbs_data->mutex); 317 318 for_each_cpu(j, policy->cpus) { 319 struct cpu_dbs_common_info *j_cdbs = 320 dbs_data->cdata->get_cpu_cdbs(j); 321 322 j_cdbs->cpu = j; 323 j_cdbs->cur_policy = policy; 324 j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, 325 &j_cdbs->prev_cpu_wall, io_busy); 326 if (ignore_nice) 327 j_cdbs->prev_cpu_nice = 328 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 329 330 mutex_init(&j_cdbs->timer_mutex); 331 INIT_DEFERRABLE_WORK(&j_cdbs->work, 332 dbs_data->cdata->gov_dbs_timer); 333 } 334 335 /* 336 * conservative does not implement micro like ondemand 337 * governor, thus we are bound to jiffes/HZ 338 */ 339 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { 340 cs_dbs_info->down_skip = 0; 341 cs_dbs_info->enable = 1; 342 cs_dbs_info->requested_freq = policy->cur; 343 } else { 344 od_dbs_info->rate_mult = 1; 345 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 346 od_ops->powersave_bias_init_cpu(cpu); 347 } 348 349 mutex_unlock(&dbs_data->mutex); 350 351 /* Initiate timer time stamp */ 352 cpu_cdbs->time_stamp = ktime_get(); 353 354 gov_queue_work(dbs_data, policy, 355 delay_for_sampling_rate(sampling_rate), true); 356 break; 357 358 case CPUFREQ_GOV_STOP: 359 if (dbs_data->cdata->governor == GOV_CONSERVATIVE) 360 cs_dbs_info->enable = 0; 361 362 gov_cancel_work(dbs_data, policy); 363 364 mutex_lock(&dbs_data->mutex); 365 mutex_destroy(&cpu_cdbs->timer_mutex); 366 cpu_cdbs->cur_policy = NULL; 367 368 mutex_unlock(&dbs_data->mutex); 369 370 break; 371 372 case CPUFREQ_GOV_LIMITS: 373 mutex_lock(&cpu_cdbs->timer_mutex); 374 if (policy->max < cpu_cdbs->cur_policy->cur) 375 __cpufreq_driver_target(cpu_cdbs->cur_policy, 376 policy->max, CPUFREQ_RELATION_H); 377 else if (policy->min > cpu_cdbs->cur_policy->cur) 378 __cpufreq_driver_target(cpu_cdbs->cur_policy, 379 policy->min, CPUFREQ_RELATION_L); 380 dbs_check_cpu(dbs_data, cpu); 381 mutex_unlock(&cpu_cdbs->timer_mutex); 382 break; 383 } 384 return 0; 385 } 386 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 387