/*
 * drivers/cpufreq/cpufreq_governor.c
 *
 * CPUFREQ governors common code
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *           (C) 2003 Jun Nakajima <jun.nakajima@intel.com>
 *           (C) 2009 Alexander Clouter <alex@digriz.org.uk>
 *           (c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <asm/cputime.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/tick.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#include "cpufreq_governor.h"

static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = cputime_to_usecs(cur_wall_time);

        return cputime_to_usecs(idle_time);
}

u64 get_cpu_idle_time(unsigned int cpu, u64 *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, NULL);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);
        else
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);

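/*
 * dbs_check_cpu - estimate the load on all CPUs of a policy
 *
 * For every CPU sharing the policy of @cpu, compute the busy fraction of the
 * wall time elapsed since the last sample (treating iowait as busy time when
 * io_is_busy is set, and discounting nice time when ignore_nice is set).
 * For ondemand the per-CPU load is additionally scaled by the average
 * frequency reported by the driver.  The largest of the per-CPU values is
 * passed to the governor's gov_check_cpu() callback.
 */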
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
{
        struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        struct cpufreq_policy *policy;
        unsigned int max_load = 0;
        unsigned int ignore_nice;
        unsigned int j;

        if (dbs_data->governor == GOV_ONDEMAND)
                ignore_nice = od_tuners->ignore_nice;
        else
                ignore_nice = cs_tuners->ignore_nice;

        policy = cdbs->cur_policy;

        /* Get Absolute Load (in terms of freq for ondemand gov) */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_common_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time, cur_iowait_time;
                unsigned int idle_time, wall_time, iowait_time;
                unsigned int load;

                j_cdbs = dbs_data->get_cpu_cdbs(j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

                wall_time = (unsigned int)
                        (cur_wall_time - j_cdbs->prev_cpu_wall);
                j_cdbs->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int)
                        (cur_idle_time - j_cdbs->prev_cpu_idle);
                j_cdbs->prev_cpu_idle = cur_idle_time;

                if (ignore_nice) {
                        u64 cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
                                        j_cdbs->prev_cpu_nice;
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies for 32 bit sys
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_cdbs->prev_cpu_nice =
                                kcpustat_cpu(j).cpustat[CPUTIME_NICE];
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (dbs_data->governor == GOV_ONDEMAND) {
                        struct od_cpu_dbs_info_s *od_j_dbs_info =
                                dbs_data->get_cpu_dbs_info_s(j);

                        cur_iowait_time = get_cpu_iowait_time_us(j,
                                        &cur_wall_time);
                        if (cur_iowait_time == -1ULL)
                                cur_iowait_time = 0;

                        iowait_time = (unsigned int) (cur_iowait_time -
                                        od_j_dbs_info->prev_cpu_iowait);
                        od_j_dbs_info->prev_cpu_iowait = cur_iowait_time;

                        /*
                         * For the purpose of ondemand, waiting for disk IO is
                         * an indication that you're performance critical, and
                         * not that the system is actually idle. So subtract
                         * the iowait time from the cpu idle time.
                         */
                        if (od_tuners->io_is_busy && idle_time >= iowait_time)
                                idle_time -= iowait_time;
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                if (dbs_data->governor == GOV_ONDEMAND) {
                        int freq_avg = __cpufreq_driver_getavg(policy, j);
                        if (freq_avg <= 0)
                                freq_avg = policy->cur;

                        load *= freq_avg;
                }

                if (load > max_load)
                        max_load = load;
        }

        dbs_data->gov_check_cpu(cpu, max_load);
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);

static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu,
                                  unsigned int sampling_rate)
{
        int delay = delay_for_sampling_rate(sampling_rate);
        struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);

        schedule_delayed_work_on(cpu, &cdbs->work, delay);
}

static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu)
{
        struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);

        cancel_delayed_work_sync(&cdbs->work);
}

/*
 * Return true if the CPU load should be (re)evaluated.  For a shared policy,
 * return false if some CPU already sampled it within the last half of a
 * sampling period.
 */
bool need_load_eval(struct cpu_dbs_common_info *cdbs,
                unsigned int sampling_rate)
{
        if (policy_is_shared(cdbs->cur_policy)) {
                ktime_t time_now = ktime_get();
                s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp);

                /* Do nothing if we have sampled recently */
                if (delta_us < (s64)(sampling_rate / 2))
                        return false;
                else
                        cdbs->time_stamp = time_now;
        }

        return true;
}
EXPORT_SYMBOL_GPL(need_load_eval);

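/*
 * cpufreq_governor_dbs - common ->governor() backend for the ondemand and
 * conservative governors
 *
 * CPUFREQ_GOV_START: initialize the per-CPU bookkeeping and deferrable work
 * for every CPU of the policy; for the first policy using the governor, also
 * create the global sysfs attributes and derive the sampling rate from the
 * driver's transition latency; finally start the sampling timers.
 *
 * CPUFREQ_GOV_STOP: cancel the sampling timers and, when the last policy
 * using the governor goes away, remove the sysfs attributes (and the
 * transition notifier used by conservative).
 *
 * CPUFREQ_GOV_LIMITS: clamp the current frequency to the new policy limits
 * and re-evaluate the load.
 */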
int cpufreq_governor_dbs(struct dbs_data *dbs_data,
                struct cpufreq_policy *policy, unsigned int event)
{
        struct od_cpu_dbs_info_s *od_dbs_info = NULL;
        struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
        struct cs_ops *cs_ops = NULL;
        struct od_ops *od_ops = NULL;
        struct od_dbs_tuners *od_tuners = dbs_data->tuners;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        struct cpu_dbs_common_info *cpu_cdbs;
        unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
        int rc;

        cpu_cdbs = dbs_data->get_cpu_cdbs(cpu);

        if (dbs_data->governor == GOV_CONSERVATIVE) {
                cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
                sampling_rate = &cs_tuners->sampling_rate;
                ignore_nice = cs_tuners->ignore_nice;
                cs_ops = dbs_data->gov_ops;
        } else {
                od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
                sampling_rate = &od_tuners->sampling_rate;
                ignore_nice = od_tuners->ignore_nice;
                od_ops = dbs_data->gov_ops;
        }

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!policy->cur)
                        return -EINVAL;

                mutex_lock(&dbs_data->mutex);

                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_common_info *j_cdbs =
                                dbs_data->get_cpu_cdbs(j);

                        j_cdbs->cpu = j;
                        j_cdbs->cur_policy = policy;
                        j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
                                        &j_cdbs->prev_cpu_wall);
                        if (ignore_nice)
                                j_cdbs->prev_cpu_nice =
                                        kcpustat_cpu(j).cpustat[CPUTIME_NICE];

                        mutex_init(&j_cdbs->timer_mutex);
                        INIT_DEFERRABLE_WORK(&j_cdbs->work,
                                             dbs_data->gov_dbs_timer);
                }

                if (!policy->governor->initialized) {
                        rc = sysfs_create_group(cpufreq_global_kobject,
                                        dbs_data->attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_data->mutex);
                                return rc;
                        }
                }

                /*
                 * conservative does not implement micro-accounting like the
                 * ondemand governor, thus we are bound to jiffies/HZ
                 */
                if (dbs_data->governor == GOV_CONSERVATIVE) {
                        cs_dbs_info->down_skip = 0;
                        cs_dbs_info->enable = 1;
                        cs_dbs_info->requested_freq = policy->cur;

                        if (!policy->governor->initialized) {
                                cpufreq_register_notifier(cs_ops->notifier_block,
                                                CPUFREQ_TRANSITION_NOTIFIER);

                                dbs_data->min_sampling_rate =
                                        MIN_SAMPLING_RATE_RATIO *
                                        jiffies_to_usecs(10);
                        }
                } else {
                        od_dbs_info->rate_mult = 1;
                        od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
                        od_ops->powersave_bias_init_cpu(cpu);

                        if (!policy->governor->initialized)
                                od_tuners->io_is_busy = od_ops->io_busy();
                }

                if (policy->governor->initialized)
                        goto unlock;

                /* policy latency is in ns; convert it to us first */
                latency = policy->cpuinfo.transition_latency / 1000;
                if (latency == 0)
                        latency = 1;

                /* Bring kernel and HW constraints together */
                dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
                                MIN_LATENCY_MULTIPLIER * latency);
                *sampling_rate = max(dbs_data->min_sampling_rate, latency *
                                LATENCY_MULTIPLIER);
unlock:
                mutex_unlock(&dbs_data->mutex);

                /* Initialize the timer time stamp */
                cpu_cdbs->time_stamp = ktime_get();

                for_each_cpu(j, policy->cpus)
                        dbs_timer_init(dbs_data, j, *sampling_rate);
                break;

        case CPUFREQ_GOV_STOP:
                if (dbs_data->governor == GOV_CONSERVATIVE)
                        cs_dbs_info->enable = 0;

                for_each_cpu(j, policy->cpus)
                        dbs_timer_exit(dbs_data, j);

                mutex_lock(&dbs_data->mutex);
                mutex_destroy(&cpu_cdbs->timer_mutex);

                if (policy->governor->initialized == 1) {
                        sysfs_remove_group(cpufreq_global_kobject,
                                        dbs_data->attr_group);
                        if (dbs_data->governor == GOV_CONSERVATIVE)
                                cpufreq_unregister_notifier(cs_ops->notifier_block,
                                                CPUFREQ_TRANSITION_NOTIFIER);
                }
                mutex_unlock(&dbs_data->mutex);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&cpu_cdbs->timer_mutex);
                if (policy->max < cpu_cdbs->cur_policy->cur)
                        __cpufreq_driver_target(cpu_cdbs->cur_policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > cpu_cdbs->cur_policy->cur)
                        __cpufreq_driver_target(cpu_cdbs->cur_policy,
                                        policy->min, CPUFREQ_RELATION_L);
                dbs_check_cpu(dbs_data, cpu);
                mutex_unlock(&cpu_cdbs->timer_mutex);
                break;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
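/*
 * For reference, a governor plugs into this common code by filling in a
 * struct dbs_data and forwarding its ->governor() callback here.  A minimal
 * sketch along the lines of the ondemand governor (field and helper names
 * abridged; see cpufreq_ondemand.c for the real wiring):
 *
 *      static struct dbs_data od_dbs_data = {
 *              .governor               = GOV_ONDEMAND,
 *              .attr_group             = &od_attr_group,
 *              .tuners                 = &od_tuners,
 *              .get_cpu_cdbs           = get_cpu_cdbs,
 *              .get_cpu_dbs_info_s     = get_cpu_dbs_info_s,
 *              .gov_dbs_timer          = od_dbs_timer,
 *              .gov_check_cpu          = od_check_cpu,
 *              .gov_ops                = &od_ops,
 *      };
 *
 *      static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
 *                      unsigned int event)
 *      {
 *              return cpufreq_governor_dbs(&od_dbs_data, policy, event);
 *      }
 */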