1 /* 2 * drivers/cpufreq/cpufreq_governor.c 3 * 4 * CPUFREQ governors common code 5 * 6 * Copyright (C) 2001 Russell King 7 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 8 * (C) 2003 Jun Nakajima <jun.nakajima@intel.com> 9 * (C) 2009 Alexander Clouter <alex@digriz.org.uk> 10 * (c) 2012 Viresh Kumar <viresh.kumar@linaro.org> 11 * 12 * This program is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU General Public License version 2 as 14 * published by the Free Software Foundation. 15 */ 16 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <asm/cputime.h> 20 #include <linux/cpufreq.h> 21 #include <linux/cpumask.h> 22 #include <linux/export.h> 23 #include <linux/kernel_stat.h> 24 #include <linux/mutex.h> 25 #include <linux/tick.h> 26 #include <linux/types.h> 27 #include <linux/workqueue.h> 28 29 #include "cpufreq_governor.h" 30 31 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) 32 { 33 u64 idle_time; 34 u64 cur_wall_time; 35 u64 busy_time; 36 37 cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); 38 39 busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; 40 busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; 41 busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; 42 busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; 43 busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; 44 busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; 45 46 idle_time = cur_wall_time - busy_time; 47 if (wall) 48 *wall = cputime_to_usecs(cur_wall_time); 49 50 return cputime_to_usecs(idle_time); 51 } 52 53 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall) 54 { 55 u64 idle_time = get_cpu_idle_time_us(cpu, NULL); 56 57 if (idle_time == -1ULL) 58 return get_cpu_idle_time_jiffy(cpu, wall); 59 else 60 idle_time += get_cpu_iowait_time_us(cpu, wall); 61 62 return idle_time; 63 } 64 EXPORT_SYMBOL_GPL(get_cpu_idle_time); 65 66 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) 67 { 68 struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); 69 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 70 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 71 struct cpufreq_policy *policy; 72 unsigned int max_load = 0; 73 unsigned int ignore_nice; 74 unsigned int j; 75 76 if (dbs_data->governor == GOV_ONDEMAND) 77 ignore_nice = od_tuners->ignore_nice; 78 else 79 ignore_nice = cs_tuners->ignore_nice; 80 81 policy = cdbs->cur_policy; 82 83 /* Get Absolute Load (in terms of freq for ondemand gov) */ 84 for_each_cpu(j, policy->cpus) { 85 struct cpu_dbs_common_info *j_cdbs; 86 u64 cur_wall_time, cur_idle_time, cur_iowait_time; 87 unsigned int idle_time, wall_time, iowait_time; 88 unsigned int load; 89 90 j_cdbs = dbs_data->get_cpu_cdbs(j); 91 92 cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); 93 94 wall_time = (unsigned int) 95 (cur_wall_time - j_cdbs->prev_cpu_wall); 96 j_cdbs->prev_cpu_wall = cur_wall_time; 97 98 idle_time = (unsigned int) 99 (cur_idle_time - j_cdbs->prev_cpu_idle); 100 j_cdbs->prev_cpu_idle = cur_idle_time; 101 102 if (ignore_nice) { 103 u64 cur_nice; 104 unsigned long cur_nice_jiffies; 105 106 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - 107 cdbs->prev_cpu_nice; 108 /* 109 * Assumption: nice time between sampling periods will 110 * be less than 2^32 jiffies for 32 bit sys 111 */ 112 cur_nice_jiffies = (unsigned long) 113 cputime64_to_jiffies64(cur_nice); 114 115 cdbs->prev_cpu_nice = 116 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 117 idle_time += jiffies_to_usecs(cur_nice_jiffies); 118 } 119 120 if (dbs_data->governor == GOV_ONDEMAND) { 121 struct od_cpu_dbs_info_s *od_j_dbs_info = 122 dbs_data->get_cpu_dbs_info_s(cpu); 123 124 cur_iowait_time = get_cpu_iowait_time_us(j, 125 &cur_wall_time); 126 if (cur_iowait_time == -1ULL) 127 cur_iowait_time = 0; 128 129 iowait_time = (unsigned int) (cur_iowait_time - 130 od_j_dbs_info->prev_cpu_iowait); 131 od_j_dbs_info->prev_cpu_iowait = cur_iowait_time; 132 133 /* 134 * For the purpose of ondemand, waiting for disk IO is 135 * an indication that you're performance critical, and 136 * not that the system is actually idle. So subtract the 137 * iowait time from the cpu idle time. 138 */ 139 if (od_tuners->io_is_busy && idle_time >= iowait_time) 140 idle_time -= iowait_time; 141 } 142 143 if (unlikely(!wall_time || wall_time < idle_time)) 144 continue; 145 146 load = 100 * (wall_time - idle_time) / wall_time; 147 148 if (dbs_data->governor == GOV_ONDEMAND) { 149 int freq_avg = __cpufreq_driver_getavg(policy, j); 150 if (freq_avg <= 0) 151 freq_avg = policy->cur; 152 153 load *= freq_avg; 154 } 155 156 if (load > max_load) 157 max_load = load; 158 } 159 160 dbs_data->gov_check_cpu(cpu, max_load); 161 } 162 EXPORT_SYMBOL_GPL(dbs_check_cpu); 163 164 static inline void dbs_timer_init(struct dbs_data *dbs_data, 165 struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate) 166 { 167 int delay = delay_for_sampling_rate(sampling_rate); 168 169 INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer); 170 schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay); 171 } 172 173 static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs) 174 { 175 cancel_delayed_work_sync(&cdbs->work); 176 } 177 178 int cpufreq_governor_dbs(struct dbs_data *dbs_data, 179 struct cpufreq_policy *policy, unsigned int event) 180 { 181 struct od_cpu_dbs_info_s *od_dbs_info = NULL; 182 struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; 183 struct od_dbs_tuners *od_tuners = dbs_data->tuners; 184 struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; 185 struct cpu_dbs_common_info *cpu_cdbs; 186 unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; 187 int rc; 188 189 cpu_cdbs = dbs_data->get_cpu_cdbs(cpu); 190 191 if (dbs_data->governor == GOV_CONSERVATIVE) { 192 cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); 193 sampling_rate = &cs_tuners->sampling_rate; 194 ignore_nice = cs_tuners->ignore_nice; 195 } else { 196 od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); 197 sampling_rate = &od_tuners->sampling_rate; 198 ignore_nice = od_tuners->ignore_nice; 199 } 200 201 switch (event) { 202 case CPUFREQ_GOV_START: 203 if ((!cpu_online(cpu)) || (!policy->cur)) 204 return -EINVAL; 205 206 mutex_lock(&dbs_data->mutex); 207 208 dbs_data->enable++; 209 cpu_cdbs->cpu = cpu; 210 for_each_cpu(j, policy->cpus) { 211 struct cpu_dbs_common_info *j_cdbs; 212 j_cdbs = dbs_data->get_cpu_cdbs(j); 213 214 j_cdbs->cur_policy = policy; 215 j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, 216 &j_cdbs->prev_cpu_wall); 217 if (ignore_nice) 218 j_cdbs->prev_cpu_nice = 219 kcpustat_cpu(j).cpustat[CPUTIME_NICE]; 220 } 221 222 /* 223 * Start the timerschedule work, when this governor is used for 224 * first time 225 */ 226 if (dbs_data->enable != 1) 227 goto second_time; 228 229 rc = sysfs_create_group(cpufreq_global_kobject, 230 dbs_data->attr_group); 231 if (rc) { 232 mutex_unlock(&dbs_data->mutex); 233 return rc; 234 } 235 236 /* policy latency is in nS. Convert it to uS first */ 237 latency = policy->cpuinfo.transition_latency / 1000; 238 if (latency == 0) 239 latency = 1; 240 241 /* 242 * conservative does not implement micro like ondemand 243 * governor, thus we are bound to jiffes/HZ 244 */ 245 if (dbs_data->governor == GOV_CONSERVATIVE) { 246 struct cs_ops *ops = dbs_data->gov_ops; 247 248 cpufreq_register_notifier(ops->notifier_block, 249 CPUFREQ_TRANSITION_NOTIFIER); 250 251 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * 252 jiffies_to_usecs(10); 253 } else { 254 struct od_ops *ops = dbs_data->gov_ops; 255 256 od_tuners->io_is_busy = ops->io_busy(); 257 } 258 259 /* Bring kernel and HW constraints together */ 260 dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, 261 MIN_LATENCY_MULTIPLIER * latency); 262 *sampling_rate = max(dbs_data->min_sampling_rate, latency * 263 LATENCY_MULTIPLIER); 264 265 second_time: 266 if (dbs_data->governor == GOV_CONSERVATIVE) { 267 cs_dbs_info->down_skip = 0; 268 cs_dbs_info->enable = 1; 269 cs_dbs_info->requested_freq = policy->cur; 270 } else { 271 struct od_ops *ops = dbs_data->gov_ops; 272 od_dbs_info->rate_mult = 1; 273 od_dbs_info->sample_type = OD_NORMAL_SAMPLE; 274 ops->powersave_bias_init_cpu(cpu); 275 } 276 mutex_unlock(&dbs_data->mutex); 277 278 mutex_init(&cpu_cdbs->timer_mutex); 279 dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate); 280 break; 281 282 case CPUFREQ_GOV_STOP: 283 if (dbs_data->governor == GOV_CONSERVATIVE) 284 cs_dbs_info->enable = 0; 285 286 dbs_timer_exit(cpu_cdbs); 287 288 mutex_lock(&dbs_data->mutex); 289 mutex_destroy(&cpu_cdbs->timer_mutex); 290 dbs_data->enable--; 291 if (!dbs_data->enable) { 292 struct cs_ops *ops = dbs_data->gov_ops; 293 294 sysfs_remove_group(cpufreq_global_kobject, 295 dbs_data->attr_group); 296 if (dbs_data->governor == GOV_CONSERVATIVE) 297 cpufreq_unregister_notifier(ops->notifier_block, 298 CPUFREQ_TRANSITION_NOTIFIER); 299 } 300 mutex_unlock(&dbs_data->mutex); 301 302 break; 303 304 case CPUFREQ_GOV_LIMITS: 305 mutex_lock(&cpu_cdbs->timer_mutex); 306 if (policy->max < cpu_cdbs->cur_policy->cur) 307 __cpufreq_driver_target(cpu_cdbs->cur_policy, 308 policy->max, CPUFREQ_RELATION_H); 309 else if (policy->min > cpu_cdbs->cur_policy->cur) 310 __cpufreq_driver_target(cpu_cdbs->cur_policy, 311 policy->min, CPUFREQ_RELATION_L); 312 dbs_check_cpu(dbs_data, cpu); 313 mutex_unlock(&cpu_cdbs->timer_mutex); 314 break; 315 } 316 return 0; 317 } 318 EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); 319