// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of CPUs
 *
 * Copyright (c) 2018, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 */

#define pr_fmt(fmt) "energy_model: " fmt

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>

/* Mapping of each CPU to the performance domain to which it belongs. */
static DEFINE_PER_CPU(struct em_perf_domain *, em_data);

/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
                                           struct em_data_callback *cb)
{
        unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
        unsigned long power, freq, prev_freq = 0;
        int i, ret, cpu = cpumask_first(span);
        struct em_cap_state *table;
        struct em_perf_domain *pd;
        u64 fmax;

        if (!cb->active_power)
                return NULL;

        pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
        if (!pd)
                return NULL;

        table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
        if (!table)
                goto free_pd;

        /* Build the list of capacity states for this performance domain. */
        for (i = 0, freq = 0; i < nr_states; i++, freq++) {
                /*
                 * active_power() is a driver callback which ceils 'freq' to
                 * the lowest capacity state of 'cpu' above 'freq' and updates
                 * 'power' and 'freq' accordingly.
                 */
                ret = cb->active_power(&power, &freq, cpu);
                if (ret) {
                        pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
                        goto free_cs_table;
                }

                /*
                 * We expect the driver callback to increase the frequency for
                 * higher capacity states.
                 */
                if (freq <= prev_freq) {
                        pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq);
                        goto free_cs_table;
                }

                /*
                 * The power returned by active_power() is expected to be
                 * positive, in milli-watts and to fit into 16 bits.
                 */
                if (!power || power > EM_CPU_MAX_POWER) {
                        pr_err("pd%d: invalid power: %lu\n", cpu, power);
                        goto free_cs_table;
                }

                table[i].power = power;
                table[i].frequency = prev_freq = freq;

                /*
                 * The hertz/watts efficiency ratio should decrease as the
                 * frequency grows on sane platforms. But this isn't always
                 * true in practice so warn the user if a higher OPP is more
                 * power efficient than a lower one.
                 */
                opp_eff = freq / power;
                if (opp_eff >= prev_opp_eff)
                        pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state %d\n",
                                cpu, i, i - 1);
                prev_opp_eff = opp_eff;
        }

        /* Compute the cost of each capacity state. */
        fmax = (u64) table[nr_states - 1].frequency;
        for (i = 0; i < nr_states; i++) {
                table[i].cost = div64_u64(fmax * table[i].power,
                                          table[i].frequency);
        }

        pd->table = table;
        pd->nr_cap_states = nr_states;
        cpumask_copy(to_cpumask(pd->cpus), span);

        return pd;

free_cs_table:
        kfree(table);
free_pd:
        kfree(pd);

        return NULL;
}
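/*
 * Worked example of the ->cost computation above, using invented numbers
 * (these OPPs are illustrative only and do not come from this file).
 * Given three capacity states:
 *
 *	{ 500000 KHz / 100 mW, 1000000 KHz / 300 mW, 1500000 KHz / 800 mW }
 *
 * fmax = 1500000 and the per-state costs become:
 *
 *	cost[0] = 1500000 * 100 / 500000  = 300
 *	cost[1] = 1500000 * 300 / 1000000 = 450
 *	cost[2] = 1500000 * 800 / 1500000 = 800
 *
 * Pre-computing these coefficients lets later energy estimates scale the
 * cost by utilization with multiplications only, avoiding divisions at
 * run time. Note the hertz/watts ratios here (5000, 3333, 1875) decrease
 * monotonically, so this table would not trigger the pr_warn() above.
 */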
/**
 * em_cpu_get() - Return the performance domain for a CPU
 * @cpu : CPU to find the performance domain for
 *
 * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
        return READ_ONCE(per_cpu(em_data, cpu));
}
EXPORT_SYMBOL_GPL(em_cpu_get);

/**
 * em_register_perf_domain() - Register the Energy Model of a performance domain
 * @span      : Mask of CPUs in the performance domain
 * @nr_states : Number of capacity states to register
 * @cb        : Callback functions providing the data of the Energy Model
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return: 0 on success.
 */
int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
                            struct em_data_callback *cb)
{
        unsigned long cap, prev_cap = 0;
        struct em_perf_domain *pd;
        int cpu, ret = 0;

        if (!span || !nr_states || !cb)
                return -EINVAL;

        /*
         * Use a mutex to serialize the registration of performance domains
         * and let the driver-defined callback functions sleep.
         */
        mutex_lock(&em_pd_mutex);

        for_each_cpu(cpu, span) {
                /* Make sure we don't register an existing domain again. */
                if (READ_ONCE(per_cpu(em_data, cpu))) {
                        ret = -EEXIST;
                        goto unlock;
                }

                /*
                 * All CPUs of a domain must have the same micro-architecture
                 * since they all share the same table.
                 */
                cap = arch_scale_cpu_capacity(NULL, cpu);
                if (prev_cap && prev_cap != cap) {
                        pr_err("CPUs of %*pbl must have the same capacity\n",
                               cpumask_pr_args(span));
                        ret = -EINVAL;
                        goto unlock;
                }
                prev_cap = cap;
        }

        /* Create the performance domain and add it to the Energy Model. */
        pd = em_create_pd(span, nr_states, cb);
        if (!pd) {
                ret = -EINVAL;
                goto unlock;
        }

        for_each_cpu(cpu, span) {
                /*
                 * The per-cpu array can be read concurrently from
                 * em_cpu_get(). The barrier enforces the ordering needed to
                 * make sure readers can only access well-formed
                 * em_perf_domain structs.
                 */
                smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
        }

        pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
unlock:
        mutex_unlock(&em_pd_mutex);

        return ret;
}
EXPORT_SYMBOL_GPL(em_register_perf_domain);
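/*
 * Illustrative sketch (not part of this file): how a hypothetical driver
 * might implement the active_power() callback and register its performance
 * domain. The foo_* identifiers and the OPP values are invented for
 * illustration; a real driver would typically query the OPP library or
 * firmware rather than hard-code a table. EM_DATA_CB() comes from
 * <linux/energy_model.h>.
 */
static int __maybe_unused foo_active_power(unsigned long *power,
                                           unsigned long *freq, int cpu)
{
        /* Invented OPPs: frequency in KHz, power in mW. */
        static const unsigned long freqs[]  = { 500000, 1000000, 1500000 };
        static const unsigned long powers[] = {    100,     300,     800 };
        int i;

        /* Ceil *freq to the lowest OPP at or above the requested value. */
        for (i = 0; i < ARRAY_SIZE(freqs); i++) {
                if (freqs[i] >= *freq) {
                        *freq = freqs[i];
                        *power = powers[i];
                        return 0;
                }
        }

        return -EINVAL;
}

static int __maybe_unused foo_register_em(cpumask_t *span)
{
        struct em_data_callback em_cb = EM_DATA_CB(foo_active_power);

        /* Three capacity states, one per OPP in the table above. */
        return em_register_perf_domain(span, 3, &em_cb);
}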