// SPDX-License-Identifier: GPL-2.0
/*
 * Energy Model of CPUs
 *
 * Copyright (c) 2018, Arm ltd.
 * Written by: Quentin Perret, Arm ltd.
 */

#define pr_fmt(fmt) "energy_model: " fmt

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/energy_model.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>

/* Mapping of each CPU to the performance domain to which it belongs. */
static DEFINE_PER_CPU(struct em_perf_domain *, em_data);

/*
 * Mutex serializing the registrations of performance domains and letting
 * callbacks defined by drivers sleep.
 */
static DEFINE_MUTEX(em_pd_mutex);

static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
						struct em_data_callback *cb)
{
	unsigned long opp_eff, prev_opp_eff = ULONG_MAX;
	unsigned long power, freq, prev_freq = 0;
	int i, ret, cpu = cpumask_first(span);
	struct em_cap_state *table;
	struct em_perf_domain *pd;
	u64 fmax;

	if (!cb->active_power)
		return NULL;

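	/*
	 * Note: struct em_perf_domain ends in a flexible array holding the
	 * cpumask of the domain, which is why cpumask_size() extra bytes
	 * are allocated here.
	 */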
	pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
	if (!pd)
		return NULL;

	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
	if (!table)
		goto free_pd;

	/* Build the list of capacity states for this performance domain */
	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
		/*
		 * active_power() is a driver callback which ceils 'freq' to
		 * the lowest capacity state of 'cpu' above 'freq' and updates
		 * 'power' and 'freq' accordingly.
		 */
		ret = cb->active_power(&power, &freq, cpu);
		if (ret) {
			pr_err("pd%d: invalid cap. state: %d\n", cpu, ret);
			goto free_cs_table;
		}

		/*
		 * We expect the driver callback to increase the frequency for
		 * higher capacity states.
		 */
		if (freq <= prev_freq) {
			pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq);
			goto free_cs_table;
		}

		/*
		 * The power returned by active_power() is expected to be
		 * positive, in milli-watts and to fit into 16 bits.
		 */
		if (!power || power > EM_CPU_MAX_POWER) {
			pr_err("pd%d: invalid power: %lu\n", cpu, power);
			goto free_cs_table;
		}

		table[i].power = power;
		table[i].frequency = prev_freq = freq;

		/*
		 * The hertz/watts efficiency ratio should decrease as the
		 * frequency grows on sane platforms. But this isn't always
		 * true in practice so warn the user if a higher OPP is more
		 * power efficient than a lower one.
		 */
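		/*
		 * Worked example with made-up numbers: an OPP at 500000 KHz
		 * drawing 100 mW has a ratio of 5000 KHz/mW. If the next OPP
		 * at 1000000 KHz draws only 150 mW, its ratio is ~6666 KHz/mW,
		 * so the lower OPP is never the more energy-efficient choice
		 * and the warning below fires.
		 */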
		opp_eff = freq / power;
		if (opp_eff >= prev_opp_eff)
			pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state %d\n",
					cpu, i, i - 1);
		prev_opp_eff = opp_eff;
	}

	/* Compute the cost of each capacity_state. */
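	/*
	 * cost = fmax * power / freq scales each OPP's power by the inverse
	 * of its relative performance (freq / fmax). Assuming the usual EAS
	 * usage, this lets the scheduler estimate the energy of a domain as
	 * roughly cost * utilization / capacity (see em_pd_energy() in
	 * <linux/energy_model.h>).
	 */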
	fmax = (u64) table[nr_states - 1].frequency;
	for (i = 0; i < nr_states; i++) {
		table[i].cost = div64_u64(fmax * table[i].power,
					  table[i].frequency);
	}

	pd->table = table;
	pd->nr_cap_states = nr_states;
	cpumask_copy(to_cpumask(pd->cpus), span);

	return pd;

free_cs_table:
	kfree(table);
free_pd:
	kfree(pd);

	return NULL;
}
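
/*
 * Illustrative sketch only: a driver-side active_power() implementation
 * could look roughly like the following. The foo_*() helpers are
 * hypothetical stand-ins for whatever firmware or OPP interface the
 * platform provides; only the callback contract (ceil *KHz to a supported
 * OPP, fill in *mW, return 0 on success) is prescribed by this file.
 *
 *	static int foo_active_power(unsigned long *mW, unsigned long *KHz,
 *				    int cpu)
 *	{
 *		long freq, power;
 *
 *		// Ceil the requested frequency to a supported OPP.
 *		freq = foo_get_freq_ceil(cpu, *KHz);
 *		if (freq < 0)
 *			return freq;
 *
 *		// Estimate the power cost of 'cpu' at that OPP.
 *		power = foo_estimate_power(cpu, freq);
 *		if (power < 0)
 *			return power;
 *
 *		// Return the values to the EM framework.
 *		*KHz = freq;
 *		*mW = power;
 *
 *		return 0;
 *	}
 */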

/**
 * em_cpu_get() - Return the performance domain for a CPU
 * @cpu : CPU to find the performance domain for
 *
 * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't
 * exist.
 */
struct em_perf_domain *em_cpu_get(int cpu)
{
	return READ_ONCE(per_cpu(em_data, cpu));
}
EXPORT_SYMBOL_GPL(em_cpu_get);
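
/*
 * Example use (illustrative, not from this file): a consumer can walk the
 * capacity states of the domain a CPU belongs to, e.g.:
 *
 *	struct em_perf_domain *pd = em_cpu_get(cpu);
 *	int i;
 *
 *	if (pd) {
 *		for (i = 0; i < pd->nr_cap_states; i++)
 *			pr_info("%lu KHz: %lu mW\n", pd->table[i].frequency,
 *				pd->table[i].power);
 *	}
 */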

/**
 * em_register_perf_domain() - Register the Energy Model of a performance domain
 * @span	: Mask of CPUs in the performance domain
 * @nr_states	: Number of capacity states to register
 * @cb		: Callback functions providing the data of the Energy Model
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return: 0 on success
 */
int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
						struct em_data_callback *cb)
{
	unsigned long cap, prev_cap = 0;
	struct em_perf_domain *pd;
	int cpu, ret = 0;

	if (!span || !nr_states || !cb)
		return -EINVAL;

	/*
	 * Use a mutex to serialize the registration of performance domains and
	 * let the driver-defined callback functions sleep.
	 */
	mutex_lock(&em_pd_mutex);

	for_each_cpu(cpu, span) {
		/* Make sure we don't register an existing domain again. */
		if (READ_ONCE(per_cpu(em_data, cpu))) {
			ret = -EEXIST;
			goto unlock;
		}

		/*
		 * All CPUs of a domain must have the same micro-architecture
		 * since they all share the same table.
		 */
		cap = arch_scale_cpu_capacity(NULL, cpu);
		if (prev_cap && prev_cap != cap) {
			pr_err("CPUs of %*pbl must have the same capacity\n",
							cpumask_pr_args(span));
			ret = -EINVAL;
			goto unlock;
		}
		prev_cap = cap;
	}

	/* Create the performance domain and add it to the Energy Model. */
	pd = em_create_pd(span, nr_states, cb);
	if (!pd) {
		ret = -EINVAL;
		goto unlock;
	}

	for_each_cpu(cpu, span) {
		/*
		 * The per-cpu array can be read concurrently from em_cpu_get().
		 * The barrier enforces the ordering needed to make sure readers
		 * can only access well formed em_perf_domain structs.
		 */
		smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
	}

	pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
unlock:
	mutex_unlock(&em_pd_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(em_register_perf_domain);
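
/*
 * Example registration (illustrative sketch, assuming a cpufreq driver;
 * the foo_*() helpers are hypothetical). EM_DATA_CB() wraps the
 * active_power() callback into a struct em_data_callback:
 *
 *	static int foo_cpufreq_init(struct cpufreq_policy *policy)
 *	{
 *		struct em_data_callback em_cb = EM_DATA_CB(foo_active_power);
 *		int nr_opp;
 *
 *		// ... do the actual cpufreq init work here ...
 *
 *		// Find the number of OPPs for this policy and register
 *		// the new performance domain.
 *		nr_opp = foo_get_nr_opp(policy);
 *		em_register_perf_domain(policy->cpus, nr_opp, &em_cb);
 *
 *		return 0;
 *	}
 */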