xref: /openbmc/linux/kernel/power/energy_model.c (revision 8354eb9e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Energy Model of devices
4  *
5  * Copyright (c) 2018-2021, Arm ltd.
6  * Written by: Quentin Perret, Arm ltd.
7  * Improvements provided by: Lukasz Luba, Arm ltd.
8  */
9 
10 #define pr_fmt(fmt) "energy_model: " fmt
11 
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/debugfs.h>
15 #include <linux/energy_model.h>
16 #include <linux/sched/topology.h>
17 #include <linux/slab.h>
18 
/*
 * Mutex serializing the registration and unregistration of performance
 * domains. Being a sleeping lock, it also lets the callbacks defined by
 * drivers sleep while a registration is in progress.
 */
static DEFINE_MUTEX(em_pd_mutex);
24 
25 static bool _is_cpu_device(struct device *dev)
26 {
27 	return (dev->bus == &cpu_subsys);
28 }
29 
30 #ifdef CONFIG_DEBUG_FS
/* Top-level "energy_model" debugfs directory, created by em_debug_init(). */
static struct dentry *rootdir;
32 
33 static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
34 {
35 	struct dentry *d;
36 	char name[24];
37 
38 	snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
39 
40 	/* Create per-ps directory */
41 	d = debugfs_create_dir(name, pd);
42 	debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
43 	debugfs_create_ulong("power", 0444, d, &ps->power);
44 	debugfs_create_ulong("cost", 0444, d, &ps->cost);
45 	debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
46 }
47 
48 static int em_debug_cpus_show(struct seq_file *s, void *unused)
49 {
50 	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
51 
52 	return 0;
53 }
54 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
55 
56 static int em_debug_units_show(struct seq_file *s, void *unused)
57 {
58 	struct em_perf_domain *pd = s->private;
59 	char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
60 		"milliWatts" : "bogoWatts";
61 
62 	seq_printf(s, "%s\n", units);
63 
64 	return 0;
65 }
66 DEFINE_SHOW_ATTRIBUTE(em_debug_units);
67 
68 static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
69 {
70 	struct em_perf_domain *pd = s->private;
71 	int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
72 
73 	seq_printf(s, "%d\n", enabled);
74 
75 	return 0;
76 }
77 DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
78 
79 static void em_debug_create_pd(struct device *dev)
80 {
81 	struct dentry *d;
82 	int i;
83 
84 	/* Create the directory of the performance domain */
85 	d = debugfs_create_dir(dev_name(dev), rootdir);
86 
87 	if (_is_cpu_device(dev))
88 		debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
89 				    &em_debug_cpus_fops);
90 
91 	debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
92 	debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
93 			    &em_debug_skip_inefficiencies_fops);
94 
95 	/* Create a sub-directory for each performance state */
96 	for (i = 0; i < dev->em_pd->nr_perf_states; i++)
97 		em_debug_create_ps(&dev->em_pd->table[i], d);
98 
99 }
100 
101 static void em_debug_remove_pd(struct device *dev)
102 {
103 	struct dentry *debug_dir;
104 
105 	debug_dir = debugfs_lookup(dev_name(dev), rootdir);
106 	debugfs_remove_recursive(debug_dir);
107 }
108 
109 static int __init em_debug_init(void)
110 {
111 	/* Create /sys/kernel/debug/energy_model directory */
112 	rootdir = debugfs_create_dir("energy_model", NULL);
113 
114 	return 0;
115 }
116 fs_initcall(em_debug_init);
117 #else /* CONFIG_DEBUG_FS */
/* Without CONFIG_DEBUG_FS the debug hooks are empty. */
static void em_debug_create_pd(struct device *dev)
{
}

static void em_debug_remove_pd(struct device *dev)
{
}
120 #endif
121 
122 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
123 				int nr_states, struct em_data_callback *cb)
124 {
125 	unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
126 	struct em_perf_state *table;
127 	int i, ret;
128 	u64 fmax;
129 
130 	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
131 	if (!table)
132 		return -ENOMEM;
133 
134 	/* Build the list of performance states for this performance domain */
135 	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
136 		/*
137 		 * active_power() is a driver callback which ceils 'freq' to
138 		 * lowest performance state of 'dev' above 'freq' and updates
139 		 * 'power' and 'freq' accordingly.
140 		 */
141 		ret = cb->active_power(&power, &freq, dev);
142 		if (ret) {
143 			dev_err(dev, "EM: invalid perf. state: %d\n",
144 				ret);
145 			goto free_ps_table;
146 		}
147 
148 		/*
149 		 * We expect the driver callback to increase the frequency for
150 		 * higher performance states.
151 		 */
152 		if (freq <= prev_freq) {
153 			dev_err(dev, "EM: non-increasing freq: %lu\n",
154 				freq);
155 			goto free_ps_table;
156 		}
157 
158 		/*
159 		 * The power returned by active_state() is expected to be
160 		 * positive and to fit into 16 bits.
161 		 */
162 		if (!power || power > EM_MAX_POWER) {
163 			dev_err(dev, "EM: invalid power: %lu\n",
164 				power);
165 			goto free_ps_table;
166 		}
167 
168 		table[i].power = power;
169 		table[i].frequency = prev_freq = freq;
170 	}
171 
172 	/* Compute the cost of each performance state. */
173 	fmax = (u64) table[nr_states - 1].frequency;
174 	for (i = nr_states - 1; i >= 0; i--) {
175 		unsigned long power_res = em_scale_power(table[i].power);
176 
177 		table[i].cost = div64_u64(fmax * power_res,
178 					  table[i].frequency);
179 		if (table[i].cost >= prev_cost) {
180 			table[i].flags = EM_PERF_STATE_INEFFICIENT;
181 			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
182 				table[i].frequency);
183 		} else {
184 			prev_cost = table[i].cost;
185 		}
186 	}
187 
188 	pd->table = table;
189 	pd->nr_perf_states = nr_states;
190 
191 	return 0;
192 
193 free_ps_table:
194 	kfree(table);
195 	return -EINVAL;
196 }
197 
198 static int em_create_pd(struct device *dev, int nr_states,
199 			struct em_data_callback *cb, cpumask_t *cpus)
200 {
201 	struct em_perf_domain *pd;
202 	struct device *cpu_dev;
203 	int cpu, ret;
204 
205 	if (_is_cpu_device(dev)) {
206 		pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
207 		if (!pd)
208 			return -ENOMEM;
209 
210 		cpumask_copy(em_span_cpus(pd), cpus);
211 	} else {
212 		pd = kzalloc(sizeof(*pd), GFP_KERNEL);
213 		if (!pd)
214 			return -ENOMEM;
215 	}
216 
217 	ret = em_create_perf_table(dev, pd, nr_states, cb);
218 	if (ret) {
219 		kfree(pd);
220 		return ret;
221 	}
222 
223 	if (_is_cpu_device(dev))
224 		for_each_cpu(cpu, cpus) {
225 			cpu_dev = get_cpu_device(cpu);
226 			cpu_dev->em_pd = pd;
227 		}
228 
229 	dev->em_pd = pd;
230 
231 	return 0;
232 }
233 
234 /**
235  * em_pd_get() - Return the performance domain for a device
236  * @dev : Device to find the performance domain for
237  *
238  * Returns the performance domain to which @dev belongs, or NULL if it doesn't
239  * exist.
240  */
241 struct em_perf_domain *em_pd_get(struct device *dev)
242 {
243 	if (IS_ERR_OR_NULL(dev))
244 		return NULL;
245 
246 	return dev->em_pd;
247 }
248 EXPORT_SYMBOL_GPL(em_pd_get);
249 
250 /**
251  * em_cpu_get() - Return the performance domain for a CPU
252  * @cpu : CPU to find the performance domain for
253  *
254  * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
255  * exist.
256  */
257 struct em_perf_domain *em_cpu_get(int cpu)
258 {
259 	struct device *cpu_dev;
260 
261 	cpu_dev = get_cpu_device(cpu);
262 	if (!cpu_dev)
263 		return NULL;
264 
265 	return em_pd_get(cpu_dev);
266 }
267 EXPORT_SYMBOL_GPL(em_cpu_get);
268 
269 /**
270  * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
271  * @dev		: Device for which the EM is to register
272  * @nr_states	: Number of performance states to register
273  * @cb		: Callback functions providing the data of the Energy Model
274  * @cpus	: Pointer to cpumask_t, which in case of a CPU device is
275  *		obligatory. It can be taken from i.e. 'policy->cpus'. For other
276  *		type of devices this should be set to NULL.
277  * @milliwatts	: Flag indicating that the power values are in milliWatts or
278  *		in some other scale. It must be set properly.
279  *
280  * Create Energy Model tables for a performance domain using the callbacks
281  * defined in cb.
282  *
283  * The @milliwatts is important to set with correct value. Some kernel
284  * sub-systems might rely on this flag and check if all devices in the EM are
285  * using the same scale.
286  *
287  * If multiple clients register the same performance domain, all but the first
288  * registration will be ignored.
289  *
290  * Return 0 on success
291  */
292 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
293 				struct em_data_callback *cb, cpumask_t *cpus,
294 				bool milliwatts)
295 {
296 	unsigned long cap, prev_cap = 0;
297 	int cpu, ret;
298 
299 	if (!dev || !nr_states || !cb)
300 		return -EINVAL;
301 
302 	/*
303 	 * Use a mutex to serialize the registration of performance domains and
304 	 * let the driver-defined callback functions sleep.
305 	 */
306 	mutex_lock(&em_pd_mutex);
307 
308 	if (dev->em_pd) {
309 		ret = -EEXIST;
310 		goto unlock;
311 	}
312 
313 	if (_is_cpu_device(dev)) {
314 		if (!cpus) {
315 			dev_err(dev, "EM: invalid CPU mask\n");
316 			ret = -EINVAL;
317 			goto unlock;
318 		}
319 
320 		for_each_cpu(cpu, cpus) {
321 			if (em_cpu_get(cpu)) {
322 				dev_err(dev, "EM: exists for CPU%d\n", cpu);
323 				ret = -EEXIST;
324 				goto unlock;
325 			}
326 			/*
327 			 * All CPUs of a domain must have the same
328 			 * micro-architecture since they all share the same
329 			 * table.
330 			 */
331 			cap = arch_scale_cpu_capacity(cpu);
332 			if (prev_cap && prev_cap != cap) {
333 				dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
334 					cpumask_pr_args(cpus));
335 
336 				ret = -EINVAL;
337 				goto unlock;
338 			}
339 			prev_cap = cap;
340 		}
341 	}
342 
343 	ret = em_create_pd(dev, nr_states, cb, cpus);
344 	if (ret)
345 		goto unlock;
346 
347 	if (milliwatts)
348 		dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
349 
350 	em_debug_create_pd(dev);
351 	dev_info(dev, "EM: created perf domain\n");
352 
353 unlock:
354 	mutex_unlock(&em_pd_mutex);
355 	return ret;
356 }
357 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
358 
359 /**
360  * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
361  * @dev		: Device for which the EM is registered
362  *
363  * Unregister the EM for the specified @dev (but not a CPU device).
364  */
365 void em_dev_unregister_perf_domain(struct device *dev)
366 {
367 	if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
368 		return;
369 
370 	if (_is_cpu_device(dev))
371 		return;
372 
373 	/*
374 	 * The mutex separates all register/unregister requests and protects
375 	 * from potential clean-up/setup issues in the debugfs directories.
376 	 * The debugfs directory name is the same as device's name.
377 	 */
378 	mutex_lock(&em_pd_mutex);
379 	em_debug_remove_pd(dev);
380 
381 	kfree(dev->em_pd->table);
382 	kfree(dev->em_pd);
383 	dev->em_pd = NULL;
384 	mutex_unlock(&em_pd_mutex);
385 }
386 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
387