xref: /openbmc/linux/drivers/base/arch_topology.c (revision 4ee812f6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arch specific cpu topology information
4  *
5  * Copyright (C) 2016, ARM Ltd.
6  * Written by: Juri Lelli, ARM Ltd.
7  */
8 
#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/math64.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
23 
24 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
25 
26 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
27 			 unsigned long max_freq)
28 {
29 	unsigned long scale;
30 	int i;
31 
32 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
33 
34 	for_each_cpu(i, cpus)
35 		per_cpu(freq_scale, i) = scale;
36 }
37 
38 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
39 
40 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
41 {
42 	per_cpu(cpu_scale, cpu) = capacity;
43 }
44 
45 static ssize_t cpu_capacity_show(struct device *dev,
46 				 struct device_attribute *attr,
47 				 char *buf)
48 {
49 	struct cpu *cpu = container_of(dev, struct cpu, dev);
50 
51 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
52 }
53 
54 static void update_topology_flags_workfn(struct work_struct *work);
55 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
56 
57 static DEVICE_ATTR_RO(cpu_capacity);
58 
59 static int register_cpu_capacity_sysctl(void)
60 {
61 	int i;
62 	struct device *cpu;
63 
64 	for_each_possible_cpu(i) {
65 		cpu = get_cpu_device(i);
66 		if (!cpu) {
67 			pr_err("%s: too early to get CPU%d device!\n",
68 			       __func__, i);
69 			continue;
70 		}
71 		device_create_file(cpu, &dev_attr_cpu_capacity);
72 	}
73 
74 	return 0;
75 }
76 subsys_initcall(register_cpu_capacity_sysctl);
77 
/* Set to 1 only for the duration of the rebuild in the work function below. */
static int update_topology;

/* Report the current value of the update_topology flag. */
int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * The sched_domains cannot be updated directly from cpufreq callbacks
 * because of locking, so the rebuild is deferred to this work function.
 * The update_topology flag is raised around the rebuild_sched_domains()
 * call and lowered again once it returns.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;

	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");

	update_topology = 0;
}
96 
97 static u32 capacity_scale;
98 static u32 *raw_capacity;
99 
100 static int free_raw_capacity(void)
101 {
102 	kfree(raw_capacity);
103 	raw_capacity = NULL;
104 
105 	return 0;
106 }
107 
108 void topology_normalize_cpu_scale(void)
109 {
110 	u64 capacity;
111 	int cpu;
112 
113 	if (!raw_capacity)
114 		return;
115 
116 	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
117 	for_each_possible_cpu(cpu) {
118 		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
119 			 cpu, raw_capacity[cpu]);
120 		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
121 			/ capacity_scale;
122 		topology_set_cpu_scale(cpu, capacity);
123 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
124 			cpu, topology_get_cpu_scale(cpu));
125 	}
126 }
127 
128 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
129 {
130 	static bool cap_parsing_failed;
131 	int ret;
132 	u32 cpu_capacity;
133 
134 	if (cap_parsing_failed)
135 		return false;
136 
137 	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
138 				   &cpu_capacity);
139 	if (!ret) {
140 		if (!raw_capacity) {
141 			raw_capacity = kcalloc(num_possible_cpus(),
142 					       sizeof(*raw_capacity),
143 					       GFP_KERNEL);
144 			if (!raw_capacity) {
145 				cap_parsing_failed = true;
146 				return false;
147 			}
148 		}
149 		capacity_scale = max(cpu_capacity, capacity_scale);
150 		raw_capacity[cpu] = cpu_capacity;
151 		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
152 			cpu_node, raw_capacity[cpu]);
153 	} else {
154 		if (raw_capacity) {
155 			pr_err("cpu_capacity: missing %pOF raw capacity\n",
156 				cpu_node);
157 			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
158 		}
159 		cap_parsing_failed = true;
160 		free_raw_capacity();
161 	}
162 
163 	return !ret;
164 }
165 
166 #ifdef CONFIG_CPU_FREQ
167 static cpumask_var_t cpus_to_visit;
168 static void parsing_done_workfn(struct work_struct *work);
169 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
170 
171 static int
172 init_cpu_capacity_callback(struct notifier_block *nb,
173 			   unsigned long val,
174 			   void *data)
175 {
176 	struct cpufreq_policy *policy = data;
177 	int cpu;
178 
179 	if (!raw_capacity)
180 		return 0;
181 
182 	if (val != CPUFREQ_CREATE_POLICY)
183 		return 0;
184 
185 	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
186 		 cpumask_pr_args(policy->related_cpus),
187 		 cpumask_pr_args(cpus_to_visit));
188 
189 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
190 
191 	for_each_cpu(cpu, policy->related_cpus) {
192 		raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
193 				    policy->cpuinfo.max_freq / 1000UL;
194 		capacity_scale = max(raw_capacity[cpu], capacity_scale);
195 	}
196 
197 	if (cpumask_empty(cpus_to_visit)) {
198 		topology_normalize_cpu_scale();
199 		schedule_work(&update_topology_flags_work);
200 		free_raw_capacity();
201 		pr_debug("cpu_capacity: parsing done\n");
202 		schedule_work(&parsing_done_work);
203 	}
204 
205 	return 0;
206 }
207 
208 static struct notifier_block init_cpu_capacity_notifier = {
209 	.notifier_call = init_cpu_capacity_callback,
210 };
211 
212 static int __init register_cpufreq_notifier(void)
213 {
214 	int ret;
215 
216 	/*
217 	 * on ACPI-based systems we need to use the default cpu capacity
218 	 * until we have the necessary code to parse the cpu capacity, so
219 	 * skip registering cpufreq notifier.
220 	 */
221 	if (!acpi_disabled || !raw_capacity)
222 		return -EINVAL;
223 
224 	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
225 		return -ENOMEM;
226 
227 	cpumask_copy(cpus_to_visit, cpu_possible_mask);
228 
229 	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
230 					CPUFREQ_POLICY_NOTIFIER);
231 
232 	if (ret)
233 		free_cpumask_var(cpus_to_visit);
234 
235 	return ret;
236 }
237 core_initcall(register_cpufreq_notifier);
238 
239 static void parsing_done_workfn(struct work_struct *work)
240 {
241 	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
242 					 CPUFREQ_POLICY_NOTIFIER);
243 	free_cpumask_var(cpus_to_visit);
244 }
245 
246 #else
247 core_initcall(free_raw_capacity);
248 #endif
249 
250 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
251 static int __init get_cpu_for_node(struct device_node *node)
252 {
253 	struct device_node *cpu_node;
254 	int cpu;
255 
256 	cpu_node = of_parse_phandle(node, "cpu", 0);
257 	if (!cpu_node)
258 		return -1;
259 
260 	cpu = of_cpu_node_to_id(cpu_node);
261 	if (cpu >= 0)
262 		topology_parse_cpu_capacity(cpu_node, cpu);
263 	else
264 		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
265 
266 	of_node_put(cpu_node);
267 	return cpu;
268 }
269 
270 static int __init parse_core(struct device_node *core, int package_id,
271 			     int core_id)
272 {
273 	char name[10];
274 	bool leaf = true;
275 	int i = 0;
276 	int cpu;
277 	struct device_node *t;
278 
279 	do {
280 		snprintf(name, sizeof(name), "thread%d", i);
281 		t = of_get_child_by_name(core, name);
282 		if (t) {
283 			leaf = false;
284 			cpu = get_cpu_for_node(t);
285 			if (cpu >= 0) {
286 				cpu_topology[cpu].package_id = package_id;
287 				cpu_topology[cpu].core_id = core_id;
288 				cpu_topology[cpu].thread_id = i;
289 			} else {
290 				pr_err("%pOF: Can't get CPU for thread\n",
291 				       t);
292 				of_node_put(t);
293 				return -EINVAL;
294 			}
295 			of_node_put(t);
296 		}
297 		i++;
298 	} while (t);
299 
300 	cpu = get_cpu_for_node(core);
301 	if (cpu >= 0) {
302 		if (!leaf) {
303 			pr_err("%pOF: Core has both threads and CPU\n",
304 			       core);
305 			return -EINVAL;
306 		}
307 
308 		cpu_topology[cpu].package_id = package_id;
309 		cpu_topology[cpu].core_id = core_id;
310 	} else if (leaf) {
311 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
312 		return -EINVAL;
313 	}
314 
315 	return 0;
316 }
317 
318 static int __init parse_cluster(struct device_node *cluster, int depth)
319 {
320 	char name[10];
321 	bool leaf = true;
322 	bool has_cores = false;
323 	struct device_node *c;
324 	static int package_id __initdata;
325 	int core_id = 0;
326 	int i, ret;
327 
328 	/*
329 	 * First check for child clusters; we currently ignore any
330 	 * information about the nesting of clusters and present the
331 	 * scheduler with a flat list of them.
332 	 */
333 	i = 0;
334 	do {
335 		snprintf(name, sizeof(name), "cluster%d", i);
336 		c = of_get_child_by_name(cluster, name);
337 		if (c) {
338 			leaf = false;
339 			ret = parse_cluster(c, depth + 1);
340 			of_node_put(c);
341 			if (ret != 0)
342 				return ret;
343 		}
344 		i++;
345 	} while (c);
346 
347 	/* Now check for cores */
348 	i = 0;
349 	do {
350 		snprintf(name, sizeof(name), "core%d", i);
351 		c = of_get_child_by_name(cluster, name);
352 		if (c) {
353 			has_cores = true;
354 
355 			if (depth == 0) {
356 				pr_err("%pOF: cpu-map children should be clusters\n",
357 				       c);
358 				of_node_put(c);
359 				return -EINVAL;
360 			}
361 
362 			if (leaf) {
363 				ret = parse_core(c, package_id, core_id++);
364 			} else {
365 				pr_err("%pOF: Non-leaf cluster with core %s\n",
366 				       cluster, name);
367 				ret = -EINVAL;
368 			}
369 
370 			of_node_put(c);
371 			if (ret != 0)
372 				return ret;
373 		}
374 		i++;
375 	} while (c);
376 
377 	if (leaf && !has_cores)
378 		pr_warn("%pOF: empty cluster\n", cluster);
379 
380 	if (leaf)
381 		package_id++;
382 
383 	return 0;
384 }
385 
386 static int __init parse_dt_topology(void)
387 {
388 	struct device_node *cn, *map;
389 	int ret = 0;
390 	int cpu;
391 
392 	cn = of_find_node_by_path("/cpus");
393 	if (!cn) {
394 		pr_err("No CPU information found in DT\n");
395 		return 0;
396 	}
397 
398 	/*
399 	 * When topology is provided cpu-map is essentially a root
400 	 * cluster with restricted subnodes.
401 	 */
402 	map = of_get_child_by_name(cn, "cpu-map");
403 	if (!map)
404 		goto out;
405 
406 	ret = parse_cluster(map, 0);
407 	if (ret != 0)
408 		goto out_map;
409 
410 	topology_normalize_cpu_scale();
411 
412 	/*
413 	 * Check that all cores are in the topology; the SMP code will
414 	 * only mark cores described in the DT as possible.
415 	 */
416 	for_each_possible_cpu(cpu)
417 		if (cpu_topology[cpu].package_id == -1)
418 			ret = -EINVAL;
419 
420 out_map:
421 	of_node_put(map);
422 out:
423 	of_node_put(cn);
424 	return ret;
425 }
426 #endif
427 
428 /*
429  * cpu topology table
430  */
431 struct cpu_topology cpu_topology[NR_CPUS];
432 EXPORT_SYMBOL_GPL(cpu_topology);
433 
434 const struct cpumask *cpu_coregroup_mask(int cpu)
435 {
436 	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
437 
438 	/* Find the smaller of NUMA, core or LLC siblings */
439 	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
440 		/* not numa in package, lets use the package siblings */
441 		core_mask = &cpu_topology[cpu].core_sibling;
442 	}
443 	if (cpu_topology[cpu].llc_id != -1) {
444 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
445 			core_mask = &cpu_topology[cpu].llc_sibling;
446 	}
447 
448 	return core_mask;
449 }
450 
451 void update_siblings_masks(unsigned int cpuid)
452 {
453 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
454 	int cpu;
455 
456 	/* update core and thread sibling masks */
457 	for_each_online_cpu(cpu) {
458 		cpu_topo = &cpu_topology[cpu];
459 
460 		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
461 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
462 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
463 		}
464 
465 		if (cpuid_topo->package_id != cpu_topo->package_id)
466 			continue;
467 
468 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
469 		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
470 
471 		if (cpuid_topo->core_id != cpu_topo->core_id)
472 			continue;
473 
474 		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
475 		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
476 	}
477 }
478 
479 static void clear_cpu_topology(int cpu)
480 {
481 	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
482 
483 	cpumask_clear(&cpu_topo->llc_sibling);
484 	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
485 
486 	cpumask_clear(&cpu_topo->core_sibling);
487 	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
488 	cpumask_clear(&cpu_topo->thread_sibling);
489 	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
490 }
491 
492 void __init reset_cpu_topology(void)
493 {
494 	unsigned int cpu;
495 
496 	for_each_possible_cpu(cpu) {
497 		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
498 
499 		cpu_topo->thread_id = -1;
500 		cpu_topo->core_id = -1;
501 		cpu_topo->package_id = -1;
502 		cpu_topo->llc_id = -1;
503 
504 		clear_cpu_topology(cpu);
505 	}
506 }
507 
508 void remove_cpu_topology(unsigned int cpu)
509 {
510 	int sibling;
511 
512 	for_each_cpu(sibling, topology_core_cpumask(cpu))
513 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
514 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
515 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
516 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
517 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
518 
519 	clear_cpu_topology(cpu);
520 }
521 
522 __weak int __init parse_acpi_topology(void)
523 {
524 	return 0;
525 }
526 
527 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
528 void __init init_cpu_topology(void)
529 {
530 	reset_cpu_topology();
531 
532 	/*
533 	 * Discard anything that was parsed if we hit an error so we
534 	 * don't use partial information.
535 	 */
536 	if (parse_acpi_topology())
537 		reset_cpu_topology();
538 	else if (of_have_populated_dt() && parse_dt_topology())
539 		reset_cpu_topology();
540 }
541 #endif
542