xref: /openbmc/linux/drivers/base/arch_topology.c (revision 76ce0265)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arch specific cpu topology information
4  *
5  * Copyright (C) 2016, ARM Ltd.
6  * Written by: Juri Lelli, ARM Ltd.
7  */
8 
#include <linux/acpi.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/math64.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
23 
24 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
25 
26 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
27 			 unsigned long max_freq)
28 {
29 	unsigned long scale;
30 	int i;
31 
32 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
33 
34 	for_each_cpu(i, cpus)
35 		per_cpu(freq_scale, i) = scale;
36 }
37 
38 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
39 
40 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
41 {
42 	per_cpu(cpu_scale, cpu) = capacity;
43 }
44 
45 static ssize_t cpu_capacity_show(struct device *dev,
46 				 struct device_attribute *attr,
47 				 char *buf)
48 {
49 	struct cpu *cpu = container_of(dev, struct cpu, dev);
50 
51 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
52 }
53 
/*
 * Work item that runs update_topology_flags_workfn(); the handler itself is
 * defined further down in this file.
 */
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

/* Read-only "cpu_capacity" device attribute, served by cpu_capacity_show(). */
static DEVICE_ATTR_RO(cpu_capacity);
58 
59 static int register_cpu_capacity_sysctl(void)
60 {
61 	int i;
62 	struct device *cpu;
63 
64 	for_each_possible_cpu(i) {
65 		cpu = get_cpu_device(i);
66 		if (!cpu) {
67 			pr_err("%s: too early to get CPU%d device!\n",
68 			       __func__, i);
69 			continue;
70 		}
71 		device_create_file(cpu, &dev_attr_cpu_capacity);
72 	}
73 
74 	return 0;
75 }
76 subsys_initcall(register_cpu_capacity_sysctl);
77 
/*
 * Flag reported by topology_update_cpu_topology(). It is non-zero only while
 * update_topology_flags_workfn() is rebuilding the sched domains.
 */
static int update_topology;

/* Return the current value of the update_topology flag. */
int topology_update_cpu_topology(void)
{
	return update_topology;
}
84 
/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so the rebuild is queued as work and performed here.
 *
 * update_topology is raised for the duration of rebuild_sched_domains() so
 * that topology_update_cpu_topology() returns 1 while the rebuild runs, and
 * is lowered again afterwards.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}
96 
97 static u32 capacity_scale;
98 static u32 *raw_capacity;
99 
100 static int free_raw_capacity(void)
101 {
102 	kfree(raw_capacity);
103 	raw_capacity = NULL;
104 
105 	return 0;
106 }
107 
108 void topology_normalize_cpu_scale(void)
109 {
110 	u64 capacity;
111 	int cpu;
112 
113 	if (!raw_capacity)
114 		return;
115 
116 	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
117 	for_each_possible_cpu(cpu) {
118 		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
119 			 cpu, raw_capacity[cpu]);
120 		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
121 			/ capacity_scale;
122 		topology_set_cpu_scale(cpu, capacity);
123 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
124 			cpu, topology_get_cpu_scale(cpu));
125 	}
126 }
127 
128 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
129 {
130 	static bool cap_parsing_failed;
131 	int ret;
132 	u32 cpu_capacity;
133 
134 	if (cap_parsing_failed)
135 		return false;
136 
137 	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
138 				   &cpu_capacity);
139 	if (!ret) {
140 		if (!raw_capacity) {
141 			raw_capacity = kcalloc(num_possible_cpus(),
142 					       sizeof(*raw_capacity),
143 					       GFP_KERNEL);
144 			if (!raw_capacity) {
145 				cap_parsing_failed = true;
146 				return false;
147 			}
148 		}
149 		capacity_scale = max(cpu_capacity, capacity_scale);
150 		raw_capacity[cpu] = cpu_capacity;
151 		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
152 			cpu_node, raw_capacity[cpu]);
153 	} else {
154 		if (raw_capacity) {
155 			pr_err("cpu_capacity: missing %pOF raw capacity\n",
156 				cpu_node);
157 			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
158 		}
159 		cap_parsing_failed = true;
160 		free_raw_capacity();
161 	}
162 
163 	return !ret;
164 }
165 
166 #ifdef CONFIG_CPU_FREQ
/* CPUs for which init_cpu_capacity_callback() has not yet seen a policy. */
static cpumask_var_t cpus_to_visit;
/* Work item that runs parsing_done_workfn(), defined further down. */
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
170 
171 static int
172 init_cpu_capacity_callback(struct notifier_block *nb,
173 			   unsigned long val,
174 			   void *data)
175 {
176 	struct cpufreq_policy *policy = data;
177 	int cpu;
178 
179 	if (!raw_capacity)
180 		return 0;
181 
182 	if (val != CPUFREQ_CREATE_POLICY)
183 		return 0;
184 
185 	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
186 		 cpumask_pr_args(policy->related_cpus),
187 		 cpumask_pr_args(cpus_to_visit));
188 
189 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
190 
191 	for_each_cpu(cpu, policy->related_cpus) {
192 		raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
193 				    policy->cpuinfo.max_freq / 1000UL;
194 		capacity_scale = max(raw_capacity[cpu], capacity_scale);
195 	}
196 
197 	if (cpumask_empty(cpus_to_visit)) {
198 		topology_normalize_cpu_scale();
199 		schedule_work(&update_topology_flags_work);
200 		free_raw_capacity();
201 		pr_debug("cpu_capacity: parsing done\n");
202 		schedule_work(&parsing_done_work);
203 	}
204 
205 	return 0;
206 }
207 
/*
 * Notifier block registered on the cpufreq policy notifier chain by
 * register_cpufreq_notifier() and removed again by parsing_done_workfn().
 */
static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};
211 
212 static int __init register_cpufreq_notifier(void)
213 {
214 	int ret;
215 
216 	/*
217 	 * on ACPI-based systems we need to use the default cpu capacity
218 	 * until we have the necessary code to parse the cpu capacity, so
219 	 * skip registering cpufreq notifier.
220 	 */
221 	if (!acpi_disabled || !raw_capacity)
222 		return -EINVAL;
223 
224 	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
225 		return -ENOMEM;
226 
227 	cpumask_copy(cpus_to_visit, cpu_possible_mask);
228 
229 	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
230 					CPUFREQ_POLICY_NOTIFIER);
231 
232 	if (ret)
233 		free_cpumask_var(cpus_to_visit);
234 
235 	return ret;
236 }
237 core_initcall(register_cpufreq_notifier);
238 
/*
 * Work handler scheduled by init_cpu_capacity_callback() once every CPU has
 * been visited: unregisters the cpufreq policy notifier and frees the
 * cpus_to_visit mask.
 * NOTE(review): presumably deferred to a work item because the notifier
 * cannot be unregistered from within its own callback -- confirm.
 */
static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
					 CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}
245 
246 #else
/*
 * Without CONFIG_CPU_FREQ there is no cpufreq notifier in this file to
 * consume raw_capacity, so release it at core_initcall time.
 */
core_initcall(free_raw_capacity);
248 #endif
249 
250 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
251 /*
252  * This function returns the logic cpu number of the node.
253  * There are basically three kinds of return values:
254  * (1) logic cpu number which is > 0.
255  * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
256  * there is no possible logical CPU in the kernel to match. This happens
257  * when CONFIG_NR_CPUS is configure to be smaller than the number of
258  * CPU nodes in DT. We need to just ignore this case.
259  * (3) -1 if the node does not exist in the device tree
260  */
261 static int __init get_cpu_for_node(struct device_node *node)
262 {
263 	struct device_node *cpu_node;
264 	int cpu;
265 
266 	cpu_node = of_parse_phandle(node, "cpu", 0);
267 	if (!cpu_node)
268 		return -1;
269 
270 	cpu = of_cpu_node_to_id(cpu_node);
271 	if (cpu >= 0)
272 		topology_parse_cpu_capacity(cpu_node, cpu);
273 	else
274 		pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
275 			cpu_node, cpumask_pr_args(cpu_possible_mask));
276 
277 	of_node_put(cpu_node);
278 	return cpu;
279 }
280 
281 static int __init parse_core(struct device_node *core, int package_id,
282 			     int core_id)
283 {
284 	char name[10];
285 	bool leaf = true;
286 	int i = 0;
287 	int cpu;
288 	struct device_node *t;
289 
290 	do {
291 		snprintf(name, sizeof(name), "thread%d", i);
292 		t = of_get_child_by_name(core, name);
293 		if (t) {
294 			leaf = false;
295 			cpu = get_cpu_for_node(t);
296 			if (cpu >= 0) {
297 				cpu_topology[cpu].package_id = package_id;
298 				cpu_topology[cpu].core_id = core_id;
299 				cpu_topology[cpu].thread_id = i;
300 			} else if (cpu != -ENODEV) {
301 				pr_err("%pOF: Can't get CPU for thread\n", t);
302 				of_node_put(t);
303 				return -EINVAL;
304 			}
305 			of_node_put(t);
306 		}
307 		i++;
308 	} while (t);
309 
310 	cpu = get_cpu_for_node(core);
311 	if (cpu >= 0) {
312 		if (!leaf) {
313 			pr_err("%pOF: Core has both threads and CPU\n",
314 			       core);
315 			return -EINVAL;
316 		}
317 
318 		cpu_topology[cpu].package_id = package_id;
319 		cpu_topology[cpu].core_id = core_id;
320 	} else if (leaf && cpu != -ENODEV) {
321 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
322 		return -EINVAL;
323 	}
324 
325 	return 0;
326 }
327 
328 static int __init parse_cluster(struct device_node *cluster, int depth)
329 {
330 	char name[10];
331 	bool leaf = true;
332 	bool has_cores = false;
333 	struct device_node *c;
334 	static int package_id __initdata;
335 	int core_id = 0;
336 	int i, ret;
337 
338 	/*
339 	 * First check for child clusters; we currently ignore any
340 	 * information about the nesting of clusters and present the
341 	 * scheduler with a flat list of them.
342 	 */
343 	i = 0;
344 	do {
345 		snprintf(name, sizeof(name), "cluster%d", i);
346 		c = of_get_child_by_name(cluster, name);
347 		if (c) {
348 			leaf = false;
349 			ret = parse_cluster(c, depth + 1);
350 			of_node_put(c);
351 			if (ret != 0)
352 				return ret;
353 		}
354 		i++;
355 	} while (c);
356 
357 	/* Now check for cores */
358 	i = 0;
359 	do {
360 		snprintf(name, sizeof(name), "core%d", i);
361 		c = of_get_child_by_name(cluster, name);
362 		if (c) {
363 			has_cores = true;
364 
365 			if (depth == 0) {
366 				pr_err("%pOF: cpu-map children should be clusters\n",
367 				       c);
368 				of_node_put(c);
369 				return -EINVAL;
370 			}
371 
372 			if (leaf) {
373 				ret = parse_core(c, package_id, core_id++);
374 			} else {
375 				pr_err("%pOF: Non-leaf cluster with core %s\n",
376 				       cluster, name);
377 				ret = -EINVAL;
378 			}
379 
380 			of_node_put(c);
381 			if (ret != 0)
382 				return ret;
383 		}
384 		i++;
385 	} while (c);
386 
387 	if (leaf && !has_cores)
388 		pr_warn("%pOF: empty cluster\n", cluster);
389 
390 	if (leaf)
391 		package_id++;
392 
393 	return 0;
394 }
395 
396 static int __init parse_dt_topology(void)
397 {
398 	struct device_node *cn, *map;
399 	int ret = 0;
400 	int cpu;
401 
402 	cn = of_find_node_by_path("/cpus");
403 	if (!cn) {
404 		pr_err("No CPU information found in DT\n");
405 		return 0;
406 	}
407 
408 	/*
409 	 * When topology is provided cpu-map is essentially a root
410 	 * cluster with restricted subnodes.
411 	 */
412 	map = of_get_child_by_name(cn, "cpu-map");
413 	if (!map)
414 		goto out;
415 
416 	ret = parse_cluster(map, 0);
417 	if (ret != 0)
418 		goto out_map;
419 
420 	topology_normalize_cpu_scale();
421 
422 	/*
423 	 * Check that all cores are in the topology; the SMP code will
424 	 * only mark cores described in the DT as possible.
425 	 */
426 	for_each_possible_cpu(cpu)
427 		if (cpu_topology[cpu].package_id == -1)
428 			ret = -EINVAL;
429 
430 out_map:
431 	of_node_put(map);
432 out:
433 	of_node_put(cn);
434 	return ret;
435 }
436 #endif
437 
/*
 * CPU topology table, indexed by logical CPU number. Each entry holds the
 * thread/core/package/LLC ids and the sibling masks of one CPU.
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
443 
444 const struct cpumask *cpu_coregroup_mask(int cpu)
445 {
446 	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
447 
448 	/* Find the smaller of NUMA, core or LLC siblings */
449 	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
450 		/* not numa in package, lets use the package siblings */
451 		core_mask = &cpu_topology[cpu].core_sibling;
452 	}
453 	if (cpu_topology[cpu].llc_id != -1) {
454 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
455 			core_mask = &cpu_topology[cpu].llc_sibling;
456 	}
457 
458 	return core_mask;
459 }
460 
461 void update_siblings_masks(unsigned int cpuid)
462 {
463 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
464 	int cpu;
465 
466 	/* update core and thread sibling masks */
467 	for_each_online_cpu(cpu) {
468 		cpu_topo = &cpu_topology[cpu];
469 
470 		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
471 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
472 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
473 		}
474 
475 		if (cpuid_topo->package_id != cpu_topo->package_id)
476 			continue;
477 
478 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
479 		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
480 
481 		if (cpuid_topo->core_id != cpu_topo->core_id)
482 			continue;
483 
484 		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
485 		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
486 	}
487 }
488 
489 static void clear_cpu_topology(int cpu)
490 {
491 	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
492 
493 	cpumask_clear(&cpu_topo->llc_sibling);
494 	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
495 
496 	cpumask_clear(&cpu_topo->core_sibling);
497 	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
498 	cpumask_clear(&cpu_topo->thread_sibling);
499 	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
500 }
501 
502 void __init reset_cpu_topology(void)
503 {
504 	unsigned int cpu;
505 
506 	for_each_possible_cpu(cpu) {
507 		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
508 
509 		cpu_topo->thread_id = -1;
510 		cpu_topo->core_id = -1;
511 		cpu_topo->package_id = -1;
512 		cpu_topo->llc_id = -1;
513 
514 		clear_cpu_topology(cpu);
515 	}
516 }
517 
518 void remove_cpu_topology(unsigned int cpu)
519 {
520 	int sibling;
521 
522 	for_each_cpu(sibling, topology_core_cpumask(cpu))
523 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
524 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
525 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
526 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
527 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
528 
529 	clear_cpu_topology(cpu);
530 }
531 
/*
 * Weak default used when no other definition of parse_acpi_topology() is
 * linked in: parses nothing and reports success (0).
 */
__weak int __init parse_acpi_topology(void)
{
	return 0;
}
536 
537 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
538 void __init init_cpu_topology(void)
539 {
540 	reset_cpu_topology();
541 
542 	/*
543 	 * Discard anything that was parsed if we hit an error so we
544 	 * don't use partial information.
545 	 */
546 	if (parse_acpi_topology())
547 		reset_cpu_topology();
548 	else if (of_have_populated_dt() && parse_dt_topology())
549 		reset_cpu_topology();
550 }
551 #endif
552