xref: /openbmc/linux/drivers/base/arch_topology.c (revision b58c6630)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arch specific cpu topology information
4  *
5  * Copyright (C) 2016, ARM Ltd.
6  * Written by: Juri Lelli, ARM Ltd.
7  */
8 
9 #include <linux/acpi.h>
10 #include <linux/cpu.h>
11 #include <linux/cpufreq.h>
12 #include <linux/device.h>
13 #include <linux/of.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/sched/topology.h>
17 #include <linux/cpuset.h>
18 #include <linux/cpumask.h>
19 #include <linux/init.h>
20 #include <linux/percpu.h>
21 #include <linux/sched.h>
22 #include <linux/smp.h>
23 
24 __weak bool arch_freq_counters_available(struct cpumask *cpus)
25 {
26 	return false;
27 }
28 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
29 
30 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
31 			 unsigned long max_freq)
32 {
33 	unsigned long scale;
34 	int i;
35 
36 	/*
37 	 * If the use of counters for FIE is enabled, just return as we don't
38 	 * want to update the scale factor with information from CPUFREQ.
39 	 * Instead the scale factor will be updated from arch_scale_freq_tick.
40 	 */
41 	if (arch_freq_counters_available(cpus))
42 		return;
43 
44 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
45 
46 	for_each_cpu(i, cpus)
47 		per_cpu(freq_scale, i) = scale;
48 }
49 
50 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
51 
52 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
53 {
54 	per_cpu(cpu_scale, cpu) = capacity;
55 }
56 
57 static ssize_t cpu_capacity_show(struct device *dev,
58 				 struct device_attribute *attr,
59 				 char *buf)
60 {
61 	struct cpu *cpu = container_of(dev, struct cpu, dev);
62 
63 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
64 }
65 
66 static void update_topology_flags_workfn(struct work_struct *work);
67 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
68 
69 static DEVICE_ATTR_RO(cpu_capacity);
70 
71 static int register_cpu_capacity_sysctl(void)
72 {
73 	int i;
74 	struct device *cpu;
75 
76 	for_each_possible_cpu(i) {
77 		cpu = get_cpu_device(i);
78 		if (!cpu) {
79 			pr_err("%s: too early to get CPU%d device!\n",
80 			       __func__, i);
81 			continue;
82 		}
83 		device_create_file(cpu, &dev_attr_cpu_capacity);
84 	}
85 
86 	return 0;
87 }
88 subsys_initcall(register_cpu_capacity_sysctl);
89 
/* Set to 1 by update_topology_flags_workfn() around its rebuild, else 0. */
static int update_topology;

/* Report the current value of the update_topology flag. */
int topology_update_cpu_topology(void)
{
	const int in_progress = update_topology;

	return in_progress;
}
96 
97 /*
98  * Updating the sched_domains can't be done directly from cpufreq callbacks
99  * due to locking, so queue the work for later.
100  */
101 static void update_topology_flags_workfn(struct work_struct *work)
102 {
103 	update_topology = 1;
104 	rebuild_sched_domains();
105 	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
106 	update_topology = 0;
107 }
108 
109 static DEFINE_PER_CPU(u32, freq_factor) = 1;
110 static u32 *raw_capacity;
111 
112 static int free_raw_capacity(void)
113 {
114 	kfree(raw_capacity);
115 	raw_capacity = NULL;
116 
117 	return 0;
118 }
119 
120 void topology_normalize_cpu_scale(void)
121 {
122 	u64 capacity;
123 	u64 capacity_scale;
124 	int cpu;
125 
126 	if (!raw_capacity)
127 		return;
128 
129 	capacity_scale = 1;
130 	for_each_possible_cpu(cpu) {
131 		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
132 		capacity_scale = max(capacity, capacity_scale);
133 	}
134 
135 	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
136 	for_each_possible_cpu(cpu) {
137 		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
138 		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
139 			capacity_scale);
140 		topology_set_cpu_scale(cpu, capacity);
141 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
142 			cpu, topology_get_cpu_scale(cpu));
143 	}
144 }
145 
146 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
147 {
148 	struct clk *cpu_clk;
149 	static bool cap_parsing_failed;
150 	int ret;
151 	u32 cpu_capacity;
152 
153 	if (cap_parsing_failed)
154 		return false;
155 
156 	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
157 				   &cpu_capacity);
158 	if (!ret) {
159 		if (!raw_capacity) {
160 			raw_capacity = kcalloc(num_possible_cpus(),
161 					       sizeof(*raw_capacity),
162 					       GFP_KERNEL);
163 			if (!raw_capacity) {
164 				cap_parsing_failed = true;
165 				return false;
166 			}
167 		}
168 		raw_capacity[cpu] = cpu_capacity;
169 		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
170 			cpu_node, raw_capacity[cpu]);
171 
172 		/*
173 		 * Update freq_factor for calculating early boot cpu capacities.
174 		 * For non-clk CPU DVFS mechanism, there's no way to get the
175 		 * frequency value now, assuming they are running at the same
176 		 * frequency (by keeping the initial freq_factor value).
177 		 */
178 		cpu_clk = of_clk_get(cpu_node, 0);
179 		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
180 			per_cpu(freq_factor, cpu) =
181 				clk_get_rate(cpu_clk) / 1000;
182 			clk_put(cpu_clk);
183 		}
184 	} else {
185 		if (raw_capacity) {
186 			pr_err("cpu_capacity: missing %pOF raw capacity\n",
187 				cpu_node);
188 			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
189 		}
190 		cap_parsing_failed = true;
191 		free_raw_capacity();
192 	}
193 
194 	return !ret;
195 }
196 
197 #ifdef CONFIG_CPU_FREQ
198 static cpumask_var_t cpus_to_visit;
199 static void parsing_done_workfn(struct work_struct *work);
200 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
201 
202 static int
203 init_cpu_capacity_callback(struct notifier_block *nb,
204 			   unsigned long val,
205 			   void *data)
206 {
207 	struct cpufreq_policy *policy = data;
208 	int cpu;
209 
210 	if (!raw_capacity)
211 		return 0;
212 
213 	if (val != CPUFREQ_CREATE_POLICY)
214 		return 0;
215 
216 	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
217 		 cpumask_pr_args(policy->related_cpus),
218 		 cpumask_pr_args(cpus_to_visit));
219 
220 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
221 
222 	for_each_cpu(cpu, policy->related_cpus)
223 		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
224 
225 	if (cpumask_empty(cpus_to_visit)) {
226 		topology_normalize_cpu_scale();
227 		schedule_work(&update_topology_flags_work);
228 		free_raw_capacity();
229 		pr_debug("cpu_capacity: parsing done\n");
230 		schedule_work(&parsing_done_work);
231 	}
232 
233 	return 0;
234 }
235 
236 static struct notifier_block init_cpu_capacity_notifier = {
237 	.notifier_call = init_cpu_capacity_callback,
238 };
239 
240 static int __init register_cpufreq_notifier(void)
241 {
242 	int ret;
243 
244 	/*
245 	 * on ACPI-based systems we need to use the default cpu capacity
246 	 * until we have the necessary code to parse the cpu capacity, so
247 	 * skip registering cpufreq notifier.
248 	 */
249 	if (!acpi_disabled || !raw_capacity)
250 		return -EINVAL;
251 
252 	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
253 		return -ENOMEM;
254 
255 	cpumask_copy(cpus_to_visit, cpu_possible_mask);
256 
257 	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
258 					CPUFREQ_POLICY_NOTIFIER);
259 
260 	if (ret)
261 		free_cpumask_var(cpus_to_visit);
262 
263 	return ret;
264 }
265 core_initcall(register_cpufreq_notifier);
266 
267 static void parsing_done_workfn(struct work_struct *work)
268 {
269 	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
270 					 CPUFREQ_POLICY_NOTIFIER);
271 	free_cpumask_var(cpus_to_visit);
272 }
273 
274 #else
275 core_initcall(free_raw_capacity);
276 #endif
277 
278 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
279 /*
280  * This function returns the logic cpu number of the node.
281  * There are basically three kinds of return values:
282  * (1) logic cpu number which is > 0.
283  * (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
284  * there is no possible logical CPU in the kernel to match. This happens
285  * when CONFIG_NR_CPUS is configure to be smaller than the number of
286  * CPU nodes in DT. We need to just ignore this case.
287  * (3) -1 if the node does not exist in the device tree
288  */
289 static int __init get_cpu_for_node(struct device_node *node)
290 {
291 	struct device_node *cpu_node;
292 	int cpu;
293 
294 	cpu_node = of_parse_phandle(node, "cpu", 0);
295 	if (!cpu_node)
296 		return -1;
297 
298 	cpu = of_cpu_node_to_id(cpu_node);
299 	if (cpu >= 0)
300 		topology_parse_cpu_capacity(cpu_node, cpu);
301 	else
302 		pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
303 			cpu_node, cpumask_pr_args(cpu_possible_mask));
304 
305 	of_node_put(cpu_node);
306 	return cpu;
307 }
308 
309 static int __init parse_core(struct device_node *core, int package_id,
310 			     int core_id)
311 {
312 	char name[20];
313 	bool leaf = true;
314 	int i = 0;
315 	int cpu;
316 	struct device_node *t;
317 
318 	do {
319 		snprintf(name, sizeof(name), "thread%d", i);
320 		t = of_get_child_by_name(core, name);
321 		if (t) {
322 			leaf = false;
323 			cpu = get_cpu_for_node(t);
324 			if (cpu >= 0) {
325 				cpu_topology[cpu].package_id = package_id;
326 				cpu_topology[cpu].core_id = core_id;
327 				cpu_topology[cpu].thread_id = i;
328 			} else if (cpu != -ENODEV) {
329 				pr_err("%pOF: Can't get CPU for thread\n", t);
330 				of_node_put(t);
331 				return -EINVAL;
332 			}
333 			of_node_put(t);
334 		}
335 		i++;
336 	} while (t);
337 
338 	cpu = get_cpu_for_node(core);
339 	if (cpu >= 0) {
340 		if (!leaf) {
341 			pr_err("%pOF: Core has both threads and CPU\n",
342 			       core);
343 			return -EINVAL;
344 		}
345 
346 		cpu_topology[cpu].package_id = package_id;
347 		cpu_topology[cpu].core_id = core_id;
348 	} else if (leaf && cpu != -ENODEV) {
349 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
350 		return -EINVAL;
351 	}
352 
353 	return 0;
354 }
355 
356 static int __init parse_cluster(struct device_node *cluster, int depth)
357 {
358 	char name[20];
359 	bool leaf = true;
360 	bool has_cores = false;
361 	struct device_node *c;
362 	static int package_id __initdata;
363 	int core_id = 0;
364 	int i, ret;
365 
366 	/*
367 	 * First check for child clusters; we currently ignore any
368 	 * information about the nesting of clusters and present the
369 	 * scheduler with a flat list of them.
370 	 */
371 	i = 0;
372 	do {
373 		snprintf(name, sizeof(name), "cluster%d", i);
374 		c = of_get_child_by_name(cluster, name);
375 		if (c) {
376 			leaf = false;
377 			ret = parse_cluster(c, depth + 1);
378 			of_node_put(c);
379 			if (ret != 0)
380 				return ret;
381 		}
382 		i++;
383 	} while (c);
384 
385 	/* Now check for cores */
386 	i = 0;
387 	do {
388 		snprintf(name, sizeof(name), "core%d", i);
389 		c = of_get_child_by_name(cluster, name);
390 		if (c) {
391 			has_cores = true;
392 
393 			if (depth == 0) {
394 				pr_err("%pOF: cpu-map children should be clusters\n",
395 				       c);
396 				of_node_put(c);
397 				return -EINVAL;
398 			}
399 
400 			if (leaf) {
401 				ret = parse_core(c, package_id, core_id++);
402 			} else {
403 				pr_err("%pOF: Non-leaf cluster with core %s\n",
404 				       cluster, name);
405 				ret = -EINVAL;
406 			}
407 
408 			of_node_put(c);
409 			if (ret != 0)
410 				return ret;
411 		}
412 		i++;
413 	} while (c);
414 
415 	if (leaf && !has_cores)
416 		pr_warn("%pOF: empty cluster\n", cluster);
417 
418 	if (leaf)
419 		package_id++;
420 
421 	return 0;
422 }
423 
424 static int __init parse_dt_topology(void)
425 {
426 	struct device_node *cn, *map;
427 	int ret = 0;
428 	int cpu;
429 
430 	cn = of_find_node_by_path("/cpus");
431 	if (!cn) {
432 		pr_err("No CPU information found in DT\n");
433 		return 0;
434 	}
435 
436 	/*
437 	 * When topology is provided cpu-map is essentially a root
438 	 * cluster with restricted subnodes.
439 	 */
440 	map = of_get_child_by_name(cn, "cpu-map");
441 	if (!map)
442 		goto out;
443 
444 	ret = parse_cluster(map, 0);
445 	if (ret != 0)
446 		goto out_map;
447 
448 	topology_normalize_cpu_scale();
449 
450 	/*
451 	 * Check that all cores are in the topology; the SMP code will
452 	 * only mark cores described in the DT as possible.
453 	 */
454 	for_each_possible_cpu(cpu)
455 		if (cpu_topology[cpu].package_id == -1)
456 			ret = -EINVAL;
457 
458 out_map:
459 	of_node_put(map);
460 out:
461 	of_node_put(cn);
462 	return ret;
463 }
464 #endif
465 
466 /*
467  * cpu topology table
468  */
469 struct cpu_topology cpu_topology[NR_CPUS];
470 EXPORT_SYMBOL_GPL(cpu_topology);
471 
472 const struct cpumask *cpu_coregroup_mask(int cpu)
473 {
474 	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
475 
476 	/* Find the smaller of NUMA, core or LLC siblings */
477 	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
478 		/* not numa in package, lets use the package siblings */
479 		core_mask = &cpu_topology[cpu].core_sibling;
480 	}
481 	if (cpu_topology[cpu].llc_id != -1) {
482 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
483 			core_mask = &cpu_topology[cpu].llc_sibling;
484 	}
485 
486 	return core_mask;
487 }
488 
489 void update_siblings_masks(unsigned int cpuid)
490 {
491 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
492 	int cpu;
493 
494 	/* update core and thread sibling masks */
495 	for_each_online_cpu(cpu) {
496 		cpu_topo = &cpu_topology[cpu];
497 
498 		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
499 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
500 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
501 		}
502 
503 		if (cpuid_topo->package_id != cpu_topo->package_id)
504 			continue;
505 
506 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
507 		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
508 
509 		if (cpuid_topo->core_id != cpu_topo->core_id)
510 			continue;
511 
512 		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
513 		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
514 	}
515 }
516 
517 static void clear_cpu_topology(int cpu)
518 {
519 	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
520 
521 	cpumask_clear(&cpu_topo->llc_sibling);
522 	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
523 
524 	cpumask_clear(&cpu_topo->core_sibling);
525 	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
526 	cpumask_clear(&cpu_topo->thread_sibling);
527 	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
528 }
529 
530 void __init reset_cpu_topology(void)
531 {
532 	unsigned int cpu;
533 
534 	for_each_possible_cpu(cpu) {
535 		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
536 
537 		cpu_topo->thread_id = -1;
538 		cpu_topo->core_id = -1;
539 		cpu_topo->package_id = -1;
540 		cpu_topo->llc_id = -1;
541 
542 		clear_cpu_topology(cpu);
543 	}
544 }
545 
546 void remove_cpu_topology(unsigned int cpu)
547 {
548 	int sibling;
549 
550 	for_each_cpu(sibling, topology_core_cpumask(cpu))
551 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
552 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
553 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
554 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
555 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
556 
557 	clear_cpu_topology(cpu);
558 }
559 
560 __weak int __init parse_acpi_topology(void)
561 {
562 	return 0;
563 }
564 
565 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
566 void __init init_cpu_topology(void)
567 {
568 	reset_cpu_topology();
569 
570 	/*
571 	 * Discard anything that was parsed if we hit an error so we
572 	 * don't use partial information.
573 	 */
574 	if (parse_acpi_topology())
575 		reset_cpu_topology();
576 	else if (of_have_populated_dt() && parse_dt_topology())
577 		reset_cpu_topology();
578 }
579 #endif
580