xref: /openbmc/linux/drivers/base/arch_topology.c (revision 0eb76ba2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arch specific cpu topology information
4  *
5  * Copyright (C) 2016, ARM Ltd.
6  * Written by: Juri Lelli, ARM Ltd.
7  */
8 
9 #include <linux/acpi.h>
10 #include <linux/cpu.h>
11 #include <linux/cpufreq.h>
12 #include <linux/device.h>
13 #include <linux/of.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/sched/topology.h>
17 #include <linux/cpuset.h>
18 #include <linux/cpumask.h>
19 #include <linux/init.h>
20 #include <linux/percpu.h>
21 #include <linux/sched.h>
22 #include <linux/smp.h>
23 
24 bool topology_scale_freq_invariant(void)
25 {
26 	return cpufreq_supports_freq_invariance() ||
27 	       arch_freq_counters_available(cpu_online_mask);
28 }
29 
30 __weak bool arch_freq_counters_available(const struct cpumask *cpus)
31 {
32 	return false;
33 }
34 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
35 
36 void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
37 			     unsigned long max_freq)
38 {
39 	unsigned long scale;
40 	int i;
41 
42 	if (WARN_ON_ONCE(!cur_freq || !max_freq))
43 		return;
44 
45 	/*
46 	 * If counters are used for FIE, return early: the scale factor must
47 	 * not be overwritten with information from CPUFREQ. It is updated
48 	 * from arch_scale_freq_tick() instead.
49 	 */
50 	if (arch_freq_counters_available(cpus))
51 		return;
52 
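	/*
	 * The scale factor is the current frequency expressed as a fraction
	 * of the maximum frequency, in SCHED_CAPACITY_SCALE (1024) units.
	 * For example, assuming cur_freq = 1000000 kHz and
	 * max_freq = 2000000 kHz, the resulting scale is 512.
	 */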
53 	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
54 
55 	for_each_cpu(i, cpus)
56 		per_cpu(freq_scale, i) = scale;
57 }
58 
59 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
60 
61 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
62 {
63 	per_cpu(cpu_scale, cpu) = capacity;
64 }
65 
66 DEFINE_PER_CPU(unsigned long, thermal_pressure);
67 
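/*
 * thermal_pressure is expressed in the same units as cpu_scale (fractions
 * of SCHED_CAPACITY_SCALE); the scheduler subtracts it (typically via
 * arch_scale_thermal_pressure()) from a CPU's original capacity when
 * estimating the capacity left for tasks.
 */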
68 void topology_set_thermal_pressure(const struct cpumask *cpus,
69 			       unsigned long th_pressure)
70 {
71 	int cpu;
72 
73 	for_each_cpu(cpu, cpus)
74 		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
75 }
76 
77 static ssize_t cpu_capacity_show(struct device *dev,
78 				 struct device_attribute *attr,
79 				 char *buf)
80 {
81 	struct cpu *cpu = container_of(dev, struct cpu, dev);
82 
83 	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
84 }
85 
86 static void update_topology_flags_workfn(struct work_struct *work);
87 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
88 
89 static DEVICE_ATTR_RO(cpu_capacity);
90 
91 static int register_cpu_capacity_sysctl(void)
92 {
93 	int i;
94 	struct device *cpu;
95 
96 	for_each_possible_cpu(i) {
97 		cpu = get_cpu_device(i);
98 		if (!cpu) {
99 			pr_err("%s: too early to get CPU%d device!\n",
100 			       __func__, i);
101 			continue;
102 		}
103 		device_create_file(cpu, &dev_attr_cpu_capacity);
104 	}
105 
106 	return 0;
107 }
108 subsys_initcall(register_cpu_capacity_sysctl);
109 
110 static int update_topology;
111 
112 int topology_update_cpu_topology(void)
113 {
114 	return update_topology;
115 }
116 
117 /*
118  * Updating the sched_domains can't be done directly from cpufreq callbacks
119  * due to locking, so queue the work for later.
120  */
121 static void update_topology_flags_workfn(struct work_struct *work)
122 {
123 	update_topology = 1;
124 	rebuild_sched_domains();
125 	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
126 	update_topology = 0;
127 }
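/*
 * While rebuild_sched_domains() runs, topology_update_cpu_topology() above
 * returns 1, signalling (typically through the scheduler's
 * arch_update_cpu_topology() hook) that the topology information has
 * changed and the sched_domain flags must be re-evaluated.
 */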
128 
129 static DEFINE_PER_CPU(u32, freq_factor) = 1;
130 static u32 *raw_capacity;
131 
132 static int free_raw_capacity(void)
133 {
134 	kfree(raw_capacity);
135 	raw_capacity = NULL;
136 
137 	return 0;
138 }
139 
140 void topology_normalize_cpu_scale(void)
141 {
142 	u64 capacity;
143 	u64 capacity_scale;
144 	int cpu;
145 
146 	if (!raw_capacity)
147 		return;
148 
149 	capacity_scale = 1;
150 	for_each_possible_cpu(cpu) {
151 		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
152 		capacity_scale = max(capacity, capacity_scale);
153 	}
154 
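	/*
	 * Scale every CPU so that the biggest capacity * frequency product
	 * lands exactly on SCHED_CAPACITY_SCALE (1024). For example, assuming
	 * raw capacities of 1024 and 512 with identical freq_factor values,
	 * the resulting cpu_scale values are 1024 and 512 respectively.
	 */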
155 	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
156 	for_each_possible_cpu(cpu) {
157 		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
158 		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
159 			capacity_scale);
160 		topology_set_cpu_scale(cpu, capacity);
161 		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
162 			cpu, topology_get_cpu_scale(cpu));
163 	}
164 }
165 
166 bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
167 {
168 	struct clk *cpu_clk;
169 	static bool cap_parsing_failed;
170 	int ret;
171 	u32 cpu_capacity;
172 
173 	if (cap_parsing_failed)
174 		return false;
175 
176 	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
177 				   &cpu_capacity);
178 	if (!ret) {
179 		if (!raw_capacity) {
180 			raw_capacity = kcalloc(num_possible_cpus(),
181 					       sizeof(*raw_capacity),
182 					       GFP_KERNEL);
183 			if (!raw_capacity) {
184 				cap_parsing_failed = true;
185 				return false;
186 			}
187 		}
188 		raw_capacity[cpu] = cpu_capacity;
189 		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
190 			cpu_node, raw_capacity[cpu]);
191 
192 		/*
193 		 * Update freq_factor for calculating early boot CPU capacities.
194 		 * For CPUs whose DVFS mechanism is not clk based, the frequency
195 		 * cannot be obtained this early; assume they all run at the same
196 		 * frequency by keeping the initial freq_factor value.
197 		 */
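		/*
		 * For example, assuming a 1.2 GHz cpu clock below,
		 * freq_factor becomes 1200000 (kHz) and later multiplies the
		 * raw dmips/MHz value in topology_normalize_cpu_scale().
		 */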
198 		cpu_clk = of_clk_get(cpu_node, 0);
199 		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
200 			per_cpu(freq_factor, cpu) =
201 				clk_get_rate(cpu_clk) / 1000;
202 			clk_put(cpu_clk);
203 		}
204 	} else {
205 		if (raw_capacity) {
206 			pr_err("cpu_capacity: missing %pOF raw capacity\n",
207 				cpu_node);
208 			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
209 		}
210 		cap_parsing_failed = true;
211 		free_raw_capacity();
212 	}
213 
214 	return !ret;
215 }
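/*
 * Illustrative device-tree snippet consumed by the parser above; the
 * property values and the clock phandle are examples only:
 *
 *	cpu@0 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a53";
 *		capacity-dmips-mhz = <578>;
 *		clocks = <&cpu_clk>;
 *	};
 */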
216 
217 #ifdef CONFIG_CPU_FREQ
218 static cpumask_var_t cpus_to_visit;
219 static void parsing_done_workfn(struct work_struct *work);
220 static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
221 
222 static int
223 init_cpu_capacity_callback(struct notifier_block *nb,
224 			   unsigned long val,
225 			   void *data)
226 {
227 	struct cpufreq_policy *policy = data;
228 	int cpu;
229 
230 	if (!raw_capacity)
231 		return 0;
232 
233 	if (val != CPUFREQ_CREATE_POLICY)
234 		return 0;
235 
236 	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
237 		 cpumask_pr_args(policy->related_cpus),
238 		 cpumask_pr_args(cpus_to_visit));
239 
240 	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
241 
242 	for_each_cpu(cpu, policy->related_cpus)
243 		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
244 
245 	if (cpumask_empty(cpus_to_visit)) {
246 		topology_normalize_cpu_scale();
247 		schedule_work(&update_topology_flags_work);
248 		free_raw_capacity();
249 		pr_debug("cpu_capacity: parsing done\n");
250 		schedule_work(&parsing_done_work);
251 	}
252 
253 	return 0;
254 }
255 
256 static struct notifier_block init_cpu_capacity_notifier = {
257 	.notifier_call = init_cpu_capacity_callback,
258 };
259 
260 static int __init register_cpufreq_notifier(void)
261 {
262 	int ret;
263 
264 	/*
265 	 * On ACPI-based systems we need to use the default CPU capacity
266 	 * until we have the necessary code to parse it from ACPI, so skip
267 	 * registering the cpufreq notifier in that case.
268 	 */
269 	if (!acpi_disabled || !raw_capacity)
270 		return -EINVAL;
271 
272 	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
273 		return -ENOMEM;
274 
275 	cpumask_copy(cpus_to_visit, cpu_possible_mask);
276 
277 	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
278 					CPUFREQ_POLICY_NOTIFIER);
279 
280 	if (ret)
281 		free_cpumask_var(cpus_to_visit);
282 
283 	return ret;
284 }
285 core_initcall(register_cpufreq_notifier);
286 
287 static void parsing_done_workfn(struct work_struct *work)
288 {
289 	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
290 					 CPUFREQ_POLICY_NOTIFIER);
291 	free_cpumask_var(cpus_to_visit);
292 }
293 
294 #else
295 core_initcall(free_raw_capacity);
296 #endif
297 
298 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
299 /*
300  * This function returns the logical CPU number of the node.
301  * There are basically three kinds of return values:
302  * (1) The logical CPU number, which is >= 0.
303  * (2) -ENODEV when the device tree (DT) node is valid and found in the DT
304  * but there is no possible logical CPU in the kernel to match. This happens
305  * when CONFIG_NR_CPUS is configured to be smaller than the number of CPU
306  * nodes in the DT. This case is simply ignored.
307  * (3) -1 if the node does not exist in the device tree.
308  */
309 static int __init get_cpu_for_node(struct device_node *node)
310 {
311 	struct device_node *cpu_node;
312 	int cpu;
313 
314 	cpu_node = of_parse_phandle(node, "cpu", 0);
315 	if (!cpu_node)
316 		return -1;
317 
318 	cpu = of_cpu_node_to_id(cpu_node);
319 	if (cpu >= 0)
320 		topology_parse_cpu_capacity(cpu_node, cpu);
321 	else
322 		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
323 			cpu_node, cpumask_pr_args(cpu_possible_mask));
324 
325 	of_node_put(cpu_node);
326 	return cpu;
327 }
328 
329 static int __init parse_core(struct device_node *core, int package_id,
330 			     int core_id)
331 {
332 	char name[20];
333 	bool leaf = true;
334 	int i = 0;
335 	int cpu;
336 	struct device_node *t;
337 
338 	do {
339 		snprintf(name, sizeof(name), "thread%d", i);
340 		t = of_get_child_by_name(core, name);
341 		if (t) {
342 			leaf = false;
343 			cpu = get_cpu_for_node(t);
344 			if (cpu >= 0) {
345 				cpu_topology[cpu].package_id = package_id;
346 				cpu_topology[cpu].core_id = core_id;
347 				cpu_topology[cpu].thread_id = i;
348 			} else if (cpu != -ENODEV) {
349 				pr_err("%pOF: Can't get CPU for thread\n", t);
350 				of_node_put(t);
351 				return -EINVAL;
352 			}
353 			of_node_put(t);
354 		}
355 		i++;
356 	} while (t);
357 
358 	cpu = get_cpu_for_node(core);
359 	if (cpu >= 0) {
360 		if (!leaf) {
361 			pr_err("%pOF: Core has both threads and CPU\n",
362 			       core);
363 			return -EINVAL;
364 		}
365 
366 		cpu_topology[cpu].package_id = package_id;
367 		cpu_topology[cpu].core_id = core_id;
368 	} else if (leaf && cpu != -ENODEV) {
369 		pr_err("%pOF: Can't get CPU for leaf core\n", core);
370 		return -EINVAL;
371 	}
372 
373 	return 0;
374 }
375 
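/*
 * Example cpu-map layout accepted by parse_cluster() and parse_core()
 * (illustrative; the CPU phandles are placeholders):
 *
 *	cpu-map {
 *		cluster0 {
 *			core0 { cpu = <&CPU0>; };
 *			core1 { cpu = <&CPU1>; };
 *		};
 *		cluster1 {
 *			core0 {
 *				thread0 { cpu = <&CPU2>; };
 *				thread1 { cpu = <&CPU3>; };
 *			};
 *		};
 *	};
 */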
376 static int __init parse_cluster(struct device_node *cluster, int depth)
377 {
378 	char name[20];
379 	bool leaf = true;
380 	bool has_cores = false;
381 	struct device_node *c;
382 	static int package_id __initdata;
383 	int core_id = 0;
384 	int i, ret;
385 
386 	/*
387 	 * First check for child clusters; we currently ignore any
388 	 * information about the nesting of clusters and present the
389 	 * scheduler with a flat list of them.
390 	 */
391 	i = 0;
392 	do {
393 		snprintf(name, sizeof(name), "cluster%d", i);
394 		c = of_get_child_by_name(cluster, name);
395 		if (c) {
396 			leaf = false;
397 			ret = parse_cluster(c, depth + 1);
398 			of_node_put(c);
399 			if (ret != 0)
400 				return ret;
401 		}
402 		i++;
403 	} while (c);
404 
405 	/* Now check for cores */
406 	i = 0;
407 	do {
408 		snprintf(name, sizeof(name), "core%d", i);
409 		c = of_get_child_by_name(cluster, name);
410 		if (c) {
411 			has_cores = true;
412 
413 			if (depth == 0) {
414 				pr_err("%pOF: cpu-map children should be clusters\n",
415 				       c);
416 				of_node_put(c);
417 				return -EINVAL;
418 			}
419 
420 			if (leaf) {
421 				ret = parse_core(c, package_id, core_id++);
422 			} else {
423 				pr_err("%pOF: Non-leaf cluster with core %s\n",
424 				       cluster, name);
425 				ret = -EINVAL;
426 			}
427 
428 			of_node_put(c);
429 			if (ret != 0)
430 				return ret;
431 		}
432 		i++;
433 	} while (c);
434 
435 	if (leaf && !has_cores)
436 		pr_warn("%pOF: empty cluster\n", cluster);
437 
438 	if (leaf)
439 		package_id++;
440 
441 	return 0;
442 }
443 
444 static int __init parse_dt_topology(void)
445 {
446 	struct device_node *cn, *map;
447 	int ret = 0;
448 	int cpu;
449 
450 	cn = of_find_node_by_path("/cpus");
451 	if (!cn) {
452 		pr_err("No CPU information found in DT\n");
453 		return 0;
454 	}
455 
456 	/*
457 	 * When a topology is provided, cpu-map is essentially a root
458 	 * cluster with restricted subnodes.
459 	 */
460 	map = of_get_child_by_name(cn, "cpu-map");
461 	if (!map)
462 		goto out;
463 
464 	ret = parse_cluster(map, 0);
465 	if (ret != 0)
466 		goto out_map;
467 
468 	topology_normalize_cpu_scale();
469 
470 	/*
471 	 * Check that all cores are in the topology; the SMP code will
472 	 * only mark cores described in the DT as possible.
473 	 */
474 	for_each_possible_cpu(cpu)
475 		if (cpu_topology[cpu].package_id == -1)
476 			ret = -EINVAL;
477 
478 out_map:
479 	of_node_put(map);
480 out:
481 	of_node_put(cn);
482 	return ret;
483 }
484 #endif
485 
486 /*
487  * cpu topology table
488  */
489 struct cpu_topology cpu_topology[NR_CPUS];
490 EXPORT_SYMBOL_GPL(cpu_topology);
491 
492 const struct cpumask *cpu_coregroup_mask(int cpu)
493 {
494 	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
495 
496 	/* Find the smaller of NUMA, core or LLC siblings */
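	/*
	 * For example, assuming a single package spanning two NUMA nodes:
	 * core_sibling is not a subset of the NUMA mask, so the NUMA mask is
	 * kept; if an LLC is private to each node, llc_sibling is then the
	 * mask that gets returned.
	 */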
497 	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
498 		/* no NUMA partitioning inside the package, use the package siblings */
499 		core_mask = &cpu_topology[cpu].core_sibling;
500 	}
501 	if (cpu_topology[cpu].llc_id != -1) {
502 		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
503 			core_mask = &cpu_topology[cpu].llc_sibling;
504 	}
505 
506 	return core_mask;
507 }
508 
509 void update_siblings_masks(unsigned int cpuid)
510 {
511 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
512 	int cpu;
513 
514 	/* update core and thread sibling masks */
515 	for_each_online_cpu(cpu) {
516 		cpu_topo = &cpu_topology[cpu];
517 
518 		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
519 			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
520 			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
521 		}
522 
523 		if (cpuid_topo->package_id != cpu_topo->package_id)
524 			continue;
525 
526 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
527 		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
528 
529 		if (cpuid_topo->core_id != cpu_topo->core_id)
530 			continue;
531 
532 		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
533 		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
534 	}
535 }
536 
537 static void clear_cpu_topology(int cpu)
538 {
539 	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
540 
541 	cpumask_clear(&cpu_topo->llc_sibling);
542 	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
543 
544 	cpumask_clear(&cpu_topo->core_sibling);
545 	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
546 	cpumask_clear(&cpu_topo->thread_sibling);
547 	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
548 }
549 
550 void __init reset_cpu_topology(void)
551 {
552 	unsigned int cpu;
553 
554 	for_each_possible_cpu(cpu) {
555 		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
556 
557 		cpu_topo->thread_id = -1;
558 		cpu_topo->core_id = -1;
559 		cpu_topo->package_id = -1;
560 		cpu_topo->llc_id = -1;
561 
562 		clear_cpu_topology(cpu);
563 	}
564 }
565 
566 void remove_cpu_topology(unsigned int cpu)
567 {
568 	int sibling;
569 
570 	for_each_cpu(sibling, topology_core_cpumask(cpu))
571 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
572 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
573 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
574 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
575 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
576 
577 	clear_cpu_topology(cpu);
578 }
579 
580 __weak int __init parse_acpi_topology(void)
581 {
582 	return 0;
583 }
584 
585 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
586 void __init init_cpu_topology(void)
587 {
588 	reset_cpu_topology();
589 
590 	/*
591 	 * Discard anything that was parsed if we hit an error so we
592 	 * don't use partial information.
593 	 */
594 	if (parse_acpi_topology())
595 		reset_cpu_topology();
596 	else if (of_have_populated_dt() && parse_dt_topology())
597 		reset_cpu_topology();
598 }
599 #endif
600