// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

__weak bool arch_freq_counters_available(struct cpumask *cpus)
{
        return false;
}
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
                         unsigned long max_freq)
{
        unsigned long scale;
        int i;

        /*
         * If the use of counters for FIE is enabled, just return as we don't
         * want to update the scale factor with information from CPUFREQ.
         * Instead the scale factor will be updated from arch_scale_freq_tick.
         */
        if (arch_freq_counters_available(cpus))
                return;

        scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

        for_each_cpu(i, cpus)
                per_cpu(freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
        per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);

void topology_set_thermal_pressure(const struct cpumask *cpus,
                                   unsigned long th_pressure)
{
        int cpu;

        for_each_cpu(cpu, cpus)
                WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}

static ssize_t cpu_capacity_show(struct device *dev,
                                 struct device_attribute *attr,
                                 char *buf)
{
        struct cpu *cpu = container_of(dev, struct cpu, dev);

        return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

static int register_cpu_capacity_sysctl(void)
{
        int i;
        struct device *cpu;

        for_each_possible_cpu(i) {
                cpu = get_cpu_device(i);
                if (!cpu) {
                        pr_err("%s: too early to get CPU%d device!\n",
                               __func__, i);
                        continue;
                }
                device_create_file(cpu, &dev_attr_cpu_capacity);
        }

        return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
        return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
        update_topology = 1;
        rebuild_sched_domains();
        pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
        update_topology = 0;
}

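/*
 * Explanatory note (added here; not part of the original source): freq_factor
 * below holds a per-CPU frequency in MHz (kHz / 1000).  It is filled in
 * either from the CPU clk while parsing the DT (topology_parse_cpu_capacity())
 * or from cpufreq's policy->cpuinfo.max_freq (init_cpu_capacity_callback()),
 * and it defaults to 1 so that raw capacity-dmips-mhz values are compared
 * as-is until a frequency is known.  For example, a CPU with
 * capacity-dmips-mhz = 512 running at 1500 MHz contributes 512 * 1500 to the
 * normalization done in topology_normalize_cpu_scale().
 */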
static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
        kfree(raw_capacity);
        raw_capacity = NULL;

        return 0;
}

void topology_normalize_cpu_scale(void)
{
        u64 capacity;
        u64 capacity_scale;
        int cpu;

        if (!raw_capacity)
                return;

        capacity_scale = 1;
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity_scale = max(capacity, capacity_scale);
        }

        pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
        for_each_possible_cpu(cpu) {
                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
                capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
                                     capacity_scale);
                topology_set_cpu_scale(cpu, capacity);
                pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
                         cpu, topology_get_cpu_scale(cpu));
        }
}

bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
        struct clk *cpu_clk;
        static bool cap_parsing_failed;
        int ret;
        u32 cpu_capacity;

        if (cap_parsing_failed)
                return false;

        ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
                                   &cpu_capacity);
        if (!ret) {
                if (!raw_capacity) {
                        raw_capacity = kcalloc(num_possible_cpus(),
                                               sizeof(*raw_capacity),
                                               GFP_KERNEL);
                        if (!raw_capacity) {
                                cap_parsing_failed = true;
                                return false;
                        }
                }
                raw_capacity[cpu] = cpu_capacity;
                pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
                         cpu_node, raw_capacity[cpu]);

                /*
                 * Update freq_factor for calculating early boot CPU
                 * capacities.  For non-clk CPU DVFS mechanisms there is no
                 * way to get the frequency value at this point, so assume
                 * the CPUs are running at the same frequency (by keeping
                 * the initial freq_factor value).
                 */
                cpu_clk = of_clk_get(cpu_node, 0);
                if (!PTR_ERR_OR_ZERO(cpu_clk)) {
                        per_cpu(freq_factor, cpu) =
                                clk_get_rate(cpu_clk) / 1000;
                        clk_put(cpu_clk);
                }
        } else {
                if (raw_capacity) {
                        pr_err("cpu_capacity: missing %pOF raw capacity\n",
                               cpu_node);
                        pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
                }
                cap_parsing_failed = true;
                free_raw_capacity();
        }

        return !ret;
}

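/*
 * Illustrative example (added; the values and the clock provider are
 * hypothetical): a DT cpu node consumed by topology_parse_cpu_capacity()
 * could look like
 *
 *	cpu0: cpu@0 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a53";
 *		reg = <0x0>;
 *		capacity-dmips-mhz = <578>;
 *		clocks = <&cpu_clk 0>;
 *	};
 *
 * The raw capacity-dmips-mhz value is weighted by freq_factor and later
 * normalized so that the biggest CPU in the system ends up at
 * SCHED_CAPACITY_SCALE (1024).
 */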
#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

static int
init_cpu_capacity_callback(struct notifier_block *nb,
                           unsigned long val,
                           void *data)
{
        struct cpufreq_policy *policy = data;
        int cpu;

        if (!raw_capacity)
                return 0;

        if (val != CPUFREQ_CREATE_POLICY)
                return 0;

        pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
                 cpumask_pr_args(policy->related_cpus),
                 cpumask_pr_args(cpus_to_visit));

        cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

        for_each_cpu(cpu, policy->related_cpus)
                per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

        if (cpumask_empty(cpus_to_visit)) {
                topology_normalize_cpu_scale();
                schedule_work(&update_topology_flags_work);
                free_raw_capacity();
                pr_debug("cpu_capacity: parsing done\n");
                schedule_work(&parsing_done_work);
        }

        return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
        .notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
        int ret;

        /*
         * On ACPI-based systems we need to use the default CPU capacity
         * until we have the necessary code to parse the CPU capacity, so
         * skip registering the cpufreq notifier.
         */
        if (!acpi_disabled || !raw_capacity)
                return -EINVAL;

        if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(cpus_to_visit, cpu_possible_mask);

        ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
                                        CPUFREQ_POLICY_NOTIFIER);

        if (ret)
                free_cpumask_var(cpus_to_visit);

        return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
        cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
                                    CPUFREQ_POLICY_NOTIFIER);
        free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical cpu number of the node.
 * There are basically three kinds of return values:
 * (1) logical cpu number which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT but
 *     there is no possible logical CPU in the kernel to match. This happens
 *     when CONFIG_NR_CPUS is configured to be smaller than the number of
 *     CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
        struct device_node *cpu_node;
        int cpu;

        cpu_node = of_parse_phandle(node, "cpu", 0);
        if (!cpu_node)
                return -1;

        cpu = of_cpu_node_to_id(cpu_node);
        if (cpu >= 0)
                topology_parse_cpu_capacity(cpu_node, cpu);
        else
                pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
                        cpu_node, cpumask_pr_args(cpu_possible_mask));

        of_node_put(cpu_node);
        return cpu;
}

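/*
 * Illustrative example (added; the node names follow the cpu-map binding,
 * the exact layout is hypothetical): parse_core() and parse_cluster() below
 * walk a /cpus/cpu-map hierarchy such as
 *
 *	cpu-map {
 *		cluster0 {
 *			core0 { cpu = <&cpu0>; };
 *			core1 { cpu = <&cpu1>; };
 *		};
 *		cluster1 {
 *			core0 {
 *				thread0 { cpu = <&cpu2>; };
 *				thread1 { cpu = <&cpu3>; };
 *			};
 *		};
 *	};
 *
 * Nested clusters are accepted but flattened: each leaf cluster gets its
 * own package_id, cores are numbered within their cluster and threads
 * within their core.
 */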
static int __init parse_core(struct device_node *core, int package_id,
                             int core_id)
{
        char name[20];
        bool leaf = true;
        int i = 0;
        int cpu;
        struct device_node *t;

        do {
                snprintf(name, sizeof(name), "thread%d", i);
                t = of_get_child_by_name(core, name);
                if (t) {
                        leaf = false;
                        cpu = get_cpu_for_node(t);
                        if (cpu >= 0) {
                                cpu_topology[cpu].package_id = package_id;
                                cpu_topology[cpu].core_id = core_id;
                                cpu_topology[cpu].thread_id = i;
                        } else if (cpu != -ENODEV) {
                                pr_err("%pOF: Can't get CPU for thread\n", t);
                                of_node_put(t);
                                return -EINVAL;
                        }
                        of_node_put(t);
                }
                i++;
        } while (t);

        cpu = get_cpu_for_node(core);
        if (cpu >= 0) {
                if (!leaf) {
                        pr_err("%pOF: Core has both threads and CPU\n",
                               core);
                        return -EINVAL;
                }

                cpu_topology[cpu].package_id = package_id;
                cpu_topology[cpu].core_id = core_id;
        } else if (leaf && cpu != -ENODEV) {
                pr_err("%pOF: Can't get CPU for leaf core\n", core);
                return -EINVAL;
        }

        return 0;
}

static int __init parse_cluster(struct device_node *cluster, int depth)
{
        char name[20];
        bool leaf = true;
        bool has_cores = false;
        struct device_node *c;
        static int package_id __initdata;
        int core_id = 0;
        int i, ret;

        /*
         * First check for child clusters; we currently ignore any
         * information about the nesting of clusters and present the
         * scheduler with a flat list of them.
         */
        i = 0;
        do {
                snprintf(name, sizeof(name), "cluster%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        leaf = false;
                        ret = parse_cluster(c, depth + 1);
                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        /* Now check for cores */
        i = 0;
        do {
                snprintf(name, sizeof(name), "core%d", i);
                c = of_get_child_by_name(cluster, name);
                if (c) {
                        has_cores = true;

                        if (depth == 0) {
                                pr_err("%pOF: cpu-map children should be clusters\n",
                                       c);
                                of_node_put(c);
                                return -EINVAL;
                        }

                        if (leaf) {
                                ret = parse_core(c, package_id, core_id++);
                        } else {
                                pr_err("%pOF: Non-leaf cluster with core %s\n",
                                       cluster, name);
                                ret = -EINVAL;
                        }

                        of_node_put(c);
                        if (ret != 0)
                                return ret;
                }
                i++;
        } while (c);

        if (leaf && !has_cores)
                pr_warn("%pOF: empty cluster\n", cluster);

        if (leaf)
                package_id++;

        return 0;
}

static int __init parse_dt_topology(void)
{
        struct device_node *cn, *map;
        int ret = 0;
        int cpu;

        cn = of_find_node_by_path("/cpus");
        if (!cn) {
                pr_err("No CPU information found in DT\n");
                return 0;
        }

        /*
         * When topology is provided cpu-map is essentially a root
         * cluster with restricted subnodes.
         */
        map = of_get_child_by_name(cn, "cpu-map");
        if (!map)
                goto out;

        ret = parse_cluster(map, 0);
        if (ret != 0)
                goto out_map;

        topology_normalize_cpu_scale();

        /*
         * Check that all cores are in the topology; the SMP code will
         * only mark cores described in the DT as possible.
         */
        for_each_possible_cpu(cpu)
                if (cpu_topology[cpu].package_id == -1)
                        ret = -EINVAL;

out_map:
        of_node_put(map);
out:
        of_node_put(cn);
        return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
        const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

        /* Find the smaller of NUMA, core or LLC siblings */
        if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
                /* not numa in package, lets use the package siblings */
                core_mask = &cpu_topology[cpu].core_sibling;
        }
        if (cpu_topology[cpu].llc_id != -1) {
                if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
                        core_mask = &cpu_topology[cpu].llc_sibling;
        }

        return core_mask;
}

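/*
 * Explanatory note (added; the CPU numbers are only an example): on a
 * hypothetical system where CPUs 0-3 share a package_id and CPUs 0-1 share
 * one llc_id while CPUs 2-3 share another, update_siblings_masks() leaves
 * CPU 1 with core_sibling = 0-3, llc_sibling = 0-1 and thread_sibling = 1
 * (no SMT).  cpu_coregroup_mask() above then returns the smallest of the
 * NUMA, package and LLC masks for the scheduler to build its domains from.
 */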
void update_siblings_masks(unsigned int cpuid)
{
        struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
        int cpu;

        /* update core and thread sibling masks */
        for_each_online_cpu(cpu) {
                cpu_topo = &cpu_topology[cpu];

                if (cpuid_topo->llc_id == cpu_topo->llc_id) {
                        cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
                        cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
                }

                if (cpuid_topo->package_id != cpu_topo->package_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

                if (cpuid_topo->core_id != cpu_topo->core_id)
                        continue;

                cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
                cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
        }
}

static void clear_cpu_topology(int cpu)
{
        struct cpu_topology *cpu_topo = &cpu_topology[cpu];

        cpumask_clear(&cpu_topo->llc_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

        cpumask_clear(&cpu_topo->core_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
        cpumask_clear(&cpu_topo->thread_sibling);
        cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_topology *cpu_topo = &cpu_topology[cpu];

                cpu_topo->thread_id = -1;
                cpu_topo->core_id = -1;
                cpu_topo->package_id = -1;
                cpu_topo->llc_id = -1;

                clear_cpu_topology(cpu);
        }
}

void remove_cpu_topology(unsigned int cpu)
{
        int sibling;

        for_each_cpu(sibling, topology_core_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
        for_each_cpu(sibling, topology_sibling_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
        for_each_cpu(sibling, topology_llc_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

        clear_cpu_topology(cpu);
}

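/*
 * Explanatory note (added): the weak stub below returns 0 ("nothing parsed,
 * no error"), so init_cpu_topology() falls through to DT parsing on
 * architectures that do not override it; arm64, for instance, provides its
 * own parse_acpi_topology() based on the ACPI PPTT table.
 */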
__weak int __init parse_acpi_topology(void)
{
        return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
        reset_cpu_topology();

        /*
         * Discard anything that was parsed if we hit an error so we
         * don't use partial information.
         */
        if (parse_acpi_topology())
                reset_cpu_topology();
        else if (of_have_populated_dt() && parse_dt_topology())
                reset_cpu_topology();
}
#endif