// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

bool topology_scale_freq_invariant(void)
{
	return cpufreq_supports_freq_invariance() ||
	       arch_freq_counters_available(cpu_online_mask);
}

__weak bool arch_freq_counters_available(const struct cpumask *cpus)
{
	return false;
}
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
			     unsigned long max_freq)
{
	unsigned long scale;
	int i;

	if (WARN_ON_ONCE(!cur_freq || !max_freq))
		return;

	/*
	 * If the use of counters for FIE is enabled, just return as we don't
	 * want to update the scale factor with information from CPUFREQ.
	 * Instead the scale factor will be updated from arch_scale_freq_tick.
	 */
	if (arch_freq_counters_available(cpus))
		return;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);

void topology_set_thermal_pressure(const struct cpumask *cpus,
				   unsigned long th_pressure)
{
	int cpu;

	for_each_cpu(cpu, cpus)
		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}

static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}
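
/*
 * Overview of the early boot capacity pipeline handled below (the numbers
 * in the example are purely illustrative, not from any real platform):
 *
 *  - topology_parse_cpu_capacity() stores each CPU's DT
 *    "capacity-dmips-mhz" value in raw_capacity[].
 *  - freq_factor starts at 1 and is later set to a value proportional to
 *    the CPU's maximum frequency (from the CPU clk in
 *    topology_parse_cpu_capacity() or from the cpufreq policy in
 *    init_cpu_capacity_callback(), both below).
 *  - topology_normalize_cpu_scale() normalizes the products so that the
 *    highest-capacity CPU ends up at SCHED_CAPACITY_SCALE (1024).
 *
 * Example with freq_factor = max_freq / 1000: little CPUs with
 * capacity-dmips-mhz = 512 at 1500 MHz give 512 * 1500 = 768000; big CPUs
 * with 1024 at 2000 MHz give 2048000. After normalization the little CPUs
 * get (768000 << 10) / 2048000 = 384 and the big CPUs get 1024.
 */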
static DEFINE_PER_CPU(u32, freq_factor) = 1;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}

void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	u64 capacity_scale;
	int cpu;

	if (!raw_capacity)
		return;

	capacity_scale = 1;
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity_scale = max(capacity, capacity_scale);
	}

	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
				     capacity_scale);
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			 cpu, topology_get_cpu_scale(cpu));
	}
}

bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	struct clk *cpu_clk;
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			 cpu_node, raw_capacity[cpu]);

		/*
		 * Update freq_factor for calculating early boot CPU capacities.
		 * For non-clk CPU DVFS mechanisms, there's no way to get the
		 * frequency value now, so assume the CPUs are running at the
		 * same frequency (by keeping the initial freq_factor value).
		 */
		cpu_clk = of_clk_get(cpu_node, 0);
		if (!PTR_ERR_OR_ZERO(cpu_clk)) {
			per_cpu(freq_factor, cpu) =
				clk_get_rate(cpu_clk) / 1000;
			clk_put(cpu_clk);
		}
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
			       cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}
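
/*
 * Illustrative only: a DT cpu node providing the property read by
 * topology_parse_cpu_capacity() above could look like this (node name,
 * compatible string and values are made up for the example):
 *
 *	cpu0: cpu@0 {
 *		device_type = "cpu";
 *		compatible = "arm,cortex-a53";
 *		reg = <0x0 0x0>;
 *		capacity-dmips-mhz = <512>;
 *		clocks = <&cpu_clk 0>;
 *	};
 *
 * The optional clock reference is what lets the code above derive an
 * early freq_factor via of_clk_get()/clk_get_rate() before cpufreq is up.
 */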
#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus)
		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * On ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering the cpufreq notifier.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);

	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical CPU number of the given node.
 * There are basically three kinds of return values:
 * (1) a logical CPU number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT
 *     but there is no possible logical CPU in the kernel to match. This
 *     happens when CONFIG_NR_CPUS is configured to be smaller than the
 *     number of CPU nodes in the DT. We need to just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node;
	int cpu;

	cpu_node = of_parse_phandle(node, "cpu", 0);
	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu >= 0)
		topology_parse_cpu_capacity(cpu_node, cpu);
	else
		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));

	of_node_put(cpu_node);
	return cpu;
}
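
/*
 * Illustrative cpu-map layout walked by parse_core()/parse_cluster()
 * below, for a made-up two-cluster, four-CPU system (threadN subnodes
 * would only appear on SMT parts):
 *
 *	cpu-map {
 *		cluster0 {
 *			core0 { cpu = <&cpu0>; };
 *			core1 { cpu = <&cpu1>; };
 *		};
 *		cluster1 {
 *			core0 { cpu = <&cpu2>; };
 *			core1 { cpu = <&cpu3>; };
 *		};
 *	};
 *
 * parse_cluster() recurses over the clusterN children, parse_core()
 * handles coreN/threadN leaves, and get_cpu_for_node() above resolves
 * each "cpu" phandle to a logical CPU number.
 */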
static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	char name[20];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else if (cpu != -ENODEV) {
				pr_err("%pOF: Can't get CPU for thread\n", t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n",
			       core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf && cpu != -ENODEV) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}

static int __init parse_cluster(struct device_node *cluster, int depth)
{
	char name[20];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	static int package_id __initdata;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, depth + 1);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	if (leaf)
		package_id++;

	return 0;
}

static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided, cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_cluster(map, 0);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id == -1)
			ret = -EINVAL;

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
#endif
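
/*
 * struct cpu_topology (see include/linux/arch_topology.h) holds, per CPU,
 * the thread/core/package identifiers and llc_id filled in by the parsers
 * above, plus the thread_sibling, core_sibling and llc_sibling masks
 * maintained by the helpers below. cpu_coregroup_mask() uses them to pick
 * the smallest suitable span for the scheduler's multi-core level.
 */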
/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* not NUMA in package, let's use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}

void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}

static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *cpu_topo = &cpu_topology[cpu];

	cpumask_clear(&cpu_topo->llc_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

	cpumask_clear(&cpu_topo->core_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	cpumask_clear(&cpu_topo->thread_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->package_id = -1;
		cpu_topo->llc_id = -1;

		clear_cpu_topology(cpu);
	}
}

void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}
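
/*
 * Weak default for platforms without ACPI topology support. Architectures
 * that can parse firmware tables (for example the arm64 ACPI PPTT parser)
 * override this; a non-zero return tells init_cpu_topology() below to
 * fall back to a clean, reset topology.
 */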
__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
#endif