// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 *
 *    Maintains the per-CPU cpu_topology[] array (thread/core/book/drawer
 *    masks and ids) and the helpers that refresh it.
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>

/* Function codes handed to ptf() in this file. */
#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

/*
 * Values of topology_mode. As used by cpu_group_map():
 *  - TOPOLOGY_MODE_HW:      group masks come from the mask_info lists
 *  - TOPOLOGY_MODE_SINGLE:  every CPU forms a group of its own
 *  - TOPOLOGY_MODE_PACKAGE: all present CPUs form one group
 *  - TOPOLOGY_MODE_UNINITIALIZED: initial value of topology_mode
 */
enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

/*
 * One node of a singly linked list of containers (sockets, books or
 * drawers): the container id and the CPUs that belong to it.
 */
struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
/* Buffer that store_topology() fills; assigned in topology_init_early(). */
static struct sysinfo_15_1_x *tl_info;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 *
 * NOTE(review): __arch_update_cpu_topology() in this file takes
 * smp_cpu_state_mutex around the update; confirm which lock callers
 * actually rely on.
 */
/*
 * List heads. tl_to_masks() advances to ->next before filling in a
 * container, so the first reported container is stored in head.next.
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

/* Per-CPU topology data, indexed by logical CPU number. */
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

/*
 * cpu_group_map - compute the group mask of @cpu for one container level
 * @dst:  receives the resulting mask
 * @info: head of the container list for this level (used in HW mode only)
 * @cpu:  logical CPU number
 *
 * An offline @cpu yields an empty mask. Otherwise the result depends on
 * topology_mode and is always restricted to online CPUs.
 */
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	/*
	 * Static scratch mask, shared by all calls.
	 * NOTE(review): this makes the function non-reentrant; presumably
	 * callers are serialized by a lock - confirm.
	 */
	static cpumask_t mask;

	cpumask_clear(&mask);
	if (!cpu_online(cpu))
		goto out;
	/* Default: the CPU is alone in its group. */
	cpumask_set_cpu(cpu, &mask);
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		/* Use the first container whose mask contains the CPU, if any. */
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		break;
	case TOPOLOGY_MODE_PACKAGE:
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		break;
	}
	cpumask_and(&mask, &mask, cpu_online_mask);
out:
	cpumask_copy(dst, &mask);
}

/*
 * cpu_thread_map - compute the mask of threads sharing a core with @cpu
 * @dst: receives the resulting mask
 * @cpu: logical CPU number
 *
 * An offline @cpu yields an empty mask. Outside of TOPOLOGY_MODE_HW the
 * result contains only @cpu itself.
 */
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	/* Static scratch mask; same reentrancy caveat as in cpu_group_map(). */
	static cpumask_t mask;
	int i;

	cpumask_clear(&mask);
	if (!cpu_online(cpu))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	/* Round down to a multiple of the number of threads per core. */
	cpu -= cpu % (smp_cpu_mtid + 1);
	/* Collect the smp_cpu_mtid + 1 consecutive CPUs starting there. */
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_present(cpu + i))
			cpumask_set_cpu(cpu + i, &mask);
	cpumask_and(&mask, &mask, cpu_online_mask);
out:
	cpumask_copy(dst, &mask);
}

/* Number of bits scanned in topology_core.mask by add_cpus_to_mask(). */
#define TOPOLOGY_CORE_BITS	64

/*
 * add_cpus_to_mask - record the CPUs of one core entry
 * @tl_core: core entry from the topology information block
 * @drawer:  drawer container the entry belongs to
 * @book:    book container the entry belongs to
 * @socket:  socket container the entry belongs to
 *
 * For every core selected in @tl_core->mask that maps to a logical CPU,
 * fills in cpu_topology[] for each of its threads, adds the threads to the
 * three container masks and sets their polarization from @tl_core->pp.
 */
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int rcore;
		int lcpu, i;

		/*
		 * The bit index is mirrored before the origin is added;
		 * presumably the mask numbers cores from the most
		 * significant bit - confirm against the sysinfo layout.
		 */
		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		/* No logical CPU for this core: skip it. */
		if (lcpu < 0)
			continue;
		/* Threads of the core occupy consecutive logical CPU numbers. */
		for (i = 0; i <= smp_cpu_mtid; i++) {
			topo = &cpu_topology[lcpu + i];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = lcpu + i;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(lcpu + i, &drawer->mask);
			cpumask_set_cpu(lcpu + i, &book->mask);
			cpumask_set_cpu(lcpu + i, &socket->mask);
			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
		}
	}
}

/*
 * Clear the cpumask of every node (list heads included) in the socket,
 * book and drawer lists. The ids are left untouched.
 */
static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}

/*
 * Return the entry following @tle. Entries with nl == 0 have the size of
 * struct topology_core, all others the size of struct topology_container.
 */
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}

/*
 * tl_to_masks - rebuild the container lists from a topology block
 * @info: topology information block to parse
 *
 * Clears all masks, then walks the entries from @info->tle up to
 * @info + @info->length. Container entries (nl 3/2/1) select the next
 * drawer/book/socket node and set its id; core entries (nl 0) are added to
 * the currently selected containers. An entry with any other nl value
 * aborts the walk with all masks cleared.
 *
 * NOTE(review): ->next is dereferenced without a NULL check; this relies
 * on alloc_masks() having chained enough nodes for every reported
 * container - confirm.
 */
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			/* Unexpected nesting level: discard what was parsed. */
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

/* Mark every possible CPU as horizontally polarized. */
static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}

/*
 * ptf - issue the instruction encoded as "rre,0xb9a20000" with @fc
 * @fc: function code; this file passes the PTF_* values
 *
 * Returns the value produced by the ipm/srl sequence, i.e. the
 * instruction's condition code. Callers in this file only test it for
 * zero / non-zero.
 */
static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		" .insn rre,0xb9a20000,%1,%1\n"
		" ipm %0\n"
		" srl %0,28\n"
		: "=d" (rc)
		: "d" (fc)  : "cc");
	return rc;
}

/*
 * topology_set_cpu_management - request a polarization change
 * @fc: non-zero requests PTF_VERTICAL, zero requests PTF_HORIZONTAL
 *
 * Returns -EOPNOTSUPP without MACHINE_HAS_TOPOLOGY, -EBUSY if ptf()
 * reports a non-zero result, and 0 on success. On success the
 * polarization of all possible CPUs is set to POLARIZATION_UNKNOWN.
 */
int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	/* rc is 0 here. */
	return rc;
}

/*
 * update_cpu_masks - recompute the masks and counters in cpu_topology[]
 *
 * First pass (all possible CPUs): rebuild thread/core/book/drawer masks,
 * reset booted_cores and, outside of TOPOLOGY_MODE_HW, synthesize the ids.
 * Second pass (online CPUs): compute booted_cores per package.
 */
void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
			/* One shared container id in package mode, else one per CPU. */
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			/*
			 * Count one core for every CPU in the package that is
			 * the first CPU of its own thread mask.
			 */
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				if (sibling == smt_first)
					topo_package->booted_cores++;
			}
		} else {
			/*
			 * Copy the count from the package's first CPU; this
			 * relies on that CPU having been visited already
			 * (presumably ascending iteration order - confirm).
			 */
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

/* Fill @info via stsi(15, 1, topology_mnest_limit()). */
void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

/*
 * Set or clear CIF_DEDICATED_CPU for the executing CPU according to
 * topology_cpu_dedicated(). @arg is unused.
 */
static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

/*
 * Refresh the topology data under smp_cpu_state_mutex.
 *
 * With MACHINE_HAS_TOPOLOGY the topology block in tl_info is re-read and
 * parsed first; without it all CPUs are marked horizontally polarized
 * after the masks were updated.
 *
 * Returns 1 with MACHINE_HAS_TOPOLOGY, 0 otherwise.
 */
static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc = 0;

	mutex_lock(&smp_cpu_state_mutex);
	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	mutex_unlock(&smp_cpu_state_mutex);
	return rc;
}

/*
 * arch_update_cpu_topology - refresh topology data and notify user space
 *
 * Updates the topology, runs __arch_update_dedicated_flag() on each CPU
 * (wait argument 0) and sends a KOBJ_CHANGE uevent for every online CPU
 * that has a device.
 *
 * Returns the result of __arch_update_cpu_topology().
 */
int arch_update_cpu_topology(void)
{
	struct device *dev;
	int cpu, rc;

	rc = __arch_update_cpu_topology();
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		if (dev)
			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

/* Work handler for topology_work: rebuild the scheduling domains. */
static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

/* Queue topology_work. */
void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

/* Flush topology_work via flush_work(). */
static void topology_flush_work(void)
{
	flush_work(&topology_work);
}

/*
 * Timer callback: if ptf(PTF_CHECK) returns non-zero, schedule a topology
 * update; in either case re-arm the timer.
 */
static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

/* Remaining number of short (100 ms) polling intervals. */
static atomic_t topology_poll = ATOMIC_INIT(0);

/*
 * (Re-)arm topology_timer: 100 ms from now while topology_poll could still
 * be decremented (it was non-zero), otherwise 60 seconds from now.
 */
static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}

/*
 * Switch to fast polling because a topology change is expected: adds 60 to
 * topology_poll (unless it already exceeds 60) and re-arms the timer.
 * Does nothing without MACHINE_HAS_TOPOLOGY.
 */
void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/*
	 * This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll at a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}

/* Last value successfully written to the "dispatching" attribute (0 or 1). */
static int cpu_management;

/* sysfs show: print cpu_management followed by a newline. */
static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

/*
 * sysfs store: accept "0" or "1" and pass a changed value to
 * topology_set_cpu_management().
 *
 * Returns @count on success (also when the value is unchanged), -EINVAL
 * for malformed input or values other than 0/1, or the error returned by
 * topology_set_cpu_management().
 */
static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	/* Exactly one conversion must succeed: a number with no trailing text. */
	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

/*
 * sysfs show: print the polarization of CPU dev->id as text; values
 * without a dedicated case are reported as "unknown".
 */
static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

/* Attributes created for every CPU by topology_cpu_init(). */
static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

/* sysfs show: print topology_cpu_dedicated() for CPU dev->id. */
static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

/* Attributes created by topology_cpu_init() only with MACHINE_HAS_TOPOLOGY. */
static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

/*
 * topology_cpu_init - create the topology sysfs attributes of a CPU
 * @cpu: CPU whose device gets the attributes
 *
 * Always creates the "polarization" group; with MACHINE_HAS_TOPOLOGY also
 * the "dedicated" group. If the second group cannot be created the first
 * one is removed again.
 *
 * Returns 0 on success or the error from sysfs_create_group().
 */
int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !MACHINE_HAS_TOPOLOGY)
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}

/* Mask accessors used in the s390_topology table below. */
static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}


const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

/*
 * Scheduling domain levels passed to set_sched_topology() in
 * topology_init_early(), terminated by an entry with a NULL mask function.
 */
static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * alloc_masks - allocate the list nodes for one container level
 * @info:   topology block providing the mag[] values and mnest
 * @mask:   list head to which the new nodes are chained
 * @offset: container level; the callers in this file pass 1 for sockets,
 *          2 for books and 3 for drawers
 *
 * The number of nodes is the product of mag[TOPOLOGY_NR_MAG - @offset] and
 * the next (mnest - @offset) lower-indexed mag[] entries, but at least 1.
 * Panics if an allocation fails.
 */
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc(sizeof(*mask->next), 8);
		if (!mask->next)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

/*
 * topology_init_early - early topology setup
 *
 * Registers s390_topology with the scheduler and picks a default
 * topology_mode if it is still TOPOLOGY_MODE_UNINITIALIZED (i.e. it was
 * not set by topology_setup()). With MACHINE_HAS_TOPOLOGY it also
 * allocates tl_info (one page), reads the topology once, logs the mag[]
 * values and mnest, and allocates the socket/book/drawer lists. Finally
 * the topology data and the dedicated flag of the executing CPU are
 * updated.
 */
void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (MACHINE_HAS_TOPOLOGY)
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!MACHINE_HAS_TOPOLOGY)
		goto out;
	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!tl_info)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

/*
 * Map the enabled/disabled switch to a topology mode: disabled gives
 * TOPOLOGY_MODE_SINGLE; enabled gives TOPOLOGY_MODE_HW with
 * MACHINE_HAS_TOPOLOGY and TOPOLOGY_MODE_PACKAGE without.
 */
static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

/* Non-zero for every mode except TOPOLOGY_MODE_SINGLE. */
static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

/*
 * Handler for the "topology" early parameter: parse @str as a boolean and
 * set topology_mode accordingly. Returns 0 or the kstrtobool() error.
 */
static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);

/*
 * sysctl handler for the "topology" entry.
 *
 * Reads report topology_is_enabled(). Writes accept 0 or 1; if the
 * resulting mode differs from topology_mode, the mode is switched under
 * smp_cpu_state_mutex and a topology update is scheduled. The pending work
 * is flushed after the mutex is dropped, before returning.
 */
static int topology_ctl_handler(struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	/* Temporary table entry so the proc helper operates on the local. */
	struct ctl_table ctl_entry = {
		.procname	= ctl->procname,
		.data		= &enabled,
		.maxlen		= sizeof(int),
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	/* Nothing more to do on error or for a read. */
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

/* The "topology" entry, child of the "s390" directory below. */
static struct ctl_table topology_ctl_table[] = {
	{
		.procname	= "topology",
		.mode		= 0644,
		.proc_handler	= topology_ctl_handler,
	},
	{ },
};

/* The "s390" directory registered by topology_init(). */
static struct ctl_table topology_dir_table[] = {
	{
		.procname	= "s390",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= topology_ctl_table,
	},
	{ },
};

/*
 * Device initcall: set up the topology timer, start polling with
 * MACHINE_HAS_TOPOLOGY (otherwise mark all CPUs horizontally polarized),
 * register the sysctl table and create the "dispatching" attribute on
 * cpu_subsys.dev_root.
 *
 * Returns the result of device_create_file(); the return value of
 * register_sysctl_table() is not checked.
 */
static int __init topology_init(void)
{
	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	register_sysctl_table(topology_dir_table);
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);