1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2007, 2011 4 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> 5 */ 6 7 #define KMSG_COMPONENT "cpu" 8 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 9 10 #include <linux/workqueue.h> 11 #include <linux/memblock.h> 12 #include <linux/uaccess.h> 13 #include <linux/sysctl.h> 14 #include <linux/cpuset.h> 15 #include <linux/device.h> 16 #include <linux/export.h> 17 #include <linux/kernel.h> 18 #include <linux/sched.h> 19 #include <linux/sched/topology.h> 20 #include <linux/delay.h> 21 #include <linux/init.h> 22 #include <linux/slab.h> 23 #include <linux/cpu.h> 24 #include <linux/smp.h> 25 #include <linux/mm.h> 26 #include <linux/nodemask.h> 27 #include <linux/node.h> 28 #include <asm/sysinfo.h> 29 #include <asm/numa.h> 30 31 #define PTF_HORIZONTAL (0UL) 32 #define PTF_VERTICAL (1UL) 33 #define PTF_CHECK (2UL) 34 35 enum { 36 TOPOLOGY_MODE_HW, 37 TOPOLOGY_MODE_SINGLE, 38 TOPOLOGY_MODE_PACKAGE, 39 TOPOLOGY_MODE_UNINITIALIZED 40 }; 41 42 struct mask_info { 43 struct mask_info *next; 44 unsigned char id; 45 cpumask_t mask; 46 }; 47 48 static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; 49 static void set_topology_timer(void); 50 static void topology_work_fn(struct work_struct *work); 51 static struct sysinfo_15_1_x *tl_info; 52 53 static DECLARE_WORK(topology_work, topology_work_fn); 54 55 /* 56 * Socket/Book linked lists and cpu_topology updates are 57 * protected by "sched_domains_mutex". 58 */ 59 static struct mask_info socket_info; 60 static struct mask_info book_info; 61 static struct mask_info drawer_info; 62 63 struct cpu_topology_s390 cpu_topology[NR_CPUS]; 64 EXPORT_SYMBOL_GPL(cpu_topology); 65 66 cpumask_t cpus_with_topology; 67 68 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) 69 { 70 cpumask_t mask; 71 72 cpumask_copy(&mask, cpumask_of(cpu)); 73 switch (topology_mode) { 74 case TOPOLOGY_MODE_HW: 75 while (info) { 76 if (cpumask_test_cpu(cpu, &info->mask)) { 77 mask = info->mask; 78 break; 79 } 80 info = info->next; 81 } 82 if (cpumask_empty(&mask)) 83 cpumask_copy(&mask, cpumask_of(cpu)); 84 break; 85 case TOPOLOGY_MODE_PACKAGE: 86 cpumask_copy(&mask, cpu_present_mask); 87 break; 88 default: 89 /* fallthrough */ 90 case TOPOLOGY_MODE_SINGLE: 91 cpumask_copy(&mask, cpumask_of(cpu)); 92 break; 93 } 94 return mask; 95 } 96 97 static cpumask_t cpu_thread_map(unsigned int cpu) 98 { 99 cpumask_t mask; 100 int i; 101 102 cpumask_copy(&mask, cpumask_of(cpu)); 103 if (topology_mode != TOPOLOGY_MODE_HW) 104 return mask; 105 cpu -= cpu % (smp_cpu_mtid + 1); 106 for (i = 0; i <= smp_cpu_mtid; i++) 107 if (cpu_present(cpu + i)) 108 cpumask_set_cpu(cpu + i, &mask); 109 return mask; 110 } 111 112 #define TOPOLOGY_CORE_BITS 64 113 114 static void add_cpus_to_mask(struct topology_core *tl_core, 115 struct mask_info *drawer, 116 struct mask_info *book, 117 struct mask_info *socket) 118 { 119 struct cpu_topology_s390 *topo; 120 unsigned int core; 121 122 for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { 123 unsigned int rcore; 124 int lcpu, i; 125 126 rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; 127 lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); 128 if (lcpu < 0) 129 continue; 130 for (i = 0; i <= smp_cpu_mtid; i++) { 131 topo = &cpu_topology[lcpu + i]; 132 topo->drawer_id = drawer->id; 133 topo->book_id = book->id; 134 topo->socket_id = socket->id; 135 topo->core_id = rcore; 136 topo->thread_id = lcpu + i; 137 topo->dedicated = tl_core->d; 138 cpumask_set_cpu(lcpu + i, &drawer->mask); 139 cpumask_set_cpu(lcpu + i, &book->mask); 140 cpumask_set_cpu(lcpu + i, &socket->mask); 141 cpumask_set_cpu(lcpu + i, &cpus_with_topology); 142 smp_cpu_set_polarization(lcpu + i, tl_core->pp); 143 } 144 } 145 } 146 147 static void clear_masks(void) 148 { 149 struct mask_info *info; 150 151 info = &socket_info; 152 while (info) { 153 cpumask_clear(&info->mask); 154 info = info->next; 155 } 156 info = &book_info; 157 while (info) { 158 cpumask_clear(&info->mask); 159 info = info->next; 160 } 161 info = &drawer_info; 162 while (info) { 163 cpumask_clear(&info->mask); 164 info = info->next; 165 } 166 } 167 168 static union topology_entry *next_tle(union topology_entry *tle) 169 { 170 if (!tle->nl) 171 return (union topology_entry *)((struct topology_core *)tle + 1); 172 return (union topology_entry *)((struct topology_container *)tle + 1); 173 } 174 175 static void tl_to_masks(struct sysinfo_15_1_x *info) 176 { 177 struct mask_info *socket = &socket_info; 178 struct mask_info *book = &book_info; 179 struct mask_info *drawer = &drawer_info; 180 union topology_entry *tle, *end; 181 182 clear_masks(); 183 tle = info->tle; 184 end = (union topology_entry *)((unsigned long)info + info->length); 185 while (tle < end) { 186 switch (tle->nl) { 187 case 3: 188 drawer = drawer->next; 189 drawer->id = tle->container.id; 190 break; 191 case 2: 192 book = book->next; 193 book->id = tle->container.id; 194 break; 195 case 1: 196 socket = socket->next; 197 socket->id = tle->container.id; 198 break; 199 case 0: 200 add_cpus_to_mask(&tle->cpu, drawer, book, socket); 201 break; 202 default: 203 clear_masks(); 204 return; 205 } 206 tle = next_tle(tle); 207 } 208 } 209 210 static void topology_update_polarization_simple(void) 211 { 212 int cpu; 213 214 for_each_possible_cpu(cpu) 215 smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); 216 } 217 218 static int ptf(unsigned long fc) 219 { 220 int rc; 221 222 asm volatile( 223 " .insn rre,0xb9a20000,%1,%1\n" 224 " ipm %0\n" 225 " srl %0,28\n" 226 : "=d" (rc) 227 : "d" (fc) : "cc"); 228 return rc; 229 } 230 231 int topology_set_cpu_management(int fc) 232 { 233 int cpu, rc; 234 235 if (!MACHINE_HAS_TOPOLOGY) 236 return -EOPNOTSUPP; 237 if (fc) 238 rc = ptf(PTF_VERTICAL); 239 else 240 rc = ptf(PTF_HORIZONTAL); 241 if (rc) 242 return -EBUSY; 243 for_each_possible_cpu(cpu) 244 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 245 return rc; 246 } 247 248 static void update_cpu_masks(void) 249 { 250 struct cpu_topology_s390 *topo; 251 int cpu, id; 252 253 for_each_possible_cpu(cpu) { 254 topo = &cpu_topology[cpu]; 255 topo->thread_mask = cpu_thread_map(cpu); 256 topo->core_mask = cpu_group_map(&socket_info, cpu); 257 topo->book_mask = cpu_group_map(&book_info, cpu); 258 topo->drawer_mask = cpu_group_map(&drawer_info, cpu); 259 if (topology_mode != TOPOLOGY_MODE_HW) { 260 id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; 261 topo->thread_id = cpu; 262 topo->core_id = cpu; 263 topo->socket_id = id; 264 topo->book_id = id; 265 topo->drawer_id = id; 266 if (cpu_present(cpu)) 267 cpumask_set_cpu(cpu, &cpus_with_topology); 268 } 269 } 270 numa_update_cpu_topology(); 271 } 272 273 void store_topology(struct sysinfo_15_1_x *info) 274 { 275 stsi(info, 15, 1, topology_mnest_limit()); 276 } 277 278 static void __arch_update_dedicated_flag(void *arg) 279 { 280 if (topology_cpu_dedicated(smp_processor_id())) 281 set_cpu_flag(CIF_DEDICATED_CPU); 282 else 283 clear_cpu_flag(CIF_DEDICATED_CPU); 284 } 285 286 static int __arch_update_cpu_topology(void) 287 { 288 struct sysinfo_15_1_x *info = tl_info; 289 int rc = 0; 290 291 mutex_lock(&smp_cpu_state_mutex); 292 cpumask_clear(&cpus_with_topology); 293 if (MACHINE_HAS_TOPOLOGY) { 294 rc = 1; 295 store_topology(info); 296 tl_to_masks(info); 297 } 298 update_cpu_masks(); 299 if (!MACHINE_HAS_TOPOLOGY) 300 topology_update_polarization_simple(); 301 mutex_unlock(&smp_cpu_state_mutex); 302 return rc; 303 } 304 305 int arch_update_cpu_topology(void) 306 { 307 struct device *dev; 308 int cpu, rc; 309 310 rc = __arch_update_cpu_topology(); 311 on_each_cpu(__arch_update_dedicated_flag, NULL, 0); 312 for_each_online_cpu(cpu) { 313 dev = get_cpu_device(cpu); 314 if (dev) 315 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 316 } 317 return rc; 318 } 319 320 static void topology_work_fn(struct work_struct *work) 321 { 322 rebuild_sched_domains(); 323 } 324 325 void topology_schedule_update(void) 326 { 327 schedule_work(&topology_work); 328 } 329 330 static void topology_flush_work(void) 331 { 332 flush_work(&topology_work); 333 } 334 335 static void topology_timer_fn(struct timer_list *unused) 336 { 337 if (ptf(PTF_CHECK)) 338 topology_schedule_update(); 339 set_topology_timer(); 340 } 341 342 static struct timer_list topology_timer; 343 344 static atomic_t topology_poll = ATOMIC_INIT(0); 345 346 static void set_topology_timer(void) 347 { 348 if (atomic_add_unless(&topology_poll, -1, 0)) 349 mod_timer(&topology_timer, jiffies + HZ / 10); 350 else 351 mod_timer(&topology_timer, jiffies + HZ * 60); 352 } 353 354 void topology_expect_change(void) 355 { 356 if (!MACHINE_HAS_TOPOLOGY) 357 return; 358 /* This is racy, but it doesn't matter since it is just a heuristic. 359 * Worst case is that we poll in a higher frequency for a bit longer. 360 */ 361 if (atomic_read(&topology_poll) > 60) 362 return; 363 atomic_add(60, &topology_poll); 364 set_topology_timer(); 365 } 366 367 static int cpu_management; 368 369 static ssize_t dispatching_show(struct device *dev, 370 struct device_attribute *attr, 371 char *buf) 372 { 373 ssize_t count; 374 375 mutex_lock(&smp_cpu_state_mutex); 376 count = sprintf(buf, "%d\n", cpu_management); 377 mutex_unlock(&smp_cpu_state_mutex); 378 return count; 379 } 380 381 static ssize_t dispatching_store(struct device *dev, 382 struct device_attribute *attr, 383 const char *buf, 384 size_t count) 385 { 386 int val, rc; 387 char delim; 388 389 if (sscanf(buf, "%d %c", &val, &delim) != 1) 390 return -EINVAL; 391 if (val != 0 && val != 1) 392 return -EINVAL; 393 rc = 0; 394 get_online_cpus(); 395 mutex_lock(&smp_cpu_state_mutex); 396 if (cpu_management == val) 397 goto out; 398 rc = topology_set_cpu_management(val); 399 if (rc) 400 goto out; 401 cpu_management = val; 402 topology_expect_change(); 403 out: 404 mutex_unlock(&smp_cpu_state_mutex); 405 put_online_cpus(); 406 return rc ? rc : count; 407 } 408 static DEVICE_ATTR_RW(dispatching); 409 410 static ssize_t cpu_polarization_show(struct device *dev, 411 struct device_attribute *attr, char *buf) 412 { 413 int cpu = dev->id; 414 ssize_t count; 415 416 mutex_lock(&smp_cpu_state_mutex); 417 switch (smp_cpu_get_polarization(cpu)) { 418 case POLARIZATION_HRZ: 419 count = sprintf(buf, "horizontal\n"); 420 break; 421 case POLARIZATION_VL: 422 count = sprintf(buf, "vertical:low\n"); 423 break; 424 case POLARIZATION_VM: 425 count = sprintf(buf, "vertical:medium\n"); 426 break; 427 case POLARIZATION_VH: 428 count = sprintf(buf, "vertical:high\n"); 429 break; 430 default: 431 count = sprintf(buf, "unknown\n"); 432 break; 433 } 434 mutex_unlock(&smp_cpu_state_mutex); 435 return count; 436 } 437 static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); 438 439 static struct attribute *topology_cpu_attrs[] = { 440 &dev_attr_polarization.attr, 441 NULL, 442 }; 443 444 static struct attribute_group topology_cpu_attr_group = { 445 .attrs = topology_cpu_attrs, 446 }; 447 448 static ssize_t cpu_dedicated_show(struct device *dev, 449 struct device_attribute *attr, char *buf) 450 { 451 int cpu = dev->id; 452 ssize_t count; 453 454 mutex_lock(&smp_cpu_state_mutex); 455 count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); 456 mutex_unlock(&smp_cpu_state_mutex); 457 return count; 458 } 459 static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); 460 461 static struct attribute *topology_extra_cpu_attrs[] = { 462 &dev_attr_dedicated.attr, 463 NULL, 464 }; 465 466 static struct attribute_group topology_extra_cpu_attr_group = { 467 .attrs = topology_extra_cpu_attrs, 468 }; 469 470 int topology_cpu_init(struct cpu *cpu) 471 { 472 int rc; 473 474 rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); 475 if (rc || !MACHINE_HAS_TOPOLOGY) 476 return rc; 477 rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); 478 if (rc) 479 sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); 480 return rc; 481 } 482 483 static const struct cpumask *cpu_thread_mask(int cpu) 484 { 485 return &cpu_topology[cpu].thread_mask; 486 } 487 488 489 const struct cpumask *cpu_coregroup_mask(int cpu) 490 { 491 return &cpu_topology[cpu].core_mask; 492 } 493 494 static const struct cpumask *cpu_book_mask(int cpu) 495 { 496 return &cpu_topology[cpu].book_mask; 497 } 498 499 static const struct cpumask *cpu_drawer_mask(int cpu) 500 { 501 return &cpu_topology[cpu].drawer_mask; 502 } 503 504 static struct sched_domain_topology_level s390_topology[] = { 505 { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, 506 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, 507 { cpu_book_mask, SD_INIT_NAME(BOOK) }, 508 { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, 509 { cpu_cpu_mask, SD_INIT_NAME(DIE) }, 510 { NULL, }, 511 }; 512 513 static void __init alloc_masks(struct sysinfo_15_1_x *info, 514 struct mask_info *mask, int offset) 515 { 516 int i, nr_masks; 517 518 nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; 519 for (i = 0; i < info->mnest - offset; i++) 520 nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; 521 nr_masks = max(nr_masks, 1); 522 for (i = 0; i < nr_masks; i++) { 523 mask->next = memblock_alloc(sizeof(*mask->next), 8); 524 if (!mask->next) 525 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 526 __func__, sizeof(*mask->next), 8); 527 mask = mask->next; 528 } 529 } 530 531 void __init topology_init_early(void) 532 { 533 struct sysinfo_15_1_x *info; 534 535 set_sched_topology(s390_topology); 536 if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { 537 if (MACHINE_HAS_TOPOLOGY) 538 topology_mode = TOPOLOGY_MODE_HW; 539 else 540 topology_mode = TOPOLOGY_MODE_SINGLE; 541 } 542 if (!MACHINE_HAS_TOPOLOGY) 543 goto out; 544 tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 545 if (!tl_info) 546 panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 547 __func__, PAGE_SIZE, PAGE_SIZE); 548 info = tl_info; 549 store_topology(info); 550 pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", 551 info->mag[0], info->mag[1], info->mag[2], info->mag[3], 552 info->mag[4], info->mag[5], info->mnest); 553 alloc_masks(info, &socket_info, 1); 554 alloc_masks(info, &book_info, 2); 555 alloc_masks(info, &drawer_info, 3); 556 out: 557 __arch_update_cpu_topology(); 558 __arch_update_dedicated_flag(NULL); 559 } 560 561 static inline int topology_get_mode(int enabled) 562 { 563 if (!enabled) 564 return TOPOLOGY_MODE_SINGLE; 565 return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; 566 } 567 568 static inline int topology_is_enabled(void) 569 { 570 return topology_mode != TOPOLOGY_MODE_SINGLE; 571 } 572 573 static int __init topology_setup(char *str) 574 { 575 bool enabled; 576 int rc; 577 578 rc = kstrtobool(str, &enabled); 579 if (rc) 580 return rc; 581 topology_mode = topology_get_mode(enabled); 582 return 0; 583 } 584 early_param("topology", topology_setup); 585 586 static int topology_ctl_handler(struct ctl_table *ctl, int write, 587 void __user *buffer, size_t *lenp, loff_t *ppos) 588 { 589 int enabled = topology_is_enabled(); 590 int new_mode; 591 int rc; 592 struct ctl_table ctl_entry = { 593 .procname = ctl->procname, 594 .data = &enabled, 595 .maxlen = sizeof(int), 596 .extra1 = SYSCTL_ZERO, 597 .extra2 = SYSCTL_ONE, 598 }; 599 600 rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); 601 if (rc < 0 || !write) 602 return rc; 603 604 mutex_lock(&smp_cpu_state_mutex); 605 new_mode = topology_get_mode(enabled); 606 if (topology_mode != new_mode) { 607 topology_mode = new_mode; 608 topology_schedule_update(); 609 } 610 mutex_unlock(&smp_cpu_state_mutex); 611 topology_flush_work(); 612 613 return rc; 614 } 615 616 static struct ctl_table topology_ctl_table[] = { 617 { 618 .procname = "topology", 619 .mode = 0644, 620 .proc_handler = topology_ctl_handler, 621 }, 622 { }, 623 }; 624 625 static struct ctl_table topology_dir_table[] = { 626 { 627 .procname = "s390", 628 .maxlen = 0, 629 .mode = 0555, 630 .child = topology_ctl_table, 631 }, 632 { }, 633 }; 634 635 static int __init topology_init(void) 636 { 637 timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); 638 if (MACHINE_HAS_TOPOLOGY) 639 set_topology_timer(); 640 else 641 topology_update_polarization_simple(); 642 register_sysctl_table(topology_dir_table); 643 return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); 644 } 645 device_initcall(topology_init); 646